diff --git a/.github/workflows/Snapshot.yml b/.github/workflows/Snapshot.yml index 764b9077..e2f56de1 100644 --- a/.github/workflows/Snapshot.yml +++ b/.github/workflows/Snapshot.yml @@ -11,7 +11,8 @@ jobs: dotnet-version: '10.0.x' project-names: 'Numerics' version: '2.0.0' - run-tests: true + # PR integration already runs the full multi-target test suite. + run-tests: false nuget-source: 'https://www.hec.usace.army.mil/nexus/repository/consequences-nuget-public/' secrets: NUGET_API_KEY: ${{ secrets.NEXUS_NUGET_APIKEY }} diff --git a/Numerics/Data/Statistics/YeoJohnson.cs b/Numerics/Data/Statistics/YeoJohnson.cs index ce4e32d6..b85adf11 100644 --- a/Numerics/Data/Statistics/YeoJohnson.cs +++ b/Numerics/Data/Statistics/YeoJohnson.cs @@ -37,6 +37,20 @@ public static void FitLambda(IList values, out double lambda) } } + /// + /// Fit the transform parameter using maximum likelihood estimation. + /// + /// The list of values to transform. + /// The fitted transformation exponent. + public static double FitLambda(IList values) + { + if (values == null) throw new ArgumentNullException(nameof(values)); + if (values.Count < 2) throw new ArgumentException("At least 2 values are required to fit lambda.", nameof(values)); + + FitLambda(values, out double lambda); + return lambda; + } + /// /// The log-likelihood function. The transformed observations are assumed to come from a /// normal distribution. The change of variable formula is used to write the log-likelihood function. @@ -160,6 +174,19 @@ public static double Transform(double value, double lambda) } } + /// + /// Returns the derivative of the Yeo-Johnson transformation with respect to the original value. + /// + /// The value at which to evaluate the derivative. + /// The transformation exponent. Range -5 to +5. + public static double Derivative(double value, double lambda) + { + if (Math.Abs(lambda) > 5d) return double.NaN; + return value >= 0d + ? Math.Pow(value + 1d, lambda - 1d) + : Math.Pow(-value + 1d, 1d - lambda); + } + /// /// Returns the Yeo-Johnson transformation of each value in the list. /// diff --git a/Numerics/Data/Time Series/Support/TimeSeriesDownload.cs b/Numerics/Data/Time Series/Support/TimeSeriesDownload.cs index 3360492a..6c294aa1 100644 --- a/Numerics/Data/Time Series/Support/TimeSeriesDownload.cs +++ b/Numerics/Data/Time Series/Support/TimeSeriesDownload.cs @@ -35,7 +35,40 @@ public class TimeSeriesDownload private const string UserAgent = "USACE-Numerics/2.0"; /// - /// Checks if there is an Internet connection. + /// GET with up to three attempts and exponential backoff (0.5s, 1s, 2s + /// + jitter) on transient failures. Retries 5xx, 408, 429, network + /// exceptions, and KiWIS DatasourceError bodies. Does NOT retry 4xx — + /// those mean the request itself is wrong. + /// + private static async Task<(HttpStatusCode status, string body)> GetWithRetryAsync( + HttpClient client, string url, int maxAttempts = 3) + { + Exception? lastNet = null; + for (int attempt = 1; attempt <= maxAttempts; attempt++) + { + try + { + using var resp = await client.GetAsync(url); + string body = await resp.Content.ReadAsStringAsync(); + bool transient = + (int)resp.StatusCode >= 500 || + resp.StatusCode == HttpStatusCode.RequestTimeout || + (int)resp.StatusCode == 429 || + body.Contains("DatasourceError") || + body.Contains("Error connecting to WDP"); + if (!transient || attempt == maxAttempts) return (resp.StatusCode, body); + } + catch (HttpRequestException ex) { lastNet = ex; if (attempt == maxAttempts) throw; } + catch (TaskCanceledException ex) { lastNet = ex; if (attempt == maxAttempts) throw; } + + await Task.Delay(500 * (int)Math.Pow(2, attempt - 1)); + } + throw lastNet ?? new Exception("GetWithRetryAsync exhausted retries"); + } + + /// + /// Checks if there is an Internet connection by probing Google's + /// connectivity-check endpoint (HTTP 204 No Content). /// public static async Task IsConnectedToInternet() { @@ -43,8 +76,9 @@ public static async Task IsConnectedToInternet() { using (var cts = new CancellationTokenSource(TimeSpan.FromSeconds(5))) { - await _defaultClient.GetAsync("https://waterservices.usgs.gov/nwis/", cts.Token); - return true; + var resp = await _defaultClient.GetAsync( + "https://www.google.com/generate_204", cts.Token); + return resp.IsSuccessStatusCode; } } catch @@ -192,10 +226,28 @@ public static async Task FromGHCN(string siteNumber, TimeSeriesType string stationFileUrl = $"{ghcnBaseUrl}{Uri.EscapeDataString(siteNumber)}.dly"; { var client = _defaultClient; - // Request the file and ensure that response headers are read first. - using (HttpResponseMessage response = await client.GetAsync(stationFileUrl, HttpCompletionOption.ResponseHeadersRead)) + // GHCN is large enough to want streaming, so we can't use GetWithRetryAsync + // directly (it buffers the whole body). Inline the same retry shape on the + // headers-only GetAsync, then stream from the successful response. + HttpResponseMessage? response = null; + for (int attempt = 1; attempt <= 3; attempt++) + { + try + { + response = await client.GetAsync(stationFileUrl, HttpCompletionOption.ResponseHeadersRead); + if ((int)response.StatusCode < 500 + && response.StatusCode != HttpStatusCode.RequestTimeout + && (int)response.StatusCode != 429) break; + if (attempt == 3) break; + response.Dispose(); + } + catch (HttpRequestException) when (attempt < 3) { /* retry */ } + catch (TaskCanceledException) when (attempt < 3) { /* retry */ } + await Task.Delay(500 * (int)Math.Pow(2, attempt - 1)); + } + using (response) { - response.EnsureSuccessStatusCode(); + response!.EnsureSuccessStatusCode(); using (Stream stream = await response.Content.ReadAsStreamAsync()) using (FileStream fs = new FileStream(tempFilePath, FileMode.Create, FileAccess.Write, FileShare.None, bufferSize: 81920, useAsync: true)) { @@ -221,8 +273,8 @@ public static async Task FromGHCN(string siteNumber, TimeSeriesType { // Extract year, month, and parse days - int.TryParse(line.Substring(11, 4), out var year); - int.TryParse(line.Substring(15, 2), out var month); + int.TryParse(line.Substring(11, 4), NumberStyles.Integer, CultureInfo.InvariantCulture, out var year); + int.TryParse(line.Substring(15, 2), NumberStyles.Integer, CultureInfo.InvariantCulture, out var month); int daysInMonth = DateTime.DaysInMonth(year, month); for (int i = 0; i < daysInMonth; i++) @@ -238,7 +290,7 @@ public static async Task FromGHCN(string siteNumber, TimeSeriesType } // Parse the precipitation or snow value - if (int.TryParse(strgValue, out int rawValue) && rawValue != -9999) // Valid precipitation value + if (int.TryParse(strgValue, NumberStyles.Integer, CultureInfo.InvariantCulture, out int rawValue) && rawValue != -9999) // Valid precipitation value { double convertedValue = ConvertToDesiredUnit(rawValue, unit); timeSeries.Add(new SeriesOrdinate(currentDate, convertedValue)); @@ -389,60 +441,66 @@ private static string CreateURLForUSGSDownload(string siteNumber, TimeSeriesType { var client = _decompressClient; - HttpResponseMessage response = await client.GetAsync(url); - response.EnsureSuccessStatusCode(); - - using (Stream contentStream = await response.Content.ReadAsStreamAsync()) - using (StreamReader reader = new StreamReader(contentStream)) + var (status, body) = await GetWithRetryAsync(client, url); + if ((int)status >= 400) { - string? line; - bool isHeader = true; + string snippet = body.Length > 500 ? body.Substring(0, 500) + "…" : body; + throw new Exception( + $"Failed to retrieve USGS data. Status={(int)status} ({status}). Body={snippet}"); + } + + using var reader = new StringReader(body); + string? line; + bool isHeader = true; - while ((line = await reader.ReadLineAsync()) != null) + while ((line = await reader.ReadLineAsync()) != null) + { + // Skip header row + if (isHeader) { - // Skip header row - if (isHeader) - { - isHeader = false; - continue; - } + isHeader = false; + continue; + } - // Skip comment lines - if (line.StartsWith("#")) + // Skip comment lines + if (line.StartsWith("#")) + { + if (line.Trim() == "# No sites found matching all criteria") { - if (line.Trim() == "# No sites found matching all criteria") - { - throw new Exception("No data found matching all criteria."); - } - continue; + throw new Exception("No data found matching all criteria."); } + continue; + } - string[] fields = line.Split('\t'); - // Validate expected number of fields and record type - if (fields.Length <= valueIndex || fields[0] != "USGS") - continue; + string[] fields = line.Split('\t'); + // Validate expected number of fields and record type + if (fields.Length <= valueIndex || fields[0] != "USGS") + continue; - // Parse date (assume fields[2] contains the date) - if (!DateTime.TryParse(fields[2], out DateTime index)) - { - continue; - } + // USGS instantaneous timestamps are local-time strings without an offset; + // the tz_cd column at fields[3] reports the offset separately. We currently + // parse these as naive DateTime (Kind=Unspecified). Future work: synthesize + // a DateTimeOffset from fields[2] + fields[3] for full timezone fidelity. + if (!DateTime.TryParse(fields[2], CultureInfo.InvariantCulture, DateTimeStyles.None, out DateTime index)) + { + continue; + } - // Fill in missing days only for daily series (not instantaneous) - if (!isInstantaneous && timeSeries.Count > 0 && index != TimeSeries.AddTimeInterval(timeSeries.Last().Index, TimeInterval.OneDay)) + // Fill in missing days only for daily series (not instantaneous) + if (!isInstantaneous && timeSeries.Count > 0 && index != TimeSeries.AddTimeInterval(timeSeries.Last().Index, TimeInterval.OneDay)) + { + while (timeSeries.Last().Index < TimeSeries.SubtractTimeInterval(index, TimeInterval.OneDay)) { - while (timeSeries.Last().Index < TimeSeries.SubtractTimeInterval(index, TimeInterval.OneDay)) - { - timeSeries.Add(new SeriesOrdinate( - TimeSeries.AddTimeInterval(timeSeries.Last().Index, TimeInterval.OneDay), double.NaN)); - } + timeSeries.Add(new SeriesOrdinate( + TimeSeries.AddTimeInterval(timeSeries.Last().Index, TimeInterval.OneDay), double.NaN)); } - - // Get and parse value - string valueStr = fields[valueIndex]; - double value = string.IsNullOrWhiteSpace(valueStr) ? double.NaN : double.TryParse(valueStr, out double tempVal) ? tempVal : double.NaN; - timeSeries.Add(new SeriesOrdinate(index, value)); } + + // Get and parse value + string valueStr = fields[valueIndex]; + double value = string.IsNullOrWhiteSpace(valueStr) ? double.NaN + : double.TryParse(valueStr, NumberStyles.Float, CultureInfo.InvariantCulture, out double tempVal) ? tempVal : double.NaN; + timeSeries.Add(new SeriesOrdinate(index, value)); } } } @@ -450,14 +508,25 @@ private static string CreateURLForUSGSDownload(string siteNumber, TimeSeriesType else if (timeSeriesType == TimeSeriesType.PeakDischarge || timeSeriesType == TimeSeriesType.PeakStage) { { - // Download data as string (assumes USGS peak data is not compressed) - textDownload = await _defaultClient.GetStringAsync(url); + var (peakStatus, peakBody) = await GetWithRetryAsync(_defaultClient, url); + if ((int)peakStatus >= 400) + { + string snippet = peakBody.Length > 500 ? peakBody.Substring(0, 500) + "…" : peakBody; + throw new Exception( + $"Failed to retrieve USGS peak data. Status={(int)peakStatus} ({peakStatus}). Body={snippet}"); + } + textDownload = peakBody; + int idx = timeSeriesType == TimeSeriesType.PeakDischarge ? 4 : 6; var lines = textDownload.Split(new[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries); foreach (string line in lines) { var segments = line.Split('\t'); - if (segments.First() == "USGS" && segments.Count() >= 5) + // Bounds-check against the actual column we'll read (peak_va at 4 for + // discharge, gage_ht at 6 for stage), not a fixed minimum. Old guard + // was Length >= 5 which would IndexOutOfRange on a 5-column historical + // row when reading stage at index 6. + if (segments.Length > idx && segments[0] == "USGS") { // Get date DateTime index = DateTime.Now; @@ -465,35 +534,34 @@ private static string CreateURLForUSGSDownload(string siteNumber, TimeSeriesType int month = 1; int day = 1; var dateString = segments[2].Split('-'); - DateTime.TryParse(segments[2], out index); + DateTime.TryParse(segments[2], CultureInfo.InvariantCulture, DateTimeStyles.None, out index); if (index == DateTime.MinValue) { // The date parsing failed, so try to manually parse it if (dateString[1] == "00" && dateString[2] != "00") { - int.TryParse(dateString[0], out year); - int.TryParse(dateString[2], out day); + int.TryParse(dateString[0], NumberStyles.Integer, CultureInfo.InvariantCulture, out year); + int.TryParse(dateString[2], NumberStyles.Integer, CultureInfo.InvariantCulture, out day); index = new DateTime(year, month, day, 0, 0, 0); } else if (dateString[1] != "00" && dateString[2] == "00") { - int.TryParse(dateString[0], out year); - int.TryParse(dateString[1], out month); + int.TryParse(dateString[0], NumberStyles.Integer, CultureInfo.InvariantCulture, out year); + int.TryParse(dateString[1], NumberStyles.Integer, CultureInfo.InvariantCulture, out month); index = new DateTime(year, month, day, 0, 0, 0); } else if (dateString[1] == "00" && dateString[2] == "00") { - int.TryParse(dateString[0], out year); + int.TryParse(dateString[0], NumberStyles.Integer, CultureInfo.InvariantCulture, out year); index = new DateTime(year, month, day, 0, 0, 0); } } // Get value double value = 0; - int idx = timeSeriesType == TimeSeriesType.PeakDischarge ? 4 : 6; - if (segments[idx] != "" && segments[idx] != " " && segments[idx] != " " && !string.IsNullOrEmpty(segments[idx])) + if (!string.IsNullOrWhiteSpace(segments[idx])) { - double.TryParse(segments[idx], out value); + double.TryParse(segments[idx], NumberStyles.Float, CultureInfo.InvariantCulture, out value); timeSeries.Add(new SeriesOrdinate(index, value)); } } @@ -530,17 +598,13 @@ private static async Task ParseUSGSOgcApiPages(HttpClient client, string initial while (nextUrl != null) { - // Retry with exponential backoff for rate limiting (429) - HttpResponseMessage response = await client.GetAsync(nextUrl); - for (int attempt = 1; attempt < 6; attempt++) + var (status, json) = await GetWithRetryAsync(client, nextUrl); + if ((int)status >= 400) { - if ((int)response.StatusCode != 429) break; - int delayMs = Math.Min((int)Math.Pow(2, attempt) * 2000, 60000); - await System.Threading.Tasks.Task.Delay(delayMs); - response = await client.GetAsync(nextUrl); + string snippet = json.Length > 500 ? json.Substring(0, 500) + "…" : json; + throw new Exception( + $"USGS OGC API error. Status={(int)status} ({status}). Body={snippet}"); } - response.EnsureSuccessStatusCode(); - var json = await response.Content.ReadAsStringAsync(); if (rawText != null) { @@ -668,11 +732,14 @@ public static async Task FromCHMN( string paramCode = isDischarge ? "47" : "46"; DateTime sd = startDate ?? DateTime.UtcNow.AddMonths(-18); DateTime ed = endDate ?? DateTime.UtcNow; + // Use '+' as the URL-safe space separator instead of a literal space + // in the query string. ECCC accepts the literal space, but some + // intermediate proxies and CDNs reject it. url = "https://wateroffice.ec.gc.ca/services/real_time_data/csv/inline" + $"?stations[]={Uri.EscapeDataString(stationNumber)}" + $"¶meters[]={paramCode}" + - $"&start_date={sd:yyyy-MM-dd HH:mm:ss}" + - $"&end_date={ed:yyyy-MM-dd HH:mm:ss}"; + $"&start_date={sd:yyyy-MM-dd}+{sd:HH:mm:ss}" + + $"&end_date={ed:yyyy-MM-dd}+{ed:HH:mm:ss}"; interval = TimeInterval.FiveMinute; } else // PeakDischarge or PeakStage @@ -694,13 +761,13 @@ public static async Task FromCHMN( { var client = _decompressClient; - // The endpoint returns CSV (automatically decompressed by HttpClientHandler) - using HttpResponseMessage resp = await client.GetAsync(url); - resp.EnsureSuccessStatusCode(); - - string csv; + // The endpoint returns CSV (automatically decompressed by HttpClientHandler). + var (status, csv) = await GetWithRetryAsync(client, url); + if ((int)status >= 400) { - csv = await resp.Content.ReadAsStringAsync(); + string snippet = csv.Length > 500 ? csv.Substring(0, 500) + "…" : csv; + throw new Exception( + $"Failed to retrieve CHMN data. Status={(int)status} ({status}). Body={snippet}"); } // Parse the CSV response based on endpoint type @@ -773,9 +840,15 @@ private static void ParseCHMNDailyCsv(string csv, string stationNumber, TimeSeri if (!isDischarge && !isLevelParam) continue; } - // Parse date - if (!DateTime.TryParse(parts[dateCol].Trim(), CultureInfo.InvariantCulture, DateTimeStyles.None, out DateTime date)) + // Real-time CHMN timestamps include explicit Pacific/Eastern offsets, + // e.g. "2024-11-01T08:00:00-08:00". DateTime.TryParse with + // DateTimeStyles.None converts these to local time, shifting them on + // any non-Canadian-Pacific machine. Use DateTimeOffset to preserve + // the offset, then take the wall-clock value the gauge reported. + if (!DateTimeOffset.TryParse(parts[dateCol].Trim(), CultureInfo.InvariantCulture, + DateTimeStyles.None, out DateTimeOffset dto)) continue; + DateTime date = isDailyType ? dto.Date : dto.DateTime; // Parse value double val = double.NaN; @@ -967,10 +1040,11 @@ public static async Task FromABOM( client.DefaultRequestHeaders.Add("Connection", "keep-alive"); client.Timeout = TimeSpan.FromSeconds(30); - HttpResponseMessage response; + HttpStatusCode listStatus; + string listResponse; try { - response = await client.GetAsync(tsListUrl); + (listStatus, listResponse) = await GetWithRetryAsync(client, tsListUrl); } catch (HttpRequestException ex) { @@ -981,15 +1055,13 @@ public static async Task FromABOM( throw new Exception("Request to BOM API timed out.", ex); } - var listResponse = await response.Content.ReadAsStringAsync(); - // Surface the response body in the exception so transient upstream failures // (KiWIS 5xx with a JSON error payload) are self-documenting. - if (!response.IsSuccessStatusCode) + if ((int)listStatus >= 400) { string snippet = listResponse.Length > 500 ? listResponse.Substring(0, 500) + "…" : listResponse; throw new Exception( - $"Failed to retrieve time series list from BOM API. Status={(int)response.StatusCode} ({response.StatusCode}). Body={snippet}"); + $"Failed to retrieve time series list from BOM API. Status={(int)listStatus} ({listStatus}). Body={snippet}"); } // Check for error response from KiWIS @@ -1060,14 +1132,18 @@ public static async Task FromABOM( } } - // If no match found, take the first available series - if (tsId == null && root.GetArrayLength() > 1) + // No silent fallback: if the requested cadence isn't present we'd rather + // throw than quietly hand back hourly data when the caller asked for daily. + if (tsId == null) { - tsId = root[1][tsIdIndex].GetString(); + var available = new List(); + for (int i = 1; i < root.GetArrayLength() && available.Count < 10; i++) + if (tsNameIndex >= 0) + available.Add(root[i][tsNameIndex].GetString() ?? ""); + throw new Exception( + $"No suitable time series found for station {stationNumber} matching '{tsNamePattern}'. " + + $"Available ts_name values: [{string.Join(", ", available)}]"); } - - if (tsId == null) - throw new Exception($"No suitable time series found for station {stationNumber}"); } // Step 2: Get time series values using the ts_id @@ -1098,10 +1174,11 @@ public static async Task FromABOM( client.DefaultRequestHeaders.Add("Connection", "keep-alive"); client.Timeout = TimeSpan.FromSeconds(60); - HttpResponseMessage response; + HttpStatusCode valuesStatus; + string valuesResponse; try { - response = await client.GetAsync(valuesUrl); + (valuesStatus, valuesResponse) = await GetWithRetryAsync(client, valuesUrl); } catch (HttpRequestException ex) { @@ -1112,16 +1189,14 @@ public static async Task FromABOM( throw new Exception("Request to BOM API timed out while retrieving values.", ex); } - var valuesResponse = await response.Content.ReadAsStringAsync(); - // Surface the response body in the exception so transient upstream failures // (e.g. KiWIS returning 500 with `{"code":"DatasourceError","message":"Error connecting to WDP."}`) - // are self-documenting. Without this, EnsureSuccessStatusCode would discard the body. - if (!response.IsSuccessStatusCode) + // are self-documenting after the retry helper has exhausted its attempts. + if ((int)valuesStatus >= 400) { string snippet = valuesResponse.Length > 500 ? valuesResponse.Substring(0, 500) + "…" : valuesResponse; throw new Exception( - $"Failed to retrieve time series values from BOM API. Status={(int)response.StatusCode} ({response.StatusCode}). Body={snippet}"); + $"Failed to retrieve time series values from BOM API. Status={(int)valuesStatus} ({valuesStatus}). Body={snippet}"); } // Check for error response @@ -1146,10 +1221,15 @@ public static async Task FromABOM( { if (point.GetArrayLength() < 2) continue; - // Parse timestamp + // BOM returns ISO-8601 with explicit offset, e.g. "2021-11-01T00:00:00.000+10:00". + // DateTime.TryParse converts these to LOCAL time, which on a non-AEST machine + // shifts daily series labels by a calendar day. DateTimeOffset preserves the + // offset; .Date gives the wall-clock date BOM intended. string? timestampStr = point[0].GetString(); - if (!DateTime.TryParse(timestampStr, out DateTime date)) + if (!DateTimeOffset.TryParse(timestampStr, CultureInfo.InvariantCulture, + DateTimeStyles.None, out DateTimeOffset dto)) continue; + DateTime date = isInstantaneous ? dto.DateTime : dto.Date; // Parse value double val = double.NaN; diff --git a/Numerics/Data/Time Series/TimeSeries.cs b/Numerics/Data/Time Series/TimeSeries.cs index dd1ed23d..b739b413 100644 --- a/Numerics/Data/Time Series/TimeSeries.cs +++ b/Numerics/Data/Time Series/TimeSeries.cs @@ -127,7 +127,7 @@ public TimeSeries(XElement xElement) var valueAttr = ordinate.Attribute("Value"); if (indexAttr != null && !DateTime.TryParseExact(indexAttr.Value, "o", CultureInfo.InvariantCulture, DateTimeStyles.RoundtripKind, out index)) { - DateTime.TryParse(indexAttr.Value, out index); + DateTime.TryParse(indexAttr.Value, CultureInfo.InvariantCulture, DateTimeStyles.None, out index); } if (valueAttr != null) double.TryParse(valueAttr.Value, NumberStyles.Any, CultureInfo.InvariantCulture, out value); @@ -945,28 +945,41 @@ private bool CheckIfMinStepsExceeded(DateTime startTime, DateTime endTime, int m /// Returns a moving average time-series based on the specified time period. The average is computed based on the previous n=period ordinates. /// /// The time period to average over. If time interval is 1-hour, and period is 12, the moving average will be computed over a moving 12 hour block. - public TimeSeries MovingAverage(int period) + /// + /// Optional. The minimum number of non-missing (non-NaN) values that must be present in a window for the average to be computed; otherwise the output is NaN. + /// Defaults to (strict propagation: any NaN in the window produces NaN). Pass a smaller value to skip missing values and average over the observed entries only, + /// matching the semantics of pandas min_periods and bottleneck min_count. + /// + public TimeSeries MovingAverage(int period, int? minValidCount = null) { if (period >= Count) throw new ArgumentException(nameof(period), "The period must be less than the length of the time-series."); + int minCount = minValidCount ?? period; + if (minCount < 1 || minCount > period) + throw new ArgumentOutOfRangeException(nameof(minValidCount), "minValidCount must be between 1 and period."); SortByTime(); var timeSeries = new TimeSeries(TimeInterval); double sum = 0d; - double avg = 0d; + int nanCount = 0; for (int i = 1; i <= Count; i++) { - sum += !double.IsNaN(this[i - 1].Value) ? this[i - 1].Value : 0; + double newVal = this[i - 1].Value; + if (double.IsNaN(newVal)) nanCount++; + else sum += newVal; + if (i > period) { - sum -= !double.IsNaN(this[i - period - 1].Value) ? this[i - period - 1].Value : 0; - avg = sum / period; - } - else - { - avg = sum / i; + double oldVal = this[i - period - 1].Value; + if (double.IsNaN(oldVal)) nanCount--; + else sum -= oldVal; } + if (i >= period) + { + int validCount = period - nanCount; + double avg = validCount >= minCount ? sum / validCount : double.NaN; timeSeries.Add(new SeriesOrdinate(this[i - 1].Index, avg)); + } } return timeSeries; } @@ -975,20 +988,41 @@ public TimeSeries MovingAverage(int period) /// Returns a moving sum time-series based on the specified time period. The sum is computed based on the previous n=period ordinates. /// /// The time period to sum over. If time interval is 1-hour, and period is 12, the moving sum will be computed over a moving 12 hour block. - public TimeSeries MovingSum(int period) + /// + /// Optional. The minimum number of non-missing (non-NaN) values that must be present in a window for the sum to be computed; otherwise the output is NaN. + /// Defaults to (strict propagation: any NaN in the window produces NaN). Pass a smaller value to sum only the observed entries within each window, + /// matching the semantics of pandas min_periods and bottleneck min_count. Note that no scaling is applied — the sum is over observed values only. + /// + public TimeSeries MovingSum(int period, int? minValidCount = null) { if (period >= Count) throw new ArgumentException(nameof(period), "The period must be less than the length of the time-series."); + int minCount = minValidCount ?? period; + if (minCount < 1 || minCount > period) + throw new ArgumentOutOfRangeException(nameof(minValidCount), "minValidCount must be between 1 and period."); SortByTime(); var timeSeries = new TimeSeries(TimeInterval); double sum = 0d; + int nanCount = 0; for (int i = 1; i <= Count; i++) { - sum += !double.IsNaN(this[i - 1].Value) ? this[i - 1].Value : 0; + double newVal = this[i - 1].Value; + if (double.IsNaN(newVal)) nanCount++; + else sum += newVal; + if (i > period) - sum -= !double.IsNaN(this[i - period - 1].Value) ? this[i - period - 1].Value : 0; + { + double oldVal = this[i - period - 1].Value; + if (double.IsNaN(oldVal)) nanCount--; + else sum -= oldVal; + } + if (i >= period) - timeSeries.Add(new SeriesOrdinate(this[i - 1].Index, sum)); + { + int validCount = period - nanCount; + double output = validCount >= minCount ? sum : double.NaN; + timeSeries.Add(new SeriesOrdinate(this[i - 1].Index, output)); + } } return timeSeries; } @@ -1610,20 +1644,26 @@ public TimeSeries CalendarYearSeries(BlockFunctionType blockFunction = BlockFunc double sum = 0; for (int j = 0; j < blockData.Count; j++) { - sum += !double.IsNaN(blockData[j].Value) ? blockData[j].Value : 0; + sum += blockData[j].Value; + } + if (blockData.Count > 0) + { + ordinate.Index = blockData.Last().Index; + ordinate.Value = sum; } - ordinate.Index = blockData.Last().Index; - ordinate.Value = sum; } else if (blockFunction == BlockFunctionType.Average) { double sum = 0; for (int j = 0; j < blockData.Count; j++) { - sum += !double.IsNaN(blockData[j].Value) ? blockData[j].Value : 0; + sum += blockData[j].Value; + } + if (blockData.Count > 0) + { + ordinate.Index = blockData.Last().Index; + ordinate.Value = sum / blockData.Count; } - ordinate.Index = blockData.Last().Index; - ordinate.Value = sum / blockData.Count; } if (!double.IsNaN(ordinate.Value)) @@ -1633,7 +1673,7 @@ public TimeSeries CalendarYearSeries(BlockFunctionType blockFunction = BlockFunc } /// - /// Returns an annual (irregular) block series based on a 12-month year with a customized starting month. + /// Returns an annual (irregular) block series based on a 12-month year with a customized starting month. /// /// Optional. The month when the custom year begins. Default = 10 (or October). /// Optional. The block function type; e.g. min, max, sum, or average. Default = Maximum. @@ -1704,20 +1744,26 @@ public TimeSeries CustomYearSeries(int startMonth = 10, BlockFunctionType blockF double sum = 0; for (int j = 0; j < blockData.Count; j++) { - sum += !double.IsNaN(blockData[j].Value) ? blockData[j].Value : 0; + sum += blockData[j].Value; + } + if (blockData.Count > 0) + { + ordinate.Index = blockData.Last().Index; + ordinate.Value = sum; } - ordinate.Index = blockData.Last().Index; - ordinate.Value = sum; } else if (blockFunction == BlockFunctionType.Average) { double sum = 0; for (int j = 0; j < blockData.Count; j++) { - sum += !double.IsNaN(blockData[j].Value) ? blockData[j].Value : 0; + sum += blockData[j].Value; + } + if (blockData.Count > 0) + { + ordinate.Index = blockData.Last().Index; + ordinate.Value = sum / blockData.Count; } - ordinate.Index = blockData.Last().Index; - ordinate.Value = sum / blockData.Count; } if (!double.IsNaN(ordinate.Value)) @@ -1826,20 +1872,26 @@ public TimeSeries CustomYearSeries(int startMonth, int endMonth, BlockFunctionTy double sum = 0; for (int j = 0; j < blockData.Count; j++) { - sum += !double.IsNaN(blockData[j].Value) ? blockData[j].Value : 0; + sum += blockData[j].Value; + } + if (blockData.Count > 0) + { + ordinate.Index = blockData.Last().Index; + ordinate.Value = sum; } - ordinate.Index = blockData.Last().Index; - ordinate.Value = sum; } else if (blockFunction == BlockFunctionType.Average) { double sum = 0; for (int j = 0; j < blockData.Count; j++) { - sum += !double.IsNaN(blockData[j].Value) ? blockData[j].Value : 0; + sum += blockData[j].Value; + } + if (blockData.Count > 0) + { + ordinate.Index = blockData.Last().Index; + ordinate.Value = sum / blockData.Count; } - ordinate.Index = blockData.Last().Index; - ordinate.Value = sum / blockData.Count; } if (!double.IsNaN(ordinate.Value)) @@ -1849,7 +1901,7 @@ public TimeSeries CustomYearSeries(int startMonth, int endMonth, BlockFunctionTy } /// - /// Returns an monthly (irregular) block series. + /// Returns an monthly (irregular) block series. /// /// Optional. The block function type; e.g. min, max, sum, or average. Default = Maximum. /// Optional. The smoothing function type. Default = None. @@ -1914,8 +1966,11 @@ public TimeSeries MonthlySeries(BlockFunctionType blockFunction = BlockFunctionT { sum += blockData[j].Value; } - ordinate.Index = blockData.Last().Index; - ordinate.Value = sum; + if (blockData.Count > 0) + { + ordinate.Index = blockData.Last().Index; + ordinate.Value = sum; + } } else if (blockFunction == BlockFunctionType.Average) { @@ -1924,8 +1979,11 @@ public TimeSeries MonthlySeries(BlockFunctionType blockFunction = BlockFunctionT { sum += blockData[j].Value; } - ordinate.Index = blockData.Last().Index; - ordinate.Value = sum / blockData.Count; + if (blockData.Count > 0) + { + ordinate.Index = blockData.Last().Index; + ordinate.Value = sum / blockData.Count; + } } if (!double.IsNaN(ordinate.Value)) @@ -1938,7 +1996,7 @@ public TimeSeries MonthlySeries(BlockFunctionType blockFunction = BlockFunctionT } /// - /// Returns an quarterly (irregular) block series. + /// Returns an quarterly (irregular) block series. /// /// Optional. The block function type; e.g. min, max, sum, or average. Default = Maximum. /// Optional. The smoothing function type. Default = None. @@ -2011,8 +2069,11 @@ public TimeSeries QuarterlySeries(BlockFunctionType blockFunction = BlockFunctio { sum += blockData[j].Value; } - ordinate.Index = blockData.Last().Index; - ordinate.Value = sum; + if (blockData.Count > 0) + { + ordinate.Index = blockData.Last().Index; + ordinate.Value = sum; + } } else if (blockFunction == BlockFunctionType.Average) { @@ -2021,8 +2082,11 @@ public TimeSeries QuarterlySeries(BlockFunctionType blockFunction = BlockFunctio { sum += blockData[j].Value; } - ordinate.Index = blockData.Last().Index; - ordinate.Value = sum / blockData.Count; + if (blockData.Count > 0) + { + ordinate.Index = blockData.Last().Index; + ordinate.Value = sum / blockData.Count; + } } if (!double.IsNaN(ordinate.Value)) @@ -2131,15 +2195,28 @@ public TimeSeries PeaksOverThresholdSeries(double threshold, int minStepsBetween #endregion /// - /// Resample the time series using k-Nearest neighbors. + /// Resample the time series using the conditional k-Nearest Neighbors bootstrap of + /// Lall and Sharma (1996). At each step, finds the K historical observations whose + /// (standardized) value is closest to the current state, randomly picks one of them + /// at index j, and advances by taking x[j+1] — the observation that historically + /// came AFTER the chosen neighbor. This preserves the conditional p(x_{t+1} | x_t). /// /// The number of time steps to resample. - /// The number of nearest neighbors. - /// The prng seed. Default = 12345. - /// + /// The number of nearest neighbors. Lall-Sharma suggests floor(sqrt(n)). + /// The PRNG seed. Default = 12345. + /// A new of length . + /// + /// Thrown when the series has fewer than 11 observations (the underlying KNN search + /// requires at least 10 candidate points after the last observation is excluded so + /// that x[j+1] is always in range). + /// + /// + /// Lall, U., Sharma, A. (1996). A nearest neighbor bootstrap for resampling + /// hydrologic time series. Water Resources Research, 32(3), 679-693. + /// public TimeSeries ResampleWithKNN(int timeSteps, int k, int seed = 12345) { - if (Count < 2) throw new ArgumentException("Need at least 2 points for resampling."); + if (Count < 11) throw new ArgumentException("Need at least 11 points for KNN resampling."); if (timeSteps < 1) throw new ArgumentException("The number of time steps must be at least 1."); // Initialize @@ -2157,15 +2234,25 @@ public TimeSeries ResampleWithKNN(int timeSteps, int k, int seed = 12345) stdData.Standardize(); var stdValues = stdData.ValuesToArray(); + // Exclude the last observation from the KNN candidate set so that any returned + // neighbor index j satisfies j + 1 < Count and x[j+1] is always in range. + var candidateValues = new double[Count - 1]; + Array.Copy(stdValues, 0, candidateValues, 0, Count - 1); + // Perform k-NN - var kNN = new KNearestNeighbors(stdValues, stdValues, k); + var kNN = new KNearestNeighbors(candidateValues, candidateValues, k); for (int i = 1; i < timeSteps; i++) { double val = (currentValue - mean) / stdDev; var kNearest = kNN.GetNeighbors([val]); if (kNearest == null) continue; int selectedIdx = kNearest[prng.Next(kNearest.Length)]; - currentValue = this[selectedIdx].Value; + // Advance to the observation that came AFTER the chosen neighbor — this is + // what makes it a CONDITIONAL bootstrap of p(x_{t+1} | x_t). Returning + // this[selectedIdx] would collapse to a near-stationary process around the + // initial value because every returned neighbor is, by construction, close + // in value to currentValue. + currentValue = this[selectedIdx + 1].Value; currentDate = TimeSeries.AddTimeInterval(currentDate, TimeInterval); timeSeries.Add(new SeriesOrdinate(currentDate, currentValue)); } @@ -2174,12 +2261,21 @@ public TimeSeries ResampleWithKNN(int timeSteps, int k, int seed = 12345) } /// - /// Resample the time series using the block bootstrap. + /// Resample the time series using a non-overlapping fixed-block bootstrap. + /// Collects every contiguous block of length from the + /// observed series, draws blocks uniformly at random with replacement, and + /// concatenates them until values have been produced. + /// Preserves marginal mean / variance and within-block autocorrelation; introduces + /// discontinuities at block boundaries. /// /// The number of time steps to resample. /// The resampling block size. - /// The prng seed. Default = 12345. - /// + /// The PRNG seed. Default = 12345. + /// A new of length . + /// + /// Künsch, H.R. (1989). The jackknife and the bootstrap for general stationary + /// observations. Annals of Statistics, 17(3), 1217-1241. + /// public TimeSeries ResampleWithBlockBootstrap(int timeSteps, int blockSize, int seed = 12345) { if (Count < 2) throw new ArgumentException("Need at least 2 points for resampling."); diff --git a/Numerics/Distributions/Bivariate Copulas/Base/BivariateCopula.cs b/Numerics/Distributions/Bivariate Copulas/Base/BivariateCopula.cs index 293ab03d..17a624d5 100644 --- a/Numerics/Distributions/Bivariate Copulas/Base/BivariateCopula.cs +++ b/Numerics/Distributions/Bivariate Copulas/Base/BivariateCopula.cs @@ -110,6 +110,8 @@ public bool ParametersValid public double LogPDF(double u, double v) { double f = PDF(u, v); + if (double.IsNaN(f) || double.IsInfinity(f) || f <= 0d) + return double.NegativeInfinity; return Math.Log(f); } @@ -194,7 +196,7 @@ public double PseudoLogLikelihood(IList sampleDataX, IList sampl double LogLH = 0; for (int i = 0; i < sampleDataX.Count; i++) LogLH += LogPDF(sampleDataX[i], sampleDataY[i]); - if (double.IsNaN(LogLH) || double.IsInfinity(LogLH)) return double.MinValue; + if (double.IsNaN(LogLH) || double.IsInfinity(LogLH)) return double.NegativeInfinity; return LogLH; } @@ -212,7 +214,7 @@ public double IFMLogLikelihood(IList sampleDataX, IList sampleDa double LogLH = 0; for (int i = 0; i < sampleDataX.Count; i++) LogLH += LogPDF(MarginalDistributionX.CDF(sampleDataX[i]), MarginalDistributionY.CDF(sampleDataY[i])); - if (double.IsNaN(LogLH) || double.IsInfinity(LogLH)) return double.MinValue; + if (double.IsNaN(LogLH) || double.IsInfinity(LogLH)) return double.NegativeInfinity; return LogLH; } @@ -230,7 +232,7 @@ public double LogLikelihood(IList sampleDataX, IList sampleDataY double LogLH = 0; for (int i = 0; i < sampleDataX.Count; i++) LogLH += LogPDF(MarginalDistributionX.CDF(sampleDataX[i]), MarginalDistributionY.CDF(sampleDataY[i])) + MarginalDistributionX.LogPDF(sampleDataX[i]) + MarginalDistributionY.LogPDF(sampleDataY[i]); - if (double.IsNaN(LogLH) || double.IsInfinity(LogLH)) return double.MinValue; + if (double.IsNaN(LogLH) || double.IsInfinity(LogLH)) return double.NegativeInfinity; return LogLH; } diff --git a/Numerics/Distributions/Multivariate/Base/MultivariateDistribution.cs b/Numerics/Distributions/Multivariate/Base/MultivariateDistribution.cs index cb456131..3a235880 100644 --- a/Numerics/Distributions/Multivariate/Base/MultivariateDistribution.cs +++ b/Numerics/Distributions/Multivariate/Base/MultivariateDistribution.cs @@ -55,8 +55,8 @@ public abstract class MultivariateDistribution : IMultivariateDistribution public virtual double LogPDF(double[] x) { double f = PDF(x); - // If the PDF returns an invalid probability, then return the worst log-probability. - if (double.IsNaN(f) || double.IsInfinity(f) || f <= 0d) return double.MinValue; + // If the PDF returns an invalid probability, then return log(0). + if (double.IsNaN(f) || double.IsInfinity(f) || f <= 0d) return double.NegativeInfinity; return Math.Log(f); } @@ -73,8 +73,8 @@ public virtual double LogPDF(double[] x) public virtual double LogCDF(double[] x) { double F = CDF(x); - // If the CDF returns an invalid probability, then return the worst log-probability. - if (double.IsNaN(F) || double.IsInfinity(F) || F <= 0d) return double.MinValue; + // If the CDF returns an invalid probability, then return log(0). + if (double.IsNaN(F) || double.IsInfinity(F) || F <= 0d) return double.NegativeInfinity; return Math.Log(F); } @@ -94,9 +94,9 @@ public virtual double CCDF(double[] x) public virtual double LogCCDF(double[] x) { double cF = CCDF(x); - // If the CCDF returns an invalid probability, then return the worst log-probability. - if (double.IsNaN(cF) || cF <= 0d) - return int.MinValue; + // If the CCDF returns an invalid probability, then return log(0). + if (double.IsNaN(cF) || double.IsInfinity(cF) || cF <= 0d) + return double.NegativeInfinity; return Math.Log(cF); } @@ -106,4 +106,4 @@ public virtual double LogCCDF(double[] x) public abstract MultivariateDistribution Clone(); } -} \ No newline at end of file +} diff --git a/Numerics/Distributions/Multivariate/Dirichlet.cs b/Numerics/Distributions/Multivariate/Dirichlet.cs index c2300aca..5737e92b 100644 --- a/Numerics/Distributions/Multivariate/Dirichlet.cs +++ b/Numerics/Distributions/Multivariate/Dirichlet.cs @@ -282,7 +282,7 @@ public override double PDF(double[] x) /// /// A point on the simplex. Components must be positive and sum to 1. /// - /// The log-density at x. Returns if x is not on the simplex. + /// The log-density at x. Returns if x is not on the simplex. /// /// /// @@ -295,16 +295,16 @@ public override double PDF(double[] x) /// public override double LogPDF(double[] x) { - if (x == null || x.Length != _dimension) return double.MinValue; + if (x == null || x.Length != _dimension) return double.NegativeInfinity; // Check that x is on the simplex double sum = 0; for (int i = 0; i < _dimension; i++) { - if (x[i] <= 0 || x[i] > 1) return double.MinValue; + if (x[i] <= 0 || x[i] > 1) return double.NegativeInfinity; sum += x[i]; } - if (Math.Abs(sum - 1.0) > 1e-10) return double.MinValue; + if (Math.Abs(sum - 1.0) > 1e-10) return double.NegativeInfinity; double logPdf = -_logNormalization; for (int i = 0; i < _dimension; i++) diff --git a/Numerics/Distributions/Multivariate/Multinomial.cs b/Numerics/Distributions/Multivariate/Multinomial.cs index 31d546ee..ff8019e6 100644 --- a/Numerics/Distributions/Multivariate/Multinomial.cs +++ b/Numerics/Distributions/Multivariate/Multinomial.cs @@ -204,20 +204,20 @@ public override double PDF(double[] x) /// Computes the log of the probability mass function. /// /// The count vector. Each xᵢ must be a non-negative integer and Σxᵢ = N. - /// The log probability. Returns if x is not valid. + /// The log probability. Returns if x is not valid. public double LogPMF(double[] x) { - if (x == null || x.Length != _dimension) return double.MinValue; + if (x == null || x.Length != _dimension) return double.NegativeInfinity; // Validate: all non-negative integers that sum to N int sum = 0; for (int i = 0; i < _dimension; i++) { int xi = (int)Math.Round(x[i]); - if (xi < 0 || Math.Abs(x[i] - xi) > 1e-10) return double.MinValue; + if (xi < 0 || Math.Abs(x[i] - xi) > 1e-10) return double.NegativeInfinity; sum += xi; } - if (sum != _n) return double.MinValue; + if (sum != _n) return double.NegativeInfinity; // Compute in log-space: log(N!) - sum(log(xi!)) + sum(xi * log(pi)) double logPmf = Factorial.LogFactorial(_n); @@ -227,7 +227,7 @@ public double LogPMF(double[] x) logPmf -= Factorial.LogFactorial(xi); if (xi > 0) { - if (_p[i] <= 0) return double.MinValue; + if (_p[i] <= 0) return double.NegativeInfinity; logPmf += xi * Math.Log(_p[i]); } } diff --git a/Numerics/Distributions/Multivariate/MultivariateNormal.cs b/Numerics/Distributions/Multivariate/MultivariateNormal.cs index 920108ad..90cedc82 100644 --- a/Numerics/Distributions/Multivariate/MultivariateNormal.cs +++ b/Numerics/Distributions/Multivariate/MultivariateNormal.cs @@ -338,7 +338,7 @@ public override double PDF(double[] x) public override double LogPDF(double[] x) { double f = -0.5d * Mahalanobis(x) + _lnconstant; - if (double.IsNaN(f) || double.IsInfinity(f)) return double.MinValue; + if (double.IsNaN(f) || double.IsInfinity(f)) return double.NegativeInfinity; return f; } @@ -2149,4 +2149,4 @@ public override MultivariateDistribution Clone() } } -} \ No newline at end of file +} diff --git a/Numerics/Distributions/Multivariate/MultivariateStudentT.cs b/Numerics/Distributions/Multivariate/MultivariateStudentT.cs index 66ce1031..5282f53a 100644 --- a/Numerics/Distributions/Multivariate/MultivariateStudentT.cs +++ b/Numerics/Distributions/Multivariate/MultivariateStudentT.cs @@ -400,7 +400,7 @@ public override double LogPDF(double[] x) { double Q = Mahalanobis(x); double f = _lnconstant - ((_degreesOfFreedom + _dimension) / 2.0) * Math.Log(1.0 + Q / _degreesOfFreedom); - if (double.IsNaN(f) || double.IsInfinity(f)) return double.MinValue; + if (double.IsNaN(f) || double.IsInfinity(f)) return double.NegativeInfinity; return f; } @@ -705,4 +705,4 @@ public override MultivariateDistribution Clone() } } -} \ No newline at end of file +} diff --git a/Numerics/Distributions/Univariate/Base/UnivariateDistributionBase.cs b/Numerics/Distributions/Univariate/Base/UnivariateDistributionBase.cs index 7c744f60..871e6992 100644 --- a/Numerics/Distributions/Univariate/Base/UnivariateDistributionBase.cs +++ b/Numerics/Distributions/Univariate/Base/UnivariateDistributionBase.cs @@ -150,7 +150,7 @@ public double LogLikelihood(IList sample) { logLH += LogPDF(sample[i]); } - if (double.IsNaN(logLH) || double.IsInfinity(logLH)) return double.MinValue; + if (double.IsNaN(logLH) || double.IsInfinity(logLH)) return double.NegativeInfinity; return logLH; } @@ -204,6 +204,8 @@ public double LogLikelihood_Intervals(double lowerLimit, double upperLimit) public virtual double LogPDF(double x) { double f = PDF(x); + if (double.IsNaN(f) || double.IsInfinity(f) || f <= 0.0) + return double.NegativeInfinity; return Math.Log(f); } @@ -220,6 +222,8 @@ public virtual double HF(double x) public virtual double LogCDF(double x) { double F = CDF(x); + if (double.IsNaN(F) || double.IsInfinity(F) || F <= 0.0) + return double.NegativeInfinity; return Math.Log(F); } @@ -233,6 +237,8 @@ public virtual double CCDF(double x) public virtual double LogCCDF(double x) { double cF = CCDF(x); + if (double.IsNaN(cF) || double.IsInfinity(cF) || cF <= 0.0) + return double.NegativeInfinity; return Math.Log(cF); } diff --git a/Numerics/Distributions/Univariate/CompetingRisks.cs b/Numerics/Distributions/Univariate/CompetingRisks.cs index 6627eaf8..a963b188 100644 --- a/Numerics/Distributions/Univariate/CompetingRisks.cs +++ b/Numerics/Distributions/Univariate/CompetingRisks.cs @@ -58,8 +58,8 @@ public CompetingRisks(IUnivariateDistribution[] distributions) private bool _mvnCreated = false; private MultivariateNormal _mvn = null!; - // Minimum log value to prevent -Infinity in log-likelihood - private const double _logZero = -745.0; // ≈ log(double.MinValue that's positive) + // Soft finite floor used in tail arithmetic before returning the final log-density. + private const double _logZero = -745.0; private const double _minDensity = 1E-300; /// @@ -579,7 +579,7 @@ public override double LogPDF(double x) // For dependent cases, fall back to numerical differentiation // but use a more stable approach double pdf = NumericalDerivative.Derivative(CDF, x); - return pdf > _minDensity ? Math.Log(pdf) : double.MinValue; + return pdf > _minDensity ? Math.Log(pdf) : double.NegativeInfinity; } } @@ -648,7 +648,7 @@ private double LogPDFMinimumIndependent(double x) // Final result: log(f) = log(sum h_i) + sum log(S_i) double logPdf = logSumHazard + sumLogSurvival; - return double.IsNaN(logPdf) || double.IsInfinity(logPdf) ? double.MinValue : logPdf; + return double.IsNaN(logPdf) || double.IsInfinity(logPdf) ? double.NegativeInfinity : logPdf; } /// @@ -714,7 +714,7 @@ private double LogPDFMaximumIndependent(double x) // Final result: log(f) = log(sum f_i/F_i) + sum log(F_i) double logPdf = logSumRatio + sumLogCdf; - return double.IsNaN(logPdf) || double.IsInfinity(logPdf) ? double.MinValue : logPdf; + return double.IsNaN(logPdf) || double.IsInfinity(logPdf) ? double.NegativeInfinity : logPdf; } diff --git a/Numerics/Distributions/Univariate/Mixture.cs b/Numerics/Distributions/Univariate/Mixture.cs index f6f311a8..564eef67 100644 --- a/Numerics/Distributions/Univariate/Mixture.cs +++ b/Numerics/Distributions/Univariate/Mixture.cs @@ -672,7 +672,7 @@ double EStep(double[] x) for (int i = 0; i < N; i++) { // Get max likelihood - double max = double.MinValue; + double max = double.NegativeInfinity; for (int k = 0; k < K; k++) { if (likelihood[i, k] > max) @@ -680,6 +680,13 @@ double EStep(double[] x) max = likelihood[i, k]; } } + if (double.IsNegativeInfinity(max)) + { + for (int k = 0; k < K; k++) + likelihood[i, k] = 0.0; + return double.NegativeInfinity; + } + // log-sum-exp trick begins here double sum = 0; for (int k = 0; k < K; k++) @@ -721,7 +728,7 @@ double logLH(double[] x) var dist = (Mixture)Clone(); dist.SetParameters(mleWeights, x); double lh = dist.LogLikelihood(sample); - if (double.IsNaN(lh) || double.IsInfinity(lh)) return double.MinValue; + if (double.IsNaN(lh) || double.IsInfinity(lh)) return double.NegativeInfinity; return lh; } diff --git a/Numerics/Functions/Link Functions/FisherZLink.cs b/Numerics/Functions/Link Functions/FisherZLink.cs new file mode 100644 index 00000000..bbbe4dac --- /dev/null +++ b/Numerics/Functions/Link Functions/FisherZLink.cs @@ -0,0 +1,61 @@ +using System; +using System.Xml.Linq; + +namespace Numerics.Functions +{ + /// + /// Fisher z link function mapping correlations from (-1, 1) to the unconstrained real line. + /// + /// + /// + /// Domain: -1 < x < 1. h(x) = atanh(x), h⁻¹(η) = tanh(η), + /// h′(x) = 1 / (1 - x^2). + /// + /// + /// This link is commonly used for correlation parameters and other signed bounded parameters. + /// + /// + /// Authors: + /// Haden Smith, USACE Risk Management Center, cole.h.smith@usace.army.mil + /// + /// + [Serializable] + public sealed class FisherZLink : ILinkFunction + { + /// + /// Thrown when is not in (-1, 1). + public double Link(double x) + { + if (!Tools.IsFinite(x) || x <= -1d || x >= 1d) + throw new ArgumentOutOfRangeException(nameof(x), "FisherZLink requires -1 < x < 1."); + + return 0.5d * Math.Log((1d + x) / (1d - x)); + } + + /// + public double InverseLink(double eta) + { + if (eta >= 0d) + { + double e = Math.Exp(-2d * eta); + return (1d - e) / (1d + e); + } + + double expEta = Math.Exp(2d * eta); + return (expEta - 1d) / (expEta + 1d); + } + + /// + /// Thrown when is not in (-1, 1). + public double DLink(double x) + { + if (!Tools.IsFinite(x) || x <= -1d || x >= 1d) + throw new ArgumentOutOfRangeException(nameof(x), "FisherZLink derivative requires -1 < x < 1."); + + return 1d / (1d - x * x); + } + + /// + public XElement ToXElement() => new XElement(nameof(FisherZLink)); + } +} diff --git a/Numerics/Functions/Link Functions/LinkController.cs b/Numerics/Functions/Link Functions/LinkController.cs index 94bc6712..0c3b6d89 100644 --- a/Numerics/Functions/Link Functions/LinkController.cs +++ b/Numerics/Functions/Link Functions/LinkController.cs @@ -1,4 +1,5 @@ using System; +using System.Globalization; using System.Linq; using System.Xml.Linq; using Numerics.Mathematics.LinearAlgebra; @@ -73,7 +74,7 @@ public LinkController(XElement xElement) { var indexAttr = slot.Attribute("Index"); if (indexAttr == null) continue; - if (!int.TryParse(indexAttr.Value, out int index)) continue; + if (!int.TryParse(indexAttr.Value, NumberStyles.Integer, CultureInfo.InvariantCulture, out int index)) continue; if (index < 0 || index >= Links.Length) continue; var child = slot.Elements().FirstOrDefault(); if (child != null) diff --git a/Numerics/Functions/Link Functions/LinkFunctionFactory.cs b/Numerics/Functions/Link Functions/LinkFunctionFactory.cs index 4413a037..b932dc2e 100644 --- a/Numerics/Functions/Link Functions/LinkFunctionFactory.cs +++ b/Numerics/Functions/Link Functions/LinkFunctionFactory.cs @@ -35,6 +35,10 @@ public static ILinkFunction Create(LinkFunctionType type) return new ProbitLink(); case LinkFunctionType.ComplementaryLogLog: return new ComplementaryLogLogLink(); + case LinkFunctionType.YeoJohnson: + return new YeoJohnsonLink(); + case LinkFunctionType.FisherZ: + return new FisherZLink(); default: throw new ArgumentOutOfRangeException(nameof(type), $"Unknown link function type: {type}."); } @@ -63,6 +67,10 @@ public static ILinkFunction CreateFromXElement(XElement xElement) return new ProbitLink(); case nameof(ComplementaryLogLogLink): return new ComplementaryLogLogLink(); + case nameof(YeoJohnsonLink): + return new YeoJohnsonLink(xElement); + case nameof(FisherZLink): + return new FisherZLink(); default: throw new NotSupportedException($"Unknown link function type: '{xElement.Name.LocalName}'."); } diff --git a/Numerics/Functions/Link Functions/LinkFunctionType.cs b/Numerics/Functions/Link Functions/LinkFunctionType.cs index 5b58bdcf..56eb341f 100644 --- a/Numerics/Functions/Link Functions/LinkFunctionType.cs +++ b/Numerics/Functions/Link Functions/LinkFunctionType.cs @@ -39,6 +39,16 @@ public enum LinkFunctionType /// /// Complementary log-log link: η = log(−log(1 − x)). Used for asymmetric binary response models. /// - ComplementaryLogLog + ComplementaryLogLog, + + /// + /// Yeo-Johnson power-transformation link function for real-valued skew or shape parameters. + /// + YeoJohnson, + + /// + /// Fisher z link: η = atanh(x), commonly used for correlation parameters. + /// + FisherZ } } diff --git a/Numerics/Functions/Link Functions/YeoJohnsonLink.cs b/Numerics/Functions/Link Functions/YeoJohnsonLink.cs new file mode 100644 index 00000000..61b9bdec --- /dev/null +++ b/Numerics/Functions/Link Functions/YeoJohnsonLink.cs @@ -0,0 +1,128 @@ +using Numerics.Data.Statistics; +using System; +using System.Globalization; +using System.Xml.Linq; + +namespace Numerics.Functions +{ + /// + /// Yeo-Johnson power-transformation link function. + /// + /// + /// + /// The Yeo-Johnson transformation is defined on the full real line. It is useful + /// for skew or shape parameters that do not have a closed-form + /// variance-stabilizing link. + /// + /// + /// The transformation parameter lambda is restricted to the range used by + /// , -5 <= lambda <= 5. + /// + /// + /// Authors: + /// Haden Smith, USACE Risk Management Center, cole.h.smith@usace.army.mil + /// + /// + [Serializable] + public sealed class YeoJohnsonLink : ILinkFunction + { + private const double MinimumLambda = -5d; + private const double MaximumLambda = 5d; + + /// + /// Initializes a new instance of the class with lambda equal to 1. + /// + public YeoJohnsonLink() + { + Lambda = 1d; + } + + /// + /// Initializes a new instance of the class. + /// + /// The Yeo-Johnson transformation parameter. + /// Thrown when is not finite or is outside [-5, 5]. + public YeoJohnsonLink(double lambda) + { + Lambda = ValidateLambda(lambda, nameof(lambda)); + } + + /// + /// Initializes a new instance of the class by estimating lambda from representative values. + /// + /// Representative values used to estimate lambda. + /// Thrown when is null. + /// Thrown when fewer than two finite values are supplied or lambda fitting fails. + public YeoJohnsonLink(double[] values) + { + if (values == null) + throw new ArgumentNullException(nameof(values)); + if (values.Length < 2) + throw new ArgumentException("At least 2 values are required to fit lambda.", nameof(values)); + for (int i = 0; i < values.Length; i++) + if (!Tools.IsFinite(values[i])) + throw new ArgumentException("Every representative value must be finite.", nameof(values)); + + Lambda = ValidateLambda(YeoJohnson.FitLambda(values), nameof(values)); + } + + /// + /// Initializes a new instance of the class from its XML representation. + /// + /// The XML element to read. + /// Thrown when is null. + /// Thrown when the element does not contain a Lambda attribute. + public YeoJohnsonLink(XElement xElement) + { + if (xElement == null) + throw new ArgumentNullException(nameof(xElement)); + + XAttribute? lambdaAttribute = xElement.Attribute("Lambda"); + if (lambdaAttribute == null) + throw new ArgumentException("The YeoJohnsonLink element must contain a Lambda attribute.", nameof(xElement)); + + Lambda = ValidateLambda(double.Parse(lambdaAttribute.Value, NumberStyles.Any, CultureInfo.InvariantCulture), "Lambda"); + } + + /// + /// Gets the Yeo-Johnson transformation parameter. + /// + public double Lambda { get; } + + /// + public double Link(double x) + { + return YeoJohnson.Transform(x, Lambda); + } + + /// + public double InverseLink(double eta) + { + return YeoJohnson.InverseTransform(eta, Lambda); + } + + /// + public double DLink(double x) + { + return YeoJohnson.Derivative(x, Lambda); + } + + /// + public XElement ToXElement() => new XElement(nameof(YeoJohnsonLink), new XAttribute("Lambda", Lambda.ToString("G17", CultureInfo.InvariantCulture))); + + /// + /// Validates a Yeo-Johnson lambda value. + /// + /// The lambda value to validate. + /// The parameter name to use in the exception. + /// The validated lambda value. + /// Thrown when is not finite or is outside [-5, 5]. + private static double ValidateLambda(double lambda, string paramName) + { + if (!Tools.IsFinite(lambda) || lambda < MinimumLambda || lambda > MaximumLambda) + throw new ArgumentOutOfRangeException(paramName, "Lambda must be finite and in the range [-5, 5]."); + + return lambda; + } + } +} diff --git a/Numerics/Sampling/Bootstrap/Bootstrap.cs b/Numerics/Sampling/Bootstrap/Bootstrap.cs index 8c1c1ba9..51c6402c 100644 --- a/Numerics/Sampling/Bootstrap/Bootstrap.cs +++ b/Numerics/Sampling/Bootstrap/Bootstrap.cs @@ -1,7 +1,11 @@ using Numerics.Data.Statistics; using Numerics.Distributions; +using Numerics.Functions; +using Numerics.Mathematics.LinearAlgebra; using Numerics.Mathematics.Optimization; using System; +using System.Collections.Generic; +using System.Diagnostics; using System.Linq; using System.Threading; using System.Threading.Tasks; @@ -11,7 +15,7 @@ namespace Numerics.Sampling /// /// A general-purpose bootstrap class for parametric or non-parametric bootstrap analysis. - /// Supports all major confidence interval methods: Percentile, Bias-Corrected, BCa, Normal, and Bootstrap-t. + /// Supports Percentile, Bias-Corrected, BCa, Normal, Bootstrap-t, and covariance-aware pivotal bootstrap workflows. /// /// The type of data being bootstrapped. /// @@ -20,16 +24,21 @@ namespace Numerics.Sampling /// Haden Smith, USACE Risk Management Center, cole.h.smith@usace.army.mil /// /// + /// The regular bootstrap methods use and require . + /// The pivotal bootstrap is a separate covariance-aware run mode that uses + /// and . The two modes share + /// result storage, but their required delegates and supported confidence interval methods are validated separately. + /// + /// /// /// /// public class Bootstrap { - #region Construction /// - /// Constructs a new bootstrap analysis. + /// Constructs a new regular bootstrap analysis. /// /// The original data. /// The original fitted parameter set. @@ -39,20 +48,75 @@ public Bootstrap(TData originalData, ParameterSet originalParameters) _originalParameters = originalParameters; } + /// + /// Constructs a new covariance-aware bootstrap analysis. + /// + /// The original data. + /// The original fitted parameter set and covariance matrix. + /// Thrown when is null. + public Bootstrap(TData originalData, BootstrapFit originalFit) + { + if (originalFit == null) + throw new ArgumentNullException(nameof(originalFit)); + + _originalData = originalData; + _originalParameters = originalFit.Parameters.Clone(); + _originalCovariance = originalFit.Covariance.Clone(); + } + #endregion #region Members - private TData _originalData; + /// + /// Identifies the workflow that produced the active bootstrap results. + /// + private enum BootstrapRunType + { + /// + /// No bootstrap workflow has completed. + /// + None, + + /// + /// Results came from . + /// + Regular, + + /// + /// Results came from . + /// + DoubleBootstrap, + + /// + /// Results came from . + /// + Studentized, + + /// + /// Results came from or . + /// + Pivotal + } + + private readonly TData _originalData; private ParameterSet _originalParameters; + private Matrix? _originalCovariance; private ParameterSet[] _bootstrapParameterSets = null!; private double[,] _bootstrapStatistics = null!; + private ParameterSet[] _rawBootstrapParameterSets = Array.Empty(); + private double[,]? _rawBootstrapStatistics; + private BootstrapFit[] _rawBootstrapFits = Array.Empty(); + private ILinkFunction?[] _pivotalLinks = Array.Empty(); + private PivotalBootstrapDiagnostics? _pivotalDiagnostics; + private double[]? _parentStatistics; private int _numStats; private int _numParams; private int _failedCount; private bool[] _validFlags = null!; private double[,]? _studentizedValues; private double[,]? _transformedStatistics; + private BootstrapRunType _runType; #endregion @@ -65,18 +129,24 @@ public Bootstrap(TData originalData, ParameterSet originalParameters) /// /// Delegate function for fitting a model to data and returning a parameter set. + /// Required by regular bootstrap methods. /// public Func? FitFunction { get; set; } + /// + /// Delegate function for fitting a model to data and returning a parameter set plus covariance matrix. + /// Required by . + /// + public Func? FitWithCovarianceFunction { get; set; } + /// /// Delegate function for extracting statistics from a fitted parameter set. - /// Returns an array of statistic values (e.g., quantiles at multiple probabilities). + /// Required by regular bootstrap methods and optional for pivotal bootstrap. /// public Func? StatisticFunction { get; set; } /// /// Optional delegate for computing a leave-one-out jackknife sample. - /// Takes the original data and the index of the observation to remove. /// Required for the BCa confidence interval method. /// public Func? JackknifeFunction { get; set; } @@ -89,13 +159,13 @@ public Bootstrap(TData originalData, ParameterSet originalParameters) /// /// Optional transform applied to statistic values before Normal and Bootstrap-t CI computation. - /// Default is cube-root: x → x^(1/3). Set to null for no transform. + /// Default is cube-root. /// public Func Transform { get; set; } = x => Math.Pow(x, 1d / 3d); /// - /// Optional inverse transform corresponding to Transform. - /// Default is cube: x → x^3. Set to null for no transform. + /// Optional inverse transform corresponding to . + /// Default is cube. /// public Func InverseTransform { get; set; } = x => Math.Pow(x, 3d); @@ -120,17 +190,95 @@ public Bootstrap(TData originalData, ParameterSet originalParameters) public int InnerReplicates { get; set; } = 300; /// - /// Gets the bootstrapped model parameter sets. + /// Gets or sets the original covariance matrix used by the pivotal bootstrap. + /// + public Matrix? OriginalCovariance + { + get => _originalCovariance?.Clone(); + set => _originalCovariance = value?.Clone(); + } + + /// + /// Gets or sets a factory that returns one optional per fitted parameter for pivotal bootstrap. + /// Null link entries are treated as identity links by . + /// + public Func? PivotalLinkFactory { get; set; } + + /// + /// Gets or sets an optional filter applied to valid raw covariance-aware bootstrap fits before pivotal transformation. + /// + public Func? PivotalReplicateFilter { get; set; } + + /// + /// Gets or sets an optional validator applied to transformed pivotal parameter values. + /// + public Func? PivotalParameterValidator { get; set; } + + /// + /// Gets or sets the policy used when a pivotal draw is invalid. Default is . + /// + public PivotalBootstrapInvalidDrawPolicy PivotalInvalidDrawPolicy { get; set; } = PivotalBootstrapInvalidDrawPolicy.Drop; + + /// + /// Gets or sets a value indicating whether pivotal bootstrap covariance matrices should be regularized before Cholesky decomposition. + /// + public bool RegularizePivotalCovariances { get; set; } = true; + + /// + /// Gets or sets an optional absolute component limit for the standardized pivotal vector. + /// + public double? PivotalZLimit { get; set; } + + /// + /// Gets or sets a value indicating whether Gaussian jitter should be added to pivotal vectors before the optional limit check. + /// + public bool AddPivotalJitter { get; set; } + + /// + /// Gets or sets the base standard deviation of optional Gaussian jitter. The applied scale is this value divided by sqrt(parameter count). + /// + public double PivotalJitterScale { get; set; } = 0.01d; + + /// + /// Gets the active bootstrapped model parameter sets. + /// For pivotal bootstrap, these are the retained pivotal parameter draws. /// public ParameterSet[] BootstrapParameterSets => _bootstrapParameterSets; /// - /// Gets the bootstrapped statistics as a 2D array [replicates, statistics]. + /// Gets the active bootstrapped statistics as a 2D array [replicate, statistic]. + /// For pivotal bootstrap, these are pivotal statistics when was supplied. /// public double[,] BootstrapStatistics => _bootstrapStatistics; + /// + /// Gets the accepted raw covariance-aware bootstrap fits from the most recent pivotal bootstrap run. + /// + public BootstrapFit[] RawBootstrapFits => _rawBootstrapFits; + + /// + /// Gets the accepted raw covariance-aware bootstrap parameter sets from the most recent pivotal bootstrap run. + /// + public ParameterSet[] RawBootstrapParameterSets => _rawBootstrapParameterSets; + + /// + /// Gets the accepted raw bootstrap statistics from the most recent pivotal bootstrap run, when was supplied. + /// + public double[,]? RawBootstrapStatistics => _rawBootstrapStatistics; + + /// + /// Gets the pivotal link functions used by the most recent pivotal bootstrap run. + /// + public ILinkFunction?[] PivotalLinks => (ILinkFunction?[])_pivotalLinks.Clone(); + + /// + /// Gets diagnostics from the most recent pivotal bootstrap run. + /// + public PivotalBootstrapDiagnostics? PivotalDiagnostics => _pivotalDiagnostics; + /// /// Gets the number of replicates that failed after all retries. + /// For pivotal bootstrap, this is the raw covariance-aware fit failure count. /// public int FailedReplicates => _failedCount; @@ -139,15 +287,19 @@ public Bootstrap(TData originalData, ParameterSet originalParameters) #region Run Methods /// - /// Runs the basic bootstrap procedure with error handling and retry logic. + /// Runs the regular bootstrap procedure with error handling and retry logic. /// + /// Thrown when a required regular-bootstrap delegate has not been set. + /// Thrown when or is not positive. public void Run() { ValidateCoreDelegates(); + ValidateReplicationSettings(); var resample = ResampleFunction!; var fit = FitFunction!; var statistic = StatisticFunction!; InitializeState(); + ResetPivotalState(); var prng = new MersenneTwister(PRNGSeed); var seeds = prng.NextIntegers(Replicates); @@ -162,19 +314,9 @@ public void Run() var rng = new MersenneTwister(seeds[idx] + 10 * retry); var sample = resample(_originalData, _originalParameters, rng); var fitResult = fit(sample); - var stat = statistic(fitResult); - - // Validate: check for NaN in statistics - bool hasNaN = false; - for (int k = 0; k < _numStats; k++) - { - if (double.IsNaN(stat[k]) || double.IsInfinity(stat[k])) - { - hasNaN = true; - break; - } - } - if (hasNaN) continue; + if (!HasExpectedFiniteParameterValues(fitResult, _numParams)) + continue; + var stat = ValidateStatistics(statistic(fitResult), _numStats); _bootstrapParameterSets[idx] = fitResult; for (int k = 0; k < _numStats; k++) @@ -184,29 +326,36 @@ public void Run() } catch (Exception) { - // retry + // Retry failed regular bootstrap replicates. } if (succeeded) break; } if (!succeeded) - { MarkFailed(idx); - } }); + + _runType = BootstrapRunType.Regular; } /// /// Runs the double bootstrap procedure with bias correction. /// /// Number of inner bootstrap replicates. Default = 300. + /// Thrown when a required regular-bootstrap delegate has not been set. + /// Thrown when replicate counts are not positive. public void RunDoubleBootstrap(int innerReplicates = 300) { ValidateCoreDelegates(); + ValidateReplicationSettings(); + if (innerReplicates < 1) + throw new ArgumentOutOfRangeException(nameof(innerReplicates), "The number of inner replicates must be positive."); + var resample = ResampleFunction!; var fit = FitFunction!; var statistic = StatisticFunction!; InitializeState(); + ResetPivotalState(); var prng = new MersenneTwister(PRNGSeed); var seeds = prng.NextIntegers(Replicates); @@ -220,13 +369,13 @@ public void RunDoubleBootstrap(int innerReplicates = 300) { var rng = new MersenneTwister(seeds[idx] + 10 * retry); - // Outer bootstrap var outerSample = resample(_originalData, _originalParameters, rng); var outerFit = fit(outerSample); - var outerStat = statistic(outerFit); + if (!HasExpectedFiniteParameterValues(outerFit, _numParams)) + continue; + var outerStat = ValidateStatistics(statistic(outerFit), _numStats); - // Inner bootstrap for bias estimation - int p = outerFit.Values.Length; + int p = _numParams; var parmsInnerSum = new double[p]; var statsInnerSum = new double[_numStats]; int validInner = 0; @@ -237,7 +386,9 @@ public void RunDoubleBootstrap(int innerReplicates = 300) { var innerSample = resample(outerSample, outerFit, rng); var innerFit = fit(innerSample); - var innerStat = statistic(innerFit); + if (!HasExpectedFiniteParameterValues(innerFit, p)) + continue; + var innerStat = ValidateStatistics(statistic(innerFit), _numStats); for (int i = 0; i < p; i++) parmsInnerSum[i] += innerFit.Values[i]; @@ -247,13 +398,12 @@ public void RunDoubleBootstrap(int innerReplicates = 300) } catch (Exception) { - // skip failed inner replicate + // Skip failed inner replicate. } } if (validInner == 0) continue; - // Bias-correct parameters var biasCorrectedParms = new double[p]; for (int i = 0; i < p; i++) { @@ -261,7 +411,6 @@ public void RunDoubleBootstrap(int innerReplicates = 300) biasCorrectedParms[i] = outerFit.Values[i] - (innerMean - outerFit.Values[i]); } - // Bias-correct statistics var biasCorrectedStats = new double[_numStats]; for (int i = 0; i < _numStats; i++) { @@ -269,7 +418,7 @@ public void RunDoubleBootstrap(int innerReplicates = 300) biasCorrectedStats[i] = outerStat[i] - (innerMean - outerStat[i]); } - _bootstrapParameterSets[idx] = new ParameterSet(biasCorrectedParms, outerFit.Fitness); + _bootstrapParameterSets[idx] = new ParameterSet(biasCorrectedParms, outerFit.Fitness, outerFit.Weight); for (int k = 0; k < _numStats; k++) _bootstrapStatistics[idx, k] = biasCorrectedStats[k]; _validFlags[idx] = true; @@ -277,34 +426,41 @@ public void RunDoubleBootstrap(int innerReplicates = 300) } catch (Exception) { - // retry + // Retry failed outer replicate. } if (succeeded) break; } if (!succeeded) - { MarkFailed(idx); - } }); + + _runType = BootstrapRunType.DoubleBootstrap; } /// - /// Runs the bootstrap procedure with nested inner bootstrap for studentized (Bootstrap-t) confidence intervals. - /// Must be called before requesting Bootstrap-t CIs. + /// Runs the regular bootstrap procedure with nested inner bootstrap for studentized Bootstrap-t confidence intervals. /// + /// Thrown when a required regular-bootstrap delegate has not been set. + /// Thrown when replicate counts are not positive. public void RunWithStudentizedBootstrap() { ValidateCoreDelegates(); + ValidateReplicationSettings(); + if (InnerReplicates < 1) + throw new ArgumentOutOfRangeException(nameof(InnerReplicates), "The number of inner replicates must be positive."); + var resample = ResampleFunction!; var fitFunc = FitFunction!; var statistic = StatisticFunction!; + ResetPivotalState(); var originalStats = statistic(_originalParameters); + ValidateStatistics(originalStats); + _parentStatistics = (double[])originalStats.Clone(); _numStats = originalStats.Length; _numParams = _originalParameters.Values.Length; - // Apply transform to population statistics var popTransformed = new double[_numStats]; for (int i = 0; i < _numStats; i++) popTransformed[i] = ApplyTransform(originalStats[i]); @@ -331,18 +487,18 @@ public void RunWithStudentizedBootstrap() var rng = new MersenneTwister(seeds[idx] + 10 * retry); var sample = resample(_originalData, _originalParameters, rng); var outerFit = fitFunc(sample); - var outerStats = statistic(outerFit); + if (!HasExpectedFiniteParameterValues(outerFit, _numParams)) + continue; + var outerStats = ValidateStatistics(statistic(outerFit), _numStats); _bootstrapParameterSets[idx] = outerFit; for (int k = 0; k < _numStats; k++) _bootstrapStatistics[idx, k] = outerStats[k]; - // Transform outer statistics var outerTransformed = new double[_numStats]; for (int j = 0; j < _numStats; j++) outerTransformed[j] = ApplyTransform(outerStats[j]); - // Inner bootstrap for SE estimation var innerPrng = new MersenneTwister(seeds[idx]); var innerSeeds = innerPrng.NextIntegers(InnerReplicates); var innerTransformed = new double[InnerReplicates, _numStats]; @@ -354,7 +510,14 @@ public void RunWithStudentizedBootstrap() { var innerSample = resample(sample, outerFit, new MersenneTwister(innerSeeds[k])); var innerFit = fitFunc(innerSample); - var innerStats = statistic(innerFit); + if (!HasExpectedFiniteParameterValues(innerFit, _numParams)) + { + for (int j = 0; j < _numStats; j++) + innerTransformed[k, j] = double.NaN; + continue; + } + var innerStats = ValidateStatistics(statistic(innerFit), _numStats); + for (int j = 0; j < _numStats; j++) innerTransformed[k, j] = ApplyTransform(innerStats[j]); validInner++; @@ -368,11 +531,10 @@ public void RunWithStudentizedBootstrap() if (validInner < 2) continue; - // Compute inner SE per statistic and studentized values for (int j = 0; j < _numStats; j++) { var col = innerTransformed.GetColumn(j); - var validCol = col.Where(x => !double.IsNaN(x) && !double.IsInfinity(x)).ToArray(); + var validCol = col.Where(Tools.IsFinite).ToArray(); double se = validCol.Length > 1 ? Statistics.StandardDeviation(validCol) : double.NaN; transformedStatistics[idx, j] = outerTransformed[j]; studentizedValues[idx, j] = se > 0 ? (popTransformed[j] - outerTransformed[j]) / se : double.NaN; @@ -383,7 +545,7 @@ public void RunWithStudentizedBootstrap() } catch (Exception) { - // retry + // Retry failed outer replicate. } if (succeeded) break; } @@ -398,6 +560,310 @@ public void RunWithStudentizedBootstrap() } } }); + + _runType = BootstrapRunType.Studentized; + } + + #endregion + + #region Pivotal Bootstrap + + /// + /// Runs the covariance-aware pivotal bootstrap as a distinct bootstrap mode. + /// + /// + /// Thrown when , , or + /// has not been supplied. + /// + /// Thrown when or is not positive. + public void RunPivotalBootstrap() + { + ValidatePivotalDelegates(); + ValidateReplicationSettings(); + + var parentFit = CreateOriginalFit(); + var resample = ResampleFunction!; + var fit = FitWithCovarianceFunction!; + + var stopwatch = Stopwatch.StartNew(); + var prng = new MersenneTwister(PRNGSeed); + var seeds = prng.NextIntegers(Replicates); + var rawFits = new BootstrapFit?[Replicates]; + int failed = 0; + + Parallel.For(0, Replicates, index => + { + bool succeeded = false; + for (int retry = 0; retry < MaxRetries; retry++) + { + try + { + var rng = new MersenneTwister(seeds[index] + 10 * retry); + TData sample = resample(_originalData, parentFit.Parameters, rng); + BootstrapFit rawFit = fit(sample); + if (!IsValidFit(rawFit, parentFit.ParameterCount)) + continue; + + rawFits[index] = rawFit; + succeeded = true; + } + catch (Exception) + { + // Retry failed pivotal bootstrap resamples or covariance-aware fits. + } + + if (succeeded) + break; + } + + if (!succeeded) + Interlocked.Increment(ref failed); + }); + + stopwatch.Stop(); + + var acceptedRawFits = rawFits.Where(f => f != null).Select(f => f!).ToArray(); + TransformPivotalBootstrap(acceptedRawFits, Replicates, failed, stopwatch.Elapsed); + } + + /// + /// Transforms precomputed raw covariance-aware bootstrap fits into pivotal bootstrap draws. + /// + /// Raw bootstrap fits and covariances to transform. + /// Thrown when is null. + /// Thrown when no valid raw fits are accepted. + public void TransformPivotalBootstrap(IEnumerable rawFits) + { + if (rawFits == null) + throw new ArgumentNullException(nameof(rawFits)); + + BootstrapFit[] suppliedFits = rawFits.ToArray(); + TransformPivotalBootstrap(suppliedFits, suppliedFits.Length, 0, TimeSpan.Zero); + } + + /// + /// Applies the pivotal transformation to raw fits and stores the resulting active bootstrap ensemble. + /// + /// Raw bootstrap fits supplied to the transformation. + /// The number of raw replicates requested or supplied. + /// The number of raw replicates that failed before transformation. + /// The elapsed raw resampling and fitting time. + /// Thrown when no valid raw fits are accepted. + private void TransformPivotalBootstrap(BootstrapFit[] rawFits, int requestedReplicates, int failedRawReplicates, TimeSpan resamplingTime) + { + BootstrapFit parentFit = CreateOriginalFit(); + int p = parentFit.ParameterCount; + var stopwatch = Stopwatch.StartNew(); + + BootstrapFit[] acceptedRawFits = rawFits + .Where(fit => IsAcceptedRawFit(fit, p)) + .ToArray(); + + if (acceptedRawFits.Length == 0) + throw new InvalidOperationException("No valid raw bootstrap fits were supplied for pivotal transformation."); + + ILinkFunction?[] links = CreatePivotalLinks(parentFit, acceptedRawFits); + var linkController = new LinkController(links); + double[] parentEta = linkController.Link(parentFit.Parameters.Values); + ValidateTransformedValues(parentEta, "The parent link transformation produced a non-finite value."); + + Matrix parentLinkCovariance = LinkCovariance(parentFit, linkController); + var parentCholesky = new CholeskyDecomposition(parentLinkCovariance); + var pivotalParameterSets = new List(acceptedRawFits.Length); + var jitterRng = new MersenneTwister(PRNGSeed); + int invalid = 0; + + for (int i = 0; i < acceptedRawFits.Length; i++) + { + BootstrapFit rawFit = acceptedRawFits[i]; + if (TryCreatePivotalDraw(rawFit, parentFit, linkController, parentEta, parentCholesky, jitterRng, out ParameterSet pivotalParameters)) + { + pivotalParameterSets.Add(pivotalParameters); + } + else + { + invalid++; + if (TryApplyInvalidPolicy(rawFit, parentFit, out ParameterSet fallbackParameters)) + pivotalParameterSets.Add(fallbackParameters); + } + } + + stopwatch.Stop(); + + _rawBootstrapFits = acceptedRawFits; + _rawBootstrapParameterSets = acceptedRawFits.Select(f => f.Parameters.Clone()).ToArray(); + _bootstrapParameterSets = pivotalParameterSets.ToArray(); + _numParams = p; + _failedCount = failedRawReplicates; + _validFlags = Enumerable.Repeat(true, _bootstrapParameterSets.Length).ToArray(); + _studentizedValues = null; + _transformedStatistics = null; + _pivotalLinks = links; + _pivotalDiagnostics = new PivotalBootstrapDiagnostics + { + RequestedReplicates = requestedReplicates, + FailedRawReplicates = failedRawReplicates, + RejectedRawReplicates = rawFits.Length - acceptedRawFits.Length, + AcceptedRawReplicates = acceptedRawFits.Length, + InvalidPivotalReplicates = invalid, + RetainedPivotalReplicates = _bootstrapParameterSets.Length, + ResamplingTime = resamplingTime, + TransformationTime = stopwatch.Elapsed + }; + + if (StatisticFunction != null) + { + _parentStatistics = ValidateStatistics(StatisticFunction(parentFit.Parameters)); + _numStats = _parentStatistics.Length; + _rawBootstrapStatistics = ComputeStatistics(_rawBootstrapParameterSets, StatisticFunction); + _bootstrapStatistics = ComputeStatistics(_bootstrapParameterSets, StatisticFunction); + } + else + { + _parentStatistics = Array.Empty(); + _numStats = 0; + _rawBootstrapStatistics = new double[_rawBootstrapParameterSets.Length, 0]; + _bootstrapStatistics = new double[_bootstrapParameterSets.Length, 0]; + } + + _runType = BootstrapRunType.Pivotal; + } + + /// + /// Attempts to create one pivotal draw from an accepted raw fit. + /// + /// The accepted raw bootstrap fit. + /// The parent bootstrap fit. + /// The parameter link controller. + /// The parent parameters in link space. + /// The Cholesky decomposition of the parent link-space covariance. + /// The random-number generator used for optional jitter. + /// The created pivotal parameters when the method returns true. + /// True when a finite and valid pivotal draw is created; otherwise false. + private bool TryCreatePivotalDraw( + BootstrapFit rawFit, + BootstrapFit parentFit, + LinkController linkController, + double[] parentEta, + CholeskyDecomposition parentCholesky, + Random jitterRng, + out ParameterSet pivotalParameters) + { + pivotalParameters = default; + try + { + double[] rawEta = linkController.Link(rawFit.Parameters.Values); + ValidateTransformedValues(rawEta, "The raw link transformation produced a non-finite value."); + + Matrix rawLinkCovariance = LinkCovariance(rawFit, linkController); + var rawCholesky = new CholeskyDecomposition(rawLinkCovariance); + var difference = new double[parentEta.Length]; + for (int j = 0; j < difference.Length; j++) + difference[j] = parentEta[j] - rawEta[j]; + + double[] z = rawCholesky.Forward(new Vector(difference)).ToArray(); + if (!ContainsOnlyFiniteValues(z)) + return false; + + if (AddPivotalJitter && PivotalJitterScale > 0d) + { + double jitterScale = PivotalJitterScale / Math.Sqrt(Math.Max(1, z.Length)); + for (int j = 0; j < z.Length; j++) + { + double u = Tools.Clamp(jitterRng.NextDouble(), 1e-16, 1d - 1e-16); + z[j] += jitterScale * Normal.StandardZ(u); + } + } + + if (PivotalZLimit.HasValue && z.Any(value => Math.Abs(value) > PivotalZLimit.Value)) + return false; + + double[] reinflated = parentCholesky.L * z; + var pivotalEta = new double[parentEta.Length]; + for (int j = 0; j < pivotalEta.Length; j++) + pivotalEta[j] = parentEta[j] + reinflated[j]; + + double[] pivotalValues = linkController.InverseLink(pivotalEta); + if (!ContainsOnlyFiniteValues(pivotalValues)) + return false; + if (PivotalParameterValidator != null && !PivotalParameterValidator(pivotalValues)) + return false; + + pivotalParameters = new ParameterSet(pivotalValues, rawFit.Parameters.Fitness, rawFit.Parameters.Weight); + return true; + } + catch + { + return false; + } + } + + /// + /// Applies the configured invalid pivotal draw policy. + /// + /// The raw bootstrap fit associated with the invalid pivotal draw. + /// The parent bootstrap fit. + /// The fallback parameter set when the method returns true. + /// True when a fallback parameter set should be retained; otherwise false. + /// Thrown when is unknown. + private bool TryApplyInvalidPolicy(BootstrapFit rawFit, BootstrapFit parentFit, out ParameterSet parameterSet) + { + parameterSet = default; + switch (PivotalInvalidDrawPolicy) + { + case PivotalBootstrapInvalidDrawPolicy.Drop: + return false; + case PivotalBootstrapInvalidDrawPolicy.UseRaw: + parameterSet = rawFit.Parameters.Clone(); + return true; + case PivotalBootstrapInvalidDrawPolicy.UseParent: + parameterSet = parentFit.Parameters.Clone(); + return true; + default: + throw new ArgumentOutOfRangeException(nameof(PivotalInvalidDrawPolicy), $"Unknown invalid-draw policy: {PivotalInvalidDrawPolicy}."); + } + } + + /// + /// Computes the link-space covariance for a bootstrap fit using the supplied link controller. + /// + /// The fit containing native parameter covariance. + /// The link controller used to compute the diagonal Jacobian. + /// The covariance matrix in link space. + private Matrix LinkCovariance(BootstrapFit fit, LinkController linkController) + { + Matrix jacobian = linkController.LinkJacobian(fit.Parameters.Values); + for (int i = 0; i < jacobian.NumberOfRows; i++) + { + if (!Tools.IsFinite(jacobian[i, i])) + throw new ArgumentException("The link derivative produced a non-finite value."); + } + + Matrix linkCovariance = jacobian * fit.Covariance * jacobian.Transpose(); + return RegularizePivotalCovariances + ? MatrixRegularization.MakeSymmetricPositiveDefinite(linkCovariance) + : linkCovariance; + } + + /// + /// Builds the pivotal link array from or the identity default. + /// + /// The original parent fit. + /// The accepted raw bootstrap fits. + /// One optional link function per fitted parameter. + /// Thrown when the link factory returns the wrong number of links. + private ILinkFunction?[] CreatePivotalLinks(BootstrapFit parentFit, BootstrapFit[] acceptedRawFits) + { + ILinkFunction?[] links = PivotalLinkFactory != null + ? PivotalLinkFactory(new PivotalBootstrapContext(parentFit, acceptedRawFits)) + : new ILinkFunction?[parentFit.ParameterCount]; + + if (links == null) + throw new ArgumentException("The pivotal link factory must not return null.", nameof(PivotalLinkFactory)); + if (links.Length != parentFit.ParameterCount) + throw new ArgumentException("The pivotal link factory must return one link per parameter.", nameof(PivotalLinkFactory)); + + return (ILinkFunction?[])links.Clone(); } #endregion @@ -408,24 +874,30 @@ public void RunWithStudentizedBootstrap() /// Computes bootstrap confidence intervals using the specified method. /// /// The confidence interval method. - /// The confidence level. Default = 0.1, resulting in 90% CIs. - /// A BootstrapResults object containing CIs for both parameters and statistics. + /// The two-sided alpha level. Default = 0.1, resulting in 90% confidence intervals. + /// A object containing confidence intervals for parameters and statistics. + /// Thrown when the requested interval method is incompatible with the last run mode. + /// Thrown when is not between zero and one. public BootstrapResults GetConfidenceIntervals(BootstrapCIMethod method, double alpha = 0.1) { - if (_bootstrapStatistics == null) - throw new InvalidOperationException("Run() or RunWithStudentizedBootstrap() must be called first."); - if (alpha <= 0 || alpha >= 1) - throw new ArgumentOutOfRangeException(nameof(alpha), "Alpha must be between 0 and 1."); - if (method == BootstrapCIMethod.BCa && (JackknifeFunction == null || SampleSizeFunction == null)) - throw new InvalidOperationException("JackknifeFunction and SampleSizeFunction must be set for BCa method."); - if (method == BootstrapCIMethod.BootstrapT && _studentizedValues == null) - throw new InvalidOperationException("RunWithStudentizedBootstrap() must be called before requesting Bootstrap-t CIs."); + ValidateConfidenceIntervalRequest(method, alpha); + + if (_runType == BootstrapRunType.Pivotal) + { + return CreatePercentileResults( + _bootstrapParameterSets, + _bootstrapStatistics, + _originalParameters.Values, + _parentStatistics, + alpha, + _failedCount, + method); + } if (StatisticFunction == null) throw new InvalidOperationException("StatisticFunction must be set."); - var originalStats = StatisticFunction(_originalParameters); + var originalStats = ValidateStatistics(StatisticFunction(_originalParameters)); - // Compute acceleration constants once for BCa double[]? accelConstants = null; if (method == BootstrapCIMethod.BCa) accelConstants = ComputeAccelerationConstants(originalStats); @@ -439,7 +911,6 @@ public BootstrapResults GetConfidenceIntervals(BootstrapCIMethod method, double FailedReplicates = _failedCount }; - // Compute CIs for each statistic for (int i = 0; i < _numStats; i++) { var values = _bootstrapStatistics.GetColumn(i); @@ -463,7 +934,6 @@ public BootstrapResults GetConfidenceIntervals(BootstrapCIMethod method, double } } - // Compute percentile CIs for each parameter for (int i = 0; i < _numParams; i++) { var values = _bootstrapParameterSets.Select(ps => ps.Values[i]).ToArray(); @@ -473,16 +943,44 @@ public BootstrapResults GetConfidenceIntervals(BootstrapCIMethod method, double return results; } + /// + /// Computes raw-bootstrap percentile confidence intervals after a pivotal bootstrap run. + /// + /// The two-sided alpha level. Default = 0.1, resulting in 90% confidence intervals. + /// Percentile confidence intervals for the accepted raw bootstrap ensemble. + /// Thrown when the last run was not pivotal bootstrap. + /// Thrown when is not between zero and one. + public BootstrapResults GetRawPivotalConfidenceIntervals(double alpha = 0.1) + { + if (_runType != BootstrapRunType.Pivotal) + throw new InvalidOperationException("RunPivotalBootstrap() or TransformPivotalBootstrap() must be called before requesting raw pivotal-bootstrap confidence intervals."); + if (alpha <= 0d || alpha >= 1d) + throw new ArgumentOutOfRangeException(nameof(alpha), "Alpha must be between 0 and 1."); + + return CreatePercentileResults( + _rawBootstrapParameterSets, + _rawBootstrapStatistics, + _originalParameters.Values, + _parentStatistics, + alpha, + _failedCount, + BootstrapCIMethod.Percentile); + } + #endregion #region CI Methods /// - /// Computes percentile confidence intervals for a single statistic. + /// Computes percentile confidence intervals for a single statistic or parameter. /// + /// Bootstrap values for one statistic or parameter. + /// The original population estimate. + /// The two-sided alpha level. + /// The percentile confidence interval summary. private BootstrapStatisticResult ComputePercentileCI(double[] values, double populationEstimate, double alpha) { - var validValues = values.Where(x => !double.IsNaN(x) && !double.IsInfinity(x)).ToArray(); + var validValues = values.Where(Tools.IsFinite).ToArray(); Array.Sort(validValues); double lowerP = alpha / 2d; @@ -501,15 +999,18 @@ private BootstrapStatisticResult ComputePercentileCI(double[] values, double pop } /// - /// Computes bias-corrected (BC) confidence intervals for a single statistic. + /// Computes bias-corrected confidence intervals for a single statistic. /// + /// Bootstrap statistic values. + /// The original statistic estimate. + /// The two-sided alpha level. + /// The bias-corrected confidence interval summary. private BootstrapStatisticResult ComputeBiasCorrectedCI(double[] values, double populationEstimate, double alpha) { - var validValues = values.Where(x => !double.IsNaN(x) && !double.IsInfinity(x)).ToArray(); + var validValues = values.Where(Tools.IsFinite).ToArray(); int validN = validValues.Length; if (validN == 0) return EmptyResult(populationEstimate, values.Length); - // Count proportion <= population estimate int countLeq = 0; for (int i = 0; i < validN; i++) if (validValues[i] <= populationEstimate) countLeq++; @@ -536,15 +1037,19 @@ private BootstrapStatisticResult ComputeBiasCorrectedCI(double[] values, double } /// - /// Computes bias-corrected and accelerated (BCa) confidence intervals for a single statistic. + /// Computes bias-corrected and accelerated confidence intervals for a single statistic. /// + /// Bootstrap statistic values. + /// The original statistic estimate. + /// The two-sided alpha level. + /// The jackknife acceleration constant. + /// The BCa confidence interval summary. private BootstrapStatisticResult ComputeBCaCI(double[] values, double populationEstimate, double alpha, double acceleration) { - var validValues = values.Where(x => !double.IsNaN(x) && !double.IsInfinity(x)).ToArray(); + var validValues = values.Where(Tools.IsFinite).ToArray(); int validN = validValues.Length; if (validN == 0) return EmptyResult(populationEstimate, values.Length); - // Count proportion <= population estimate (matching BCaQuantileCI line 593) int countLeq = 0; for (int i = 0; i < validN; i++) if (validValues[i] <= populationEstimate) countLeq++; @@ -577,18 +1082,20 @@ private BootstrapStatisticResult ComputeBCaCI(double[] values, double population } /// - /// Computes Normal (standard) confidence intervals for a single statistic. - /// Uses a configurable transform for transformation invariance. + /// Computes Normal confidence intervals for a single statistic using the configured transform pair. /// + /// Bootstrap statistic values. + /// The original statistic estimate. + /// The two-sided alpha level. + /// The Normal confidence interval summary. private BootstrapStatisticResult ComputeNormalCI(double[] values, double populationEstimate, double alpha) { double popTransformed = ApplyTransform(populationEstimate); - var transformedValid = new double[values.Length]; int validCount = 0; for (int i = 0; i < values.Length; i++) { - if (!double.IsNaN(values[i]) && !double.IsInfinity(values[i])) + if (Tools.IsFinite(values[i])) { transformedValid[validCount] = ApplyTransform(values[i]); validCount++; @@ -620,19 +1127,20 @@ private BootstrapStatisticResult ComputeNormalCI(double[] values, double populat } /// - /// Computes Bootstrap-t (studentized) confidence intervals for a single statistic. - /// Requires RunWithStudentizedBootstrap() to have been called first. + /// Computes Bootstrap-t confidence intervals for a single statistic. /// + /// The zero-based statistic index. + /// The original statistic estimate. + /// The two-sided alpha level. + /// The Bootstrap-t confidence interval summary. private BootstrapStatisticResult ComputeBootstrapTCI(int statisticIndex, double populationEstimate, double alpha) { double popTransformed = ApplyTransform(populationEstimate); - - // GetConfidenceIntervals() validates _studentizedValues is non-null before calling this method var xCol = _transformedStatistics!.GetColumn(statisticIndex); var tCol = _studentizedValues!.GetColumn(statisticIndex); - var validX = xCol.Where(x => !double.IsNaN(x) && !double.IsInfinity(x)).ToArray(); - var validT = tCol.Where(x => !double.IsNaN(x) && !double.IsInfinity(x)).ToArray(); + var validX = xCol.Where(Tools.IsFinite).ToArray(); + var validT = tCol.Where(Tools.IsFinite).ToArray(); if (validT.Length < 2) return EmptyResult(populationEstimate, Replicates); @@ -659,11 +1167,12 @@ private BootstrapStatisticResult ComputeBootstrapTCI(int statisticIndex, double #region BCa Support /// - /// Computes the acceleration constants for each statistic using jackknife leave-one-out. + /// Computes acceleration constants for each statistic using leave-one-out jackknife samples. /// + /// The original statistic estimates. + /// The acceleration constant for each statistic. private double[] ComputeAccelerationConstants(double[] populationEstimates) { - // Caller (GetConfidenceIntervals) validates these delegates before calling this method var sampleSize = SampleSizeFunction!; var jackknife = JackknifeFunction!; var fitFunc = FitFunction!; @@ -691,7 +1200,7 @@ private double[] ComputeAccelerationConstants(double[] populationEstimates) } catch (Exception) { - // Skip failed jackknife samples + // Skip failed jackknife samples. } }); @@ -706,8 +1215,9 @@ private double[] ComputeAccelerationConstants(double[] populationEstimates) #region Private Helpers /// - /// Validates that the core delegate functions are set. + /// Validates that the regular bootstrap delegates are set. /// + /// Thrown when a required regular-bootstrap delegate is missing. private void ValidateCoreDelegates() { if (ResampleFunction == null) @@ -719,12 +1229,38 @@ private void ValidateCoreDelegates() } /// - /// Initializes internal state arrays before a bootstrap run. + /// Validates that the pivotal bootstrap delegates and parent covariance are set. + /// + /// Thrown when a required pivotal-bootstrap input is missing. + private void ValidatePivotalDelegates() + { + if (ResampleFunction == null) + throw new InvalidOperationException("ResampleFunction must be set before running the pivotal bootstrap."); + if (FitWithCovarianceFunction == null) + throw new InvalidOperationException("FitWithCovarianceFunction must be set before running the pivotal bootstrap."); + if (_originalCovariance == null) + throw new InvalidOperationException("OriginalCovariance must be set before running the pivotal bootstrap."); + } + + /// + /// Validates the configured replicate and retry counts. + /// + /// Thrown when or is not positive. + private void ValidateReplicationSettings() + { + if (Replicates < 1) + throw new ArgumentOutOfRangeException(nameof(Replicates), "The number of replicates must be positive."); + if (MaxRetries < 1) + throw new ArgumentOutOfRangeException(nameof(MaxRetries), "The maximum retry count must be positive."); + } + + /// + /// Initializes regular-bootstrap state arrays before a regular run. /// private void InitializeState() { - // ValidateCoreDelegates() is always called before this method - var originalStats = StatisticFunction!(_originalParameters); + var originalStats = ValidateStatistics(StatisticFunction!(_originalParameters)); + _parentStatistics = (double[])originalStats.Clone(); _numStats = originalStats.Length; _numParams = _originalParameters.Values.Length; @@ -734,11 +1270,25 @@ private void InitializeState() _failedCount = 0; _studentizedValues = null; _transformedStatistics = null; + _runType = BootstrapRunType.None; + } + + /// + /// Clears stored pivotal-bootstrap state before a regular bootstrap run. + /// + private void ResetPivotalState() + { + _rawBootstrapFits = Array.Empty(); + _rawBootstrapParameterSets = Array.Empty(); + _rawBootstrapStatistics = null; + _pivotalLinks = Array.Empty(); + _pivotalDiagnostics = null; } /// - /// Marks a replicate as failed with NaN values. + /// Marks a replicate as failed with NaN parameter and statistic values. /// + /// The zero-based replicate index. private void MarkFailed(int idx) { var nanParams = new double[_numParams]; @@ -751,24 +1301,31 @@ private void MarkFailed(int idx) } /// - /// Applies the Transform function, or returns the value unchanged if Transform is null. + /// Applies the configured regular-bootstrap statistic transform. /// + /// The value to transform. + /// The transformed value, or the original value when is null. private double ApplyTransform(double value) { return Transform != null ? Transform(value) : value; } /// - /// Applies the InverseTransform function, or returns the value unchanged if InverseTransform is null. + /// Applies the configured inverse regular-bootstrap statistic transform. /// + /// The value to inverse transform. + /// The inverse-transformed value, or the original value when is null. private double ApplyInverseTransform(double value) { return InverseTransform != null ? InverseTransform(value) : value; } /// - /// Creates an empty result for when there are insufficient valid values. + /// Creates an empty confidence interval result for insufficient valid values. /// + /// The original estimate. + /// The total number of attempted values. + /// A confidence interval result containing NaN interval bounds. private BootstrapStatisticResult EmptyResult(double populationEstimate, int totalCount) { return new BootstrapStatisticResult @@ -783,6 +1340,243 @@ private BootstrapStatisticResult EmptyResult(double populationEstimate, int tota }; } + /// + /// Creates percentile results for parameter and statistic ensembles. + /// + /// The parameter ensemble. + /// The statistic ensemble, or null when no statistic function was supplied. + /// The original parameter estimates. + /// The original statistic estimates. + /// The two-sided alpha level. + /// The number of failed replicates. + /// The confidence interval method label to store in the result. + /// A bootstrap results object containing percentile confidence intervals. + private BootstrapResults CreatePercentileResults( + ParameterSet[] parameterSets, + double[,]? statistics, + double[] parameterEstimates, + double[]? statisticEstimates, + double alpha, + int failedReplicates, + BootstrapCIMethod method) + { + int statisticCount = statistics?.GetLength(1) ?? 0; + var results = new BootstrapResults + { + Method = method, + Alpha = alpha, + StatisticResults = new BootstrapStatisticResult[statisticCount], + ParameterResults = new BootstrapStatisticResult[parameterEstimates.Length], + FailedReplicates = failedReplicates + }; + + for (int i = 0; i < statisticCount; i++) + { + double estimate = statisticEstimates != null && i < statisticEstimates.Length ? statisticEstimates[i] : double.NaN; + results.StatisticResults[i] = ComputePercentileCI(statistics!.GetColumn(i), estimate, alpha); + } + + for (int i = 0; i < parameterEstimates.Length; i++) + results.ParameterResults[i] = ComputePercentileCI(GetParameterColumn(parameterSets, i), parameterEstimates[i], alpha); + + return results; + } + + /// + /// Extracts one parameter column from an ensemble of parameter sets. + /// + /// The parameter ensemble. + /// The zero-based parameter index. + /// The values for the requested parameter. + private static double[] GetParameterColumn(ParameterSet[] parameterSets, int parameterIndex) + { + var values = new double[parameterSets.Length]; + for (int i = 0; i < parameterSets.Length; i++) + values[i] = parameterSets[i].Values[parameterIndex]; + return values; + } + + /// + /// Computes statistic values for each parameter set. + /// + /// The parameter sets to evaluate. + /// The statistic function. + /// A two-dimensional statistic array indexed by replicate and statistic. + private static double[,] ComputeStatistics(ParameterSet[] parameterSets, Func statistic) + { + if (parameterSets.Length == 0) + return new double[0, 0]; + + double[] first = ValidateStatistics(statistic(parameterSets[0])); + var values = new double[parameterSets.Length, first.Length]; + for (int j = 0; j < first.Length; j++) + values[0, j] = first[j]; + + for (int i = 1; i < parameterSets.Length; i++) + { + double[] row = ValidateStatistics(statistic(parameterSets[i]), first.Length); + for (int j = 0; j < first.Length; j++) + values[i, j] = row[j]; + } + + return values; + } + + /// + /// Validates statistic values returned by . + /// + /// The statistic values to validate. + /// The expected statistic count, when known. + /// The validated statistic values. + /// Thrown when the statistic values are null, empty, non-finite, or have the wrong length. + private static double[] ValidateStatistics(double[] statistics, int? expectedLength = null) + { + if (statistics == null) + throw new InvalidOperationException("The statistic function returned null."); + if (statistics.Length == 0) + throw new InvalidOperationException("The statistic function must return at least one statistic."); + if (expectedLength.HasValue && statistics.Length != expectedLength.Value) + throw new InvalidOperationException("The statistic function must return the same number of statistics for every draw."); + if (!ContainsOnlyFiniteValues(statistics)) + throw new InvalidOperationException("The statistic function returned a non-finite value."); + + return statistics; + } + + /// + /// Determines whether a fitted parameter set has the expected finite parameter vector. + /// + /// The fitted parameter set to evaluate. + /// The expected number of parameters. + /// True when the parameter values are non-null, have the expected length, and are finite; otherwise false. + private static bool HasExpectedFiniteParameterValues(ParameterSet parameters, int expectedLength) + { + return parameters.Values != null + && parameters.Values.Length == expectedLength + && ContainsOnlyFiniteValues(parameters.Values); + } + + /// + /// Creates the original covariance-aware fit for pivotal bootstrap. + /// + /// The original fit and covariance. + /// Thrown when is not set. + private BootstrapFit CreateOriginalFit() + { + if (_originalCovariance == null) + throw new InvalidOperationException("OriginalCovariance must be set before running or transforming a pivotal bootstrap."); + + var fit = new BootstrapFit(_originalParameters, _originalCovariance); + if (!IsValidFit(fit, null)) + throw new ArgumentException("The original fit must contain finite parameters and a finite square covariance matrix."); + + return fit; + } + + /// + /// Determines whether a raw covariance-aware fit should be accepted before pivotal transformation. + /// + /// The fit to evaluate. + /// The required parameter count. + /// True when the fit is valid and passes the optional replicate filter; otherwise false. + private bool IsAcceptedRawFit(BootstrapFit? fit, int parameterCount) + { + if (!IsValidFit(fit, parameterCount)) + return false; + if (PivotalReplicateFilter != null && !PivotalReplicateFilter(fit!)) + return false; + return true; + } + + /// + /// Determines whether a covariance-aware fit has finite parameters and a finite square covariance matrix. + /// + /// The fit to evaluate. + /// The required parameter count, when known. + /// True when the fit is valid; otherwise false. + private static bool IsValidFit(BootstrapFit? fit, int? parameterCount) + { + if (fit == null) + return false; + if (fit.Parameters.Values == null || fit.Parameters.Values.Length == 0) + return false; + if (parameterCount.HasValue && fit.Parameters.Values.Length != parameterCount.Value) + return false; + if (!ContainsOnlyFiniteValues(fit.Parameters.Values)) + return false; + if (fit.Covariance == null || !fit.Covariance.IsSquare || fit.Covariance.NumberOfRows != fit.Parameters.Values.Length) + return false; + + for (int i = 0; i < fit.Covariance.NumberOfRows; i++) + { + for (int j = 0; j < fit.Covariance.NumberOfColumns; j++) + { + if (!Tools.IsFinite(fit.Covariance[i, j])) + return false; + } + } + + return true; + } + + /// + /// Validates that transformed values are finite. + /// + /// The transformed values. + /// The exception message to use when validation fails. + /// Thrown when any value is non-finite. + private static void ValidateTransformedValues(double[] values, string message) + { + if (!ContainsOnlyFiniteValues(values)) + throw new ArgumentException(message); + } + + /// + /// Determines whether every value in an array is finite. + /// + /// The values to evaluate. + /// True when every value is finite; otherwise false. + private static bool ContainsOnlyFiniteValues(double[] values) + { + if (values == null) + return false; + + for (int i = 0; i < values.Length; i++) + { + if (!Tools.IsFinite(values[i])) + return false; + } + + return true; + } + + /// + /// Validates a confidence interval request against the last bootstrap run mode. + /// + /// The requested confidence interval method. + /// The requested alpha level. + /// Thrown when no run has completed or when the method is incompatible with the run mode. + /// Thrown when is not between zero and one. + private void ValidateConfidenceIntervalRequest(BootstrapCIMethod method, double alpha) + { + if (_runType == BootstrapRunType.None || _bootstrapStatistics == null) + throw new InvalidOperationException("A bootstrap run must be completed before requesting confidence intervals."); + if (alpha <= 0d || alpha >= 1d) + throw new ArgumentOutOfRangeException(nameof(alpha), "Alpha must be between 0 and 1."); + + if (_runType == BootstrapRunType.Pivotal) + { + if (method != BootstrapCIMethod.Percentile) + throw new InvalidOperationException("Only percentile confidence intervals are supported after a pivotal bootstrap run."); + return; + } + + if (method == BootstrapCIMethod.BCa && (JackknifeFunction == null || SampleSizeFunction == null)) + throw new InvalidOperationException("JackknifeFunction and SampleSizeFunction must be set for BCa method."); + if (method == BootstrapCIMethod.BootstrapT && _studentizedValues == null) + throw new InvalidOperationException("RunWithStudentizedBootstrap() must be called before requesting Bootstrap-t confidence intervals."); + } + #endregion } } diff --git a/Numerics/Sampling/Bootstrap/Support/BootstrapFit.cs b/Numerics/Sampling/Bootstrap/Support/BootstrapFit.cs new file mode 100644 index 00000000..698f9954 --- /dev/null +++ b/Numerics/Sampling/Bootstrap/Support/BootstrapFit.cs @@ -0,0 +1,75 @@ +using Numerics.Mathematics.LinearAlgebra; +using Numerics.Mathematics.Optimization; +using System; + +namespace Numerics.Sampling +{ + /// + /// Stores a fitted parameter vector and the covariance matrix associated with that fit. + /// + /// + /// + /// Authors: + /// Haden Smith, USACE Risk Management Center, cole.h.smith@usace.army.mil + /// + /// + /// This type is used by covariance-aware bootstrap procedures, including the + /// pivotal bootstrap. The parameter values and covariance are cloned on input + /// so callers can safely reuse or mutate their source objects after construction. + /// + /// + [Serializable] + public sealed class BootstrapFit + { + /// + /// Initializes a new instance of the class. + /// + /// The fitted parameter set. + /// The covariance matrix for . + /// + /// Thrown when has no values, when + /// is not square, or when the covariance dimension does not match the parameter count. + /// + /// Thrown when is null. + public BootstrapFit(ParameterSet parameters, Matrix covariance) + { + if (parameters.Values == null || parameters.Values.Length == 0) + throw new ArgumentException("The parameter set must contain at least one value.", nameof(parameters)); + if (covariance == null) + throw new ArgumentNullException(nameof(covariance)); + if (!covariance.IsSquare) + throw new ArgumentException("The covariance matrix must be square.", nameof(covariance)); + if (covariance.NumberOfRows != parameters.Values.Length) + throw new ArgumentException("The covariance dimension must match the parameter count.", nameof(covariance)); + + Parameters = parameters.Clone(); + Covariance = covariance.Clone(); + } + + /// + /// Initializes a new instance of the class. + /// + /// The fitted parameter values. + /// The covariance matrix for . + /// Thrown when or is null. + public BootstrapFit(double[] parameters, Matrix covariance) + : this(new ParameterSet(parameters != null ? (double[])parameters.Clone() : throw new ArgumentNullException(nameof(parameters)), double.NaN), covariance) + { + } + + /// + /// Gets the fitted parameter set. + /// + public ParameterSet Parameters { get; } + + /// + /// Gets the covariance matrix for . + /// + public Matrix Covariance { get; } + + /// + /// Gets the number of fitted parameters. + /// + public int ParameterCount => Parameters.Values.Length; + } +} diff --git a/Numerics/Sampling/Bootstrap/Support/PivotalBootstrapContext.cs b/Numerics/Sampling/Bootstrap/Support/PivotalBootstrapContext.cs new file mode 100644 index 00000000..d44ce502 --- /dev/null +++ b/Numerics/Sampling/Bootstrap/Support/PivotalBootstrapContext.cs @@ -0,0 +1,79 @@ +using System; + +namespace Numerics.Sampling +{ + /// + /// Provides accepted raw bootstrap fits to a pivotal bootstrap link factory. + /// + /// + /// + /// Authors: + /// Haden Smith, USACE Risk Management Center, cole.h.smith@usace.army.mil + /// + /// + /// Link factories can use this context to select fixed links, such as + /// LogLink for positive scale parameters, or to fit data-adaptive links, + /// such as YeoJohnsonLink, from the accepted raw bootstrap ensemble. + /// + /// + public sealed class PivotalBootstrapContext + { + private readonly double[,] _rawParameterValues; + + /// + /// Initializes a new instance of the class. + /// + /// The original parent fit. + /// The accepted raw bootstrap fits. + /// Thrown when or is null. + /// Thrown when a raw fit parameter count differs from the parent fit. + public PivotalBootstrapContext(BootstrapFit parentFit, BootstrapFit[] rawBootstrapFits) + { + ParentFit = parentFit ?? throw new ArgumentNullException(nameof(parentFit)); + RawBootstrapFits = rawBootstrapFits ?? throw new ArgumentNullException(nameof(rawBootstrapFits)); + + int p = parentFit.ParameterCount; + _rawParameterValues = new double[rawBootstrapFits.Length, p]; + for (int i = 0; i < rawBootstrapFits.Length; i++) + { + if (rawBootstrapFits[i].ParameterCount != p) + throw new ArgumentException("Every raw bootstrap fit must have the same parameter count as the parent fit.", nameof(rawBootstrapFits)); + + for (int j = 0; j < p; j++) + _rawParameterValues[i, j] = rawBootstrapFits[i].Parameters.Values[j]; + } + } + + /// + /// Gets the original parent fit. + /// + public BootstrapFit ParentFit { get; } + + /// + /// Gets the accepted raw bootstrap fits. + /// + public BootstrapFit[] RawBootstrapFits { get; } + + /// + /// Gets the number of fitted parameters. + /// + public int ParameterCount => ParentFit.ParameterCount; + + /// + /// Gets a copy of the accepted raw bootstrap values for one parameter. + /// + /// The zero-based parameter index. + /// The accepted raw bootstrap values for . + /// Thrown when is outside the parameter range. + public double[] GetRawParameterValues(int parameterIndex) + { + if (parameterIndex < 0 || parameterIndex >= ParameterCount) + throw new ArgumentOutOfRangeException(nameof(parameterIndex)); + + var values = new double[RawBootstrapFits.Length]; + for (int i = 0; i < values.Length; i++) + values[i] = _rawParameterValues[i, parameterIndex]; + return values; + } + } +} diff --git a/Numerics/Sampling/Bootstrap/Support/PivotalBootstrapDiagnostics.cs b/Numerics/Sampling/Bootstrap/Support/PivotalBootstrapDiagnostics.cs new file mode 100644 index 00000000..b50586ac --- /dev/null +++ b/Numerics/Sampling/Bootstrap/Support/PivotalBootstrapDiagnostics.cs @@ -0,0 +1,57 @@ +using System; + +namespace Numerics.Sampling +{ + /// + /// Stores diagnostic counts and timings from the most recent pivotal bootstrap run. + /// + /// + /// + /// Authors: + /// Haden Smith, USACE Risk Management Center, cole.h.smith@usace.army.mil + /// + /// + [Serializable] + public sealed class PivotalBootstrapDiagnostics + { + /// + /// Gets or sets the number of raw bootstrap replicates requested or supplied. + /// + public int RequestedReplicates { get; set; } + + /// + /// Gets or sets the number of raw bootstrap fits rejected before transformation. + /// + public int RejectedRawReplicates { get; set; } + + /// + /// Gets or sets the number of raw bootstrap fits that failed after all retries. + /// + public int FailedRawReplicates { get; set; } + + /// + /// Gets or sets the number of raw bootstrap fits accepted for transformation. + /// + public int AcceptedRawReplicates { get; set; } + + /// + /// Gets or sets the number of invalid pivotal draws encountered after raw-fit acceptance. + /// + public int InvalidPivotalReplicates { get; set; } + + /// + /// Gets or sets the number of pivotal draws retained after invalid-draw handling. + /// + public int RetainedPivotalReplicates { get; set; } + + /// + /// Gets or sets the time spent generating and fitting raw bootstrap replicates. + /// + public TimeSpan ResamplingTime { get; set; } + + /// + /// Gets or sets the time spent applying the pivotal transformation. + /// + public TimeSpan TransformationTime { get; set; } + } +} diff --git a/Numerics/Sampling/Bootstrap/Support/PivotalBootstrapInvalidDrawPolicy.cs b/Numerics/Sampling/Bootstrap/Support/PivotalBootstrapInvalidDrawPolicy.cs new file mode 100644 index 00000000..13851d5f --- /dev/null +++ b/Numerics/Sampling/Bootstrap/Support/PivotalBootstrapInvalidDrawPolicy.cs @@ -0,0 +1,23 @@ +namespace Numerics.Sampling +{ + /// + /// Specifies how invalid pivotal bootstrap draws are handled after the two-covariance transform. + /// + public enum PivotalBootstrapInvalidDrawPolicy + { + /// + /// Drop invalid pivotal draws from the retained pivotal ensemble. + /// + Drop, + + /// + /// Replace invalid pivotal draws with the corresponding accepted raw bootstrap fit. + /// + UseRaw, + + /// + /// Replace invalid pivotal draws with the original parent fit. + /// + UseParent + } +} diff --git a/Numerics/Sampling/MCMC/Base/MCMCSampler.cs b/Numerics/Sampling/MCMC/Base/MCMCSampler.cs index 716aa529..bbbb8465 100644 --- a/Numerics/Sampling/MCMC/Base/MCMCSampler.cs +++ b/Numerics/Sampling/MCMC/Base/MCMCSampler.cs @@ -618,7 +618,7 @@ public void Reset() AcceptCount = new int[NumberOfChains]; SampleCount = new int[NumberOfChains]; MeanLogLikelihood = new List(); - MAP = new ParameterSet([], double.MinValue); + MAP = new ParameterSet([], double.NegativeInfinity); } #endregion diff --git a/Numerics/Sampling/MCMC/NUTS.cs b/Numerics/Sampling/MCMC/NUTS.cs index ec42512f..76be0966 100644 --- a/Numerics/Sampling/MCMC/NUTS.cs +++ b/Numerics/Sampling/MCMC/NUTS.cs @@ -321,16 +321,9 @@ private double FindReasonableEpsilon(double[] theta0, double logLH0, int chainIn // Compute initial Hamiltonian double H0 = -logLH0 + 0.5 * DiagonalQuadraticForm(r0, _inverseMassMatrix[chainIndex]); - // Take one leapfrog step var thetaPrime = (double[])theta0.Clone(); var rPrime = (double[])r0.Clone(); - LeapfrogInPlace(thetaPrime, rPrime, epsilon, chainIndex); - - double logLH1 = SafeLogLikelihood(thetaPrime); - double H1 = -logLH1 + 0.5 * DiagonalQuadraticForm(rPrime, _inverseMassMatrix[chainIndex]); - double logAlpha = H0 - H1; - if (double.IsNaN(logAlpha) || double.IsNegativeInfinity(logAlpha)) - logAlpha = -1000.0; + double logAlpha = TrySingleStepLogAcceptance(theta0, r0, thetaPrime, rPrime, epsilon, H0, chainIndex); // Determine direction: double epsilon if acceptance too high, halve if too low double a = logAlpha > Math.Log(0.5) ? 1.0 : -1.0; @@ -347,21 +340,47 @@ private double FindReasonableEpsilon(double[] theta0, double logLH0, int chainIn if (epsilon < 1e-8 || epsilon > 1e6) break; - // Recompute with new epsilon Array.Copy(theta0, thetaPrime, D); Array.Copy(r0, rPrime, D); - LeapfrogInPlace(thetaPrime, rPrime, epsilon, chainIndex); - - logLH1 = SafeLogLikelihood(thetaPrime); - H1 = -logLH1 + 0.5 * DiagonalQuadraticForm(rPrime, _inverseMassMatrix[chainIndex]); - logAlpha = H0 - H1; - if (double.IsNaN(logAlpha) || double.IsNegativeInfinity(logAlpha)) - logAlpha = -1000.0; + logAlpha = TrySingleStepLogAcceptance(theta0, r0, thetaPrime, rPrime, epsilon, H0, chainIndex); } return Math.Max(1e-8, Math.Min(epsilon, 1e6)); } + /// + /// Attempts one leapfrog step and converts invalid Hamiltonian states to a low log-acceptance value. + /// + /// The starting position. + /// The starting momentum. + /// Working buffer for the proposed position. + /// Working buffer for the proposed momentum. + /// The leapfrog step size. + /// The initial Hamiltonian. + /// The chain index for mass matrix access. + /// The log acceptance statistic for the trial step, or a low value when the trial is invalid. + private double TrySingleStepLogAcceptance(double[] theta0, double[] r0, double[] thetaPrime, double[] rPrime, double epsilon, double initialHamiltonian, int chainIndex) + { + try + { + LeapfrogInPlace(thetaPrime, rPrime, epsilon, chainIndex); + + double logLH = SafeLogLikelihood(thetaPrime); + double hamiltonian = -logLH + 0.5 * DiagonalQuadraticForm(rPrime, _inverseMassMatrix[chainIndex]); + double logAlpha = initialHamiltonian - hamiltonian; + if (!Tools.IsFinite(logAlpha)) + return -1000.0; + + return logAlpha; + } + catch (ArithmeticException) + { + Array.Copy(theta0, thetaPrime, theta0.Length); + Array.Copy(r0, rPrime, r0.Length); + return -1000.0; + } + } + /// /// Performs a single leapfrog step in-place on raw arrays, using the per-chain mass matrix. /// Used by FindReasonableEpsilon to avoid Vector allocations. @@ -626,10 +645,23 @@ private TreeState BuildTree(Vector theta, Vector momentum, double epsilon, int d if (depth == 0) { // Base case: take one leapfrog step - var (thetaPrime, momentumPrime) = Leapfrog(theta, momentum, epsilon, chainIndex); + Vector thetaPrime; + Vector momentumPrime; + try + { + (thetaPrime, momentumPrime) = Leapfrog(theta, momentum, epsilon, chainIndex); + } + catch (ArithmeticException) + { + return InvalidTreeState(theta, momentum); + } + double logLH = SafeLogLikelihood(thetaPrime.Array); double H = -logLH + 0.5 * DiagonalQuadraticFormVec(momentumPrime, _inverseMassMatrix[chainIndex]); double logWeight = -H; + if (!Tools.IsFinite(logLH) || !Tools.IsFinite(H) || !Tools.IsFinite(logWeight)) + return InvalidTreeState(thetaPrime, momentumPrime); + bool divergent = (H - H0) > MAX_DELTA_H; double alpha = Math.Min(1.0, Math.Exp(H0 - H)); if (double.IsNaN(alpha)) alpha = 0; @@ -694,6 +726,30 @@ private TreeState BuildTree(Vector theta, Vector momentum, double epsilon, int d return tree; } + /// + /// Creates an invalid tree state for a trajectory that entered a non-finite log-density region. + /// + /// The position to preserve as the tree endpoint. + /// The momentum to preserve as the tree endpoint. + /// An invalid tree state with zero acceptance contribution. + private static TreeState InvalidTreeState(Vector theta, Vector momentum) + { + return new TreeState + { + ThetaMinus = theta.Clone(), + MomentumMinus = momentum.Clone(), + ThetaPlus = theta.Clone(), + MomentumPlus = momentum.Clone(), + ThetaPrime = theta.Clone(), + LogSumWeight = double.NegativeInfinity, + LogLikelihoodPrime = double.NegativeInfinity, + LeafCount = 1, + Valid = false, + SumAlpha = 0d, + NumAlpha = 1 + }; + } + /// /// Performs a single leapfrog integration step with boundary enforcement, /// using the per-chain diagonal mass matrix. diff --git a/Numerics/Sampling/MCMC/SNIS.cs b/Numerics/Sampling/MCMC/SNIS.cs index a5552f17..714b9a73 100644 --- a/Numerics/Sampling/MCMC/SNIS.cs +++ b/Numerics/Sampling/MCMC/SNIS.cs @@ -107,7 +107,7 @@ public override void Sample() MeanLogLikelihood = new List(); // Keeps track of best parameter set - MAP = new ParameterSet([], double.MinValue, double.MinValue); + MAP = new ParameterSet([], double.NegativeInfinity, double.NegativeInfinity); // Create parameter random values var rnds = _masterPRNG.NextDoubles(Iterations, NumberOfParameters); @@ -138,25 +138,33 @@ public override void Sample() // Get the maximum a posteriori for (int i = 0; i < Iterations; i++) - if (MarkovChains[0][i].Weight > MAP.Weight) + if (IsFinite(MarkovChains[0][i].Weight) && MarkovChains[0][i].Weight > MAP.Weight) MAP = MarkovChains[0][i].Clone(); // Get the normalization factor double max = MAP.Weight; + if (!IsFinite(max)) + throw new InvalidOperationException("SNIS failed because all importance weights are non-finite."); + double sum = 0; for (int i = 0; i < Iterations; i++) { - if (!double.IsNaN(MarkovChains[0][i].Weight) && MarkovChains[0][i].Weight != double.MinValue) + if (IsFinite(MarkovChains[0][i].Weight)) { sum += Math.Exp(MarkovChains[0][i].Weight - max); } - } + } + if (!IsFinite(sum) || sum <= 0d) + throw new InvalidOperationException("SNIS failed because the finite importance weights could not be normalized."); + double normalization = max + Math.Log(sum); + if (!IsFinite(normalization)) + throw new InvalidOperationException("SNIS failed because the importance-weight normalization is non-finite."); // Compute the posterior weights Parallel.For(0, Iterations, (idx) => { - double w = !double.IsNaN(MarkovChains[0][idx].Weight) && MarkovChains[0][idx].Weight != double.MinValue ? Math.Exp(MarkovChains[0][idx].Weight - normalization) : 0d; + double w = IsFinite(MarkovChains[0][idx].Weight) ? Math.Exp(MarkovChains[0][idx].Weight - normalization) : 0d; MarkovChains[0][idx] = new ParameterSet(MarkovChains[0][idx].Values, MarkovChains[0][idx].Fitness, w); }); @@ -180,5 +188,15 @@ public override void Sample() } } + /// + /// Determines whether a value is finite. + /// + /// The value to evaluate. + /// true when the value is neither NaN nor infinite; otherwise false. + private static bool IsFinite(double value) + { + return !double.IsNaN(value) && !double.IsInfinity(value); + } + } } diff --git a/Numerics/Sampling/MCMC/Support/MCMCDiagnostics.cs b/Numerics/Sampling/MCMC/Support/MCMCDiagnostics.cs index d4b8835a..7371f31c 100644 --- a/Numerics/Sampling/MCMC/Support/MCMCDiagnostics.cs +++ b/Numerics/Sampling/MCMC/Support/MCMCDiagnostics.cs @@ -202,7 +202,7 @@ public static int MinimumSampleSize(double quantile, double tolerance, double pr double r = tolerance; double s = probability; double N = (q * (1d - q) * Math.Pow(Normal.StandardZ(0.5d * (s + 1d)), 2d)) / Math.Pow(r, 2d); - int Nmin = (int)Math.Round(decimal.Parse(N.ToString()) / 100M, 0) * 100; + int Nmin = (int)Math.Round(N / 100d) * 100; return Nmin; } diff --git a/Numerics/Sampling/MCMC/Support/MCMCResults.cs b/Numerics/Sampling/MCMC/Support/MCMCResults.cs index 196309d4..dab56975 100644 --- a/Numerics/Sampling/MCMC/Support/MCMCResults.cs +++ b/Numerics/Sampling/MCMC/Support/MCMCResults.cs @@ -134,7 +134,7 @@ private void ProcessParameterResults(MCMCSampler sampler, double alpha = 0.1) /// /// Process a parameter results using the output list. /// - /// The confidence level; Default = 0.1, which will result in the 90% confidence intervals. + /// The confidence level; Default = 0.1, which will result in the 90% confidence intervals. private void ProcessParameterResults(double alpha = 0.1) { int p = Output.First().Values.Length; @@ -146,10 +146,57 @@ private void ProcessParameterResults(double alpha = 0.1) ParameterResults[i] = new ParameterResults(x, alpha); postMean[i] = ParameterResults[i].SummaryStatistics.Mean; } - // Set the posterior mean parameter set. + // Set the posterior mean parameter set. PosteriorMean = new ParameterSet(postMean, double.NaN); } + /// + /// Recompute parameter summary statistics at a new credible-interval level + /// (alpha) without rerunning the chain. Preserves Rhat, ESS, autocorrelation, + /// MarkovChains, AcceptanceRates, MeanLogLikelihood, MAP, and Output. + /// + /// + /// The new significance level (e.g., 0.05 for 95% credible intervals, + /// 0.10 for 90% credible intervals). + /// + /// + /// Used when the user changes the credible-interval width on a Bayesian + /// analysis whose MCMC chain has already converged. The chain output is + /// independent of alpha; only the posterior summary percentiles + /// (LowerCI/UpperCI) need to be recomputed. Snapshots the per-parameter + /// Rhat, ESS, and autocorrelation diagnostics (which depend on the chain, + /// not on alpha) before delegating to the existing private reprocess, then + /// restores them on the freshly-built ParameterResults instances. + /// + public void RecomputeParameterResults(double alpha) + { + if (ParameterResults == null || Output == null || Output.Count == 0) return; + + // Snapshot diagnostics that are independent of alpha. Length matches + // the parameter count of the existing ParameterResults array. + int n = ParameterResults.Length; + var rhats = new double[n]; + var esss = new double[n]; + var acfs = new double[n][,]; + for (int i = 0; i < n; i++) + { + rhats[i] = ParameterResults[i].SummaryStatistics.Rhat; + esss[i] = ParameterResults[i].SummaryStatistics.ESS; + acfs[i] = ParameterResults[i].Autocorrelation; + } + + // Recompute (replaces ParameterResults[] and PosteriorMean — Output untouched) + ProcessParameterResults(alpha); + + // Restore preserved diagnostics + for (int i = 0; i < ParameterResults.Length; i++) + { + ParameterResults[i].SummaryStatistics.Rhat = rhats[i]; + ParameterResults[i].SummaryStatistics.ESS = esss[i]; + ParameterResults[i].Autocorrelation = acfs[i]; + } + } + #region Serialization /// diff --git a/Numerics/Sampling/SobolSequence.cs b/Numerics/Sampling/SobolSequence.cs index a6399858..4721a186 100644 --- a/Numerics/Sampling/SobolSequence.cs +++ b/Numerics/Sampling/SobolSequence.cs @@ -1,4 +1,5 @@ using System; +using System.Globalization; using System.IO; namespace Numerics.Sampling @@ -112,7 +113,7 @@ private void initialize() var st = line.Split(' '); int dim; - int.TryParse(st[0], out dim); + int.TryParse(st[0], NumberStyles.Integer, CultureInfo.InvariantCulture, out dim); if (dim >= 2 && dim <= Dimension) { @@ -122,7 +123,7 @@ private void initialize() { if (st[i] != "") { - int.TryParse(st[i], out s); + int.TryParse(st[i], NumberStyles.Integer, CultureInfo.InvariantCulture, out s); break; } } @@ -131,7 +132,7 @@ private void initialize() { if (st[i] != "") { - int.TryParse(st[i], out a); + int.TryParse(st[i], NumberStyles.Integer, CultureInfo.InvariantCulture, out a); break; } } @@ -143,7 +144,7 @@ private void initialize() { for (int j = 1; j <= s; j++) { - int.TryParse(st[i + j - 1], out m[j]); + int.TryParse(st[i + j - 1], NumberStyles.Integer, CultureInfo.InvariantCulture, out m[j]); } break; } diff --git a/Numerics/Utilities/Tools.cs b/Numerics/Utilities/Tools.cs index a69d6951..3455d4f0 100644 --- a/Numerics/Utilities/Tools.cs +++ b/Numerics/Utilities/Tools.cs @@ -519,7 +519,7 @@ public static double Product(IList values, IList indicators, bool u public static void MinMax(IList values, out double min, out double max) { min = double.MaxValue; - max = double.MinValue; + max = double.NegativeInfinity; if (values.Count == 0) { min = double.NaN; max = double.NaN; return; }; for (int i = 0; i < values.Count; i++) { @@ -556,11 +556,11 @@ public static int ArgMin(IList values) /// The list of values. public static int ArgMax(IList values) { - double max = double.MinValue; + double max = double.NegativeInfinity; int index = -1; for (int i = 0; i < values.Count; i++) { - if (values[i] > max) + if (!double.IsNaN(values[i]) && (index == -1 || values[i] > max)) { max = values[i]; index = i; @@ -622,7 +622,7 @@ public static double Min(IList values, IList indicators, bool useCo public static double Max(IList values) { if (values.Count == 0) return double.NaN; - double max = double.MinValue; + double max = double.NegativeInfinity; for (int i = 0; i < values.Count; i++) { double v = values[i]; @@ -643,7 +643,7 @@ public static double Max(IList values, IList indicators, bool useCo { if (values.Count == 0) return double.NaN; if (indicators.Count != values.Count) return double.NaN; - double max = double.MinValue; + double max = double.NegativeInfinity; bool any = false; int flag = useComplement ? 0 : 1; for (int i = 0; i < values.Count; i++) @@ -685,6 +685,7 @@ public static double ParallelAdd(ref double valueToAddTo, double valueToAdd) public static double LogSumExp(double u, double v) { double max = Math.Max(u, v); + if (double.IsNegativeInfinity(max)) return double.NegativeInfinity; return max + Math.Log(Math.Exp(u - max) + Math.Exp(v - max)); } @@ -696,6 +697,7 @@ public static double LogSumExp(IList values) { if (values.Count == 0) return double.NaN; double max = Max(values); + if (double.IsNegativeInfinity(max)) return double.NegativeInfinity; double sum = 0; for (int i = 0; i < values.Count; i++) { diff --git a/Test_Numerics/Data/Time Series/Test_TimeSeries.cs b/Test_Numerics/Data/Time Series/Test_TimeSeries.cs index ce63a623..7aced5d8 100644 --- a/Test_Numerics/Data/Time Series/Test_TimeSeries.cs +++ b/Test_Numerics/Data/Time Series/Test_TimeSeries.cs @@ -447,6 +447,167 @@ public void Test_MovingSum() } } + /// + /// Test that MovingSum propagates NaN strictly by default (any NaN in window -> NaN out). + /// Matches pandas Series.rolling(w).sum() default min_periods=w behavior. + /// + [TestMethod] + public void Test_MovingSum_NaN_Strict() + { + var ts = new TimeSeries(TimeInterval.OneDay, new DateTime(2023, 01, 01), new double[] { 1.0, double.NaN, 3.0, 4.0, 5.0 }); + + var newTS = ts.MovingSum(2); + // window positions: [1,NaN], [NaN,3], [3,4], [4,5] -> [NaN, NaN, 7, 9] + var valid = new double[] { double.NaN, double.NaN, 7.0, 9.0 }; + Assert.AreEqual(valid.Length, newTS.Count); + for (int i = 0; i < newTS.Count; i++) + { + Assert.AreEqual(valid[i], newTS[i].Value); + } + } + + /// + /// Test that MovingSum with minValidCount=1 skips NaN values (pandas min_periods=1 semantics). + /// Sum is over observed values only; no scaling is applied. + /// + [TestMethod] + public void Test_MovingSum_NaN_SkipMode() + { + var ts = new TimeSeries(TimeInterval.OneDay, new DateTime(2023, 01, 01), new double[] { 1.0, double.NaN, 3.0, 4.0, 5.0 }); + + var newTS = ts.MovingSum(2, minValidCount: 1); + // window positions: [1,NaN]->1, [NaN,3]->3, [3,4]->7, [4,5]->9 + var valid = new double[] { 1.0, 3.0, 7.0, 9.0 }; + Assert.AreEqual(valid.Length, newTS.Count); + for (int i = 0; i < newTS.Count; i++) + { + Assert.AreEqual(valid[i], newTS[i].Value); + } + } + + /// + /// Test that MovingAverage propagates NaN strictly by default. + /// + [TestMethod] + public void Test_MovingAverage_NaN_Strict() + { + var ts = new TimeSeries(TimeInterval.OneDay, new DateTime(2023, 01, 01), new double[] { 1.0, double.NaN, 3.0, 4.0, 5.0 }); + + var newTS = ts.MovingAverage(2); + var valid = new double[] { double.NaN, double.NaN, 3.5, 4.5 }; + Assert.AreEqual(valid.Length, newTS.Count); + for (int i = 0; i < newTS.Count; i++) + { + Assert.AreEqual(valid[i], newTS[i].Value); + } + } + + /// + /// Test that MovingAverage with minValidCount=1 averages over non-NaN values only. + /// Denominator is the count of observed values, not the window size — this fixes the prior low-bias. + /// + [TestMethod] + public void Test_MovingAverage_NaN_SkipMode() + { + var ts = new TimeSeries(TimeInterval.OneDay, new DateTime(2023, 01, 01), new double[] { 1.0, double.NaN, 3.0, 4.0, 5.0 }); + + var newTS = ts.MovingAverage(2, minValidCount: 1); + // [1,NaN]->1/1=1, [NaN,3]->3/1=3, [3,4]->7/2=3.5, [4,5]->9/2=4.5 + var valid = new double[] { 1.0, 3.0, 3.5, 4.5 }; + Assert.AreEqual(valid.Length, newTS.Count); + for (int i = 0; i < newTS.Count; i++) + { + Assert.AreEqual(valid[i], newTS[i].Value); + } + } + + /// + /// Regression guard: Difference relies on IEEE-754 subtraction to propagate NaN automatically. + /// + [TestMethod] + public void Test_Difference_NaN() + { + var ts = new TimeSeries(TimeInterval.OneDay, new DateTime(2023, 01, 01), new double[] { 1.0, double.NaN, 3.0, 7.0 }); + + var newTS = ts.Difference(); + // diffs: NaN-1=NaN, 3-NaN=NaN, 7-3=4 + Assert.AreEqual(3, newTS.Count); + Assert.AreEqual(double.NaN, newTS[0].Value); + Assert.AreEqual(double.NaN, newTS[1].Value); + Assert.AreEqual(4.0, newTS[2].Value); + } + + /// + /// Test that CalendarYearSeries with Sum/Average drops years containing any NaN + /// (rather than the previous behavior of silently treating NaN as zero). + /// + [TestMethod] + public void Test_CalendarYearSeries_NaN() + { + // Two years of monthly data; year 2 has one NaN month. + var values = new double[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 1, double.NaN, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 }; + var ts = new TimeSeries(TimeInterval.OneMonth, new DateTime(2023, 01, 01), values); + + // Sum: year 1 = 78, year 2 = NaN (dropped from output) + var sumTS = ts.CalendarYearSeries(BlockFunctionType.Sum); + Assert.AreEqual(1, sumTS.Count); + Assert.AreEqual(78.0, sumTS[0].Value); + Assert.AreEqual(2023, sumTS[0].Index.Year); + + // Average: same — year 2 dropped because NaN propagates + var avgTS = ts.CalendarYearSeries(BlockFunctionType.Average); + Assert.AreEqual(1, avgTS.Count); + Assert.AreEqual(78.0 / 12.0, avgTS[0].Value, 1E-9); + + // Maximum: NaN values are skipped via the < / > comparisons (false on NaN), so both years survive. + var maxTS = ts.CalendarYearSeries(BlockFunctionType.Maximum); + Assert.AreEqual(2, maxTS.Count); + Assert.AreEqual(12.0, maxTS[0].Value); + Assert.AreEqual(12.0, maxTS[1].Value); + } + + /// + /// Regression guard: MonthlySeries already propagated NaN naively before this fix; verify it still does. + /// + [TestMethod] + public void Test_MonthlySeries_NaN() + { + // Two months of weekly data; month 2 has one NaN. + var values = new double[] { 10, 20, 30, 40, 50, 60, double.NaN, 80 }; + var ts = new TimeSeries(TimeInterval.SevenDay, new DateTime(2023, 01, 01), values); + + var sumTS = ts.MonthlySeries(BlockFunctionType.Sum); + // Jan 2023 has 5 weekly values: 10+20+30+40+50 = 150 + // Feb 2023 has 3 weekly values, one NaN -> sum = NaN, dropped + // Mar-Dec 2023 are empty -> skipped (no spurious zeros) + Assert.AreEqual(1, sumTS.Count); + Assert.AreEqual(150.0, sumTS[0].Value, 1E-9); + } + + /// + /// Test that PeaksOverThresholdSeries with a multi-day MovingSum smoothing on data with NaN + /// does not produce spurious exceedances from windows that touched a missing day. + /// This is the user-reported bug: 2-day moving sum on precipitation with gaps was returning + /// the value of the single non-missing day instead of NaN, causing zero-baseline exceedances. + /// + [TestMethod] + public void Test_PeaksOverThreshold_MovingSum_NaN() + { + // One legitimate 2-day exceedance (5 + 6 = 11). The trailing 8.0 sits at the very end + // of the series with a NaN before it, so the only window touching it is [NaN, 8.0]. + // Pre-fix: [NaN, 8.0] silently became 8.0 -> spurious extra event above threshold 7. + // Post-fix: that window is NaN -> excluded. Only [5, 6] = 11 remains. + var values = new double[] { 0.1, 0.2, 5.0, 6.0, 0.1, 0.0, 0.1, 0.2, 0.0, double.NaN, 8.0 }; + var ts = new TimeSeries(TimeInterval.OneDay, new DateTime(2023, 01, 01), values); + + var pot = ts.PeaksOverThresholdSeries(threshold: 7.0, minStepsBetweenEvents: 1, + smoothingFunction: SmoothingFunctionType.MovingSum, period: 2); + + Assert.AreEqual(1, pot.Count); + Assert.AreEqual(11.0, pot[0].Value, 1E-9); + } + /// /// Test the method that shifts all dates to a new starting date /// @@ -1035,5 +1196,283 @@ public void Test_SeasonalDecompose_InvalidInputs() Assert.Throws(() => ts.SeasonalDecompose(12)); } + #region ResampleWithKNN tests + + /// + /// Build a stationary AR(1) series x[t] = phi*x[t-1] + N(0, sigma^2) using a fixed seed. + /// Used by the ResampleWithKNN/ResampleWithBlockBootstrap statistical tests. + /// + private static TimeSeries MakeAR1(int n, double phi, double sigma, int seed) + { + var rng = new Random(seed); + var values = new double[n]; + values[0] = 0.0; + for (int i = 1; i < n; i++) + { + // Box-Muller for a standard normal draw. + double u1 = 1.0 - rng.NextDouble(); + double u2 = 1.0 - rng.NextDouble(); + double z = Math.Sqrt(-2.0 * Math.Log(u1)) * Math.Cos(2.0 * Math.PI * u2); + values[i] = phi * values[i - 1] + sigma * z; + } + return new TimeSeries(TimeInterval.OneDay, new DateTime(2000, 1, 1), values); + } + + /// + /// Same seed must produce identical KNN resamples. + /// + [TestMethod] + public void Test_ResampleWithKNN_DeterministicWithSeed() + { + var ts = MakeAR1(n: 100, phi: 0.5, sigma: 1.0, seed: 1); + var a = ts.ResampleWithKNN(timeSteps: 50, k: 10, seed: 42); + var b = ts.ResampleWithKNN(timeSteps: 50, k: 10, seed: 42); + Assert.AreEqual(a.Count, b.Count); + for (int i = 0; i < a.Count; i++) + Assert.AreEqual(a[i].Value, b[i].Value, 1e-12, + $"KNN must be deterministic with a fixed seed; index {i} differs."); + } + + /// + /// Output length must equal the requested timeSteps exactly. + /// + [TestMethod] + public void Test_ResampleWithKNN_OutputLength() + { + var ts = MakeAR1(n: 100, phi: 0.5, sigma: 1.0, seed: 1); + foreach (int len in new[] { 1, 7, 50, 250 }) + { + var r = ts.ResampleWithKNN(timeSteps: len, k: 10, seed: 42); + Assert.AreEqual(len, r.Count, + $"KNN output length must match the requested timeSteps; expected {len}, got {r.Count}."); + } + } + + /// + /// Regression test for the off-by-one bug. + /// On a strongly trended series x[t] = t, the conditional KNN bootstrap (Lall-Sharma) + /// should advance through the trend, accumulating mean drift of ≈ +1 per step. The + /// pre-fix implementation took the neighbor's own value rather than x[j+1], which + /// kept the trajectory hovering near the starting value (zero net drift). + /// We average drift over multiple seeds to crush the random-walk noise. + /// + [TestMethod] + public void Test_ResampleWithKNN_AdvancesThroughTime() + { + // x[t] = t, t in [0, 199] + var values = Enumerable.Range(0, 200).Select(i => (double)i).ToArray(); + var ts = new TimeSeries(TimeInterval.OneDay, new DateTime(2000, 1, 1), values); + + const int steps = 50; + const int trials = 20; + double avgDrift = 0; + for (int seed = 1; seed <= trials; seed++) + { + var r = ts.ResampleWithKNN(timeSteps: steps, k: 10, seed: seed); + avgDrift += (r[steps - 1].Value - r[0].Value); + } + avgDrift /= trials; + + // Pre-fix: avgDrift ~ N(0, ~5) — fails this assertion clearly. + // Post-fix: avgDrift ~ +50 (drift = +1 per step over 50 steps). + Assert.IsGreaterThan(25.0, avgDrift, + $"Expected KNN trajectory to advance through the trend (avgDrift > 25 over {steps} steps); " + + $"observed avgDrift = {avgDrift:F2}. The pre-fix off-by-one keeps the trajectory near the starting value."); + } + + /// + /// The marginal mean of resampled tails should match the input mean within sampling error. + /// + [TestMethod] + public void Test_ResampleWithKNN_PreservesMarginalMean() + { + var ts = MakeAR1(n: 200, phi: 0.5, sigma: 1.0, seed: 7); + double inputMean = ts.MeanValue(); + double inputStd = ts.StandardDeviation(); + + const int trials = 200; + const int steps = 100; + double sumOfTailMeans = 0; + for (int seed = 1; seed <= trials; seed++) + { + var r = ts.ResampleWithKNN(timeSteps: steps, k: 10, seed: seed); + sumOfTailMeans += r.MeanValue(); + } + double grandMean = sumOfTailMeans / trials; + double standardError = inputStd / Math.Sqrt(steps * trials); + + // Allow 4x SE — generous to absorb short-series boundary effects in KNN. + Assert.IsLessThan(4.0 * standardError + 0.05, Math.Abs(grandMean - inputMean), + $"KNN tails should preserve the marginal mean; input={inputMean:F4}, grand-mean={grandMean:F4}, " + + $"4*SE={4.0 * standardError:F4}."); + } + + /// + /// The marginal variance of resampled tails should be in the same regime as the input variance. + /// KNN can shrink variance because the conditional sampling pulls toward the local mean — we + /// only require the resample variance to remain within 50% of the input variance. + /// + [TestMethod] + public void Test_ResampleWithKNN_PreservesMarginalVariance() + { + var ts = MakeAR1(n: 200, phi: 0.5, sigma: 1.0, seed: 7); + double inputVar = ts.StandardDeviation() * ts.StandardDeviation(); + + const int trials = 200; + const int steps = 100; + double sumOfTailVars = 0; + for (int seed = 1; seed <= trials; seed++) + { + var r = ts.ResampleWithKNN(timeSteps: steps, k: 10, seed: seed); + double rStd = r.StandardDeviation(); + sumOfTailVars += rStd * rStd; + } + double avgTailVar = sumOfTailVars / trials; + double ratio = avgTailVar / inputVar; + + Assert.IsTrue(ratio > 0.5 && ratio < 2.0, + $"KNN tail variance should be within 0.5x..2x of input variance; input={inputVar:F4}, " + + $"avg-tail={avgTailVar:F4}, ratio={ratio:F2}."); + } + + /// + /// On a synthetic AR(1) with phi=0.7, the resampled tails should recover lag-1 autocorrelation + /// in the same regime. The conditional bootstrap is what gives KNN this property. + /// + [TestMethod] + public void Test_ResampleWithKNN_RecoversAR1Autocorrelation() + { + const double phiTrue = 0.7; + var ts = MakeAR1(n: 300, phi: phiTrue, sigma: 1.0, seed: 7); + + const int trials = 200; + const int steps = 100; + double sumLag1 = 0; + for (int seed = 1; seed <= trials; seed++) + { + var r = ts.ResampleWithKNN(timeSteps: steps, k: 10, seed: seed); + sumLag1 += Lag1Autocorrelation(r); + } + double avgLag1 = sumLag1 / trials; + + // Post-fix: avgLag1 should be > 0.4 (close to phiTrue=0.7, allow shrinkage). + // Pre-fix: avgLag1 is dominated by KNN-neighbor random walk near a fixed point, NOT phi. + Assert.IsTrue(avgLag1 > 0.4 && avgLag1 < 0.95, + $"KNN should recover lag-1 autocorrelation in the AR(1) regime; expected ~{phiTrue}, got {avgLag1:F3}."); + } + + private static double Lag1Autocorrelation(TimeSeries ts) + { + int n = ts.Count; + if (n < 3) return double.NaN; + double mean = ts.MeanValue(); + double num = 0, den = 0; + for (int i = 1; i < n; i++) num += (ts[i].Value - mean) * (ts[i - 1].Value - mean); + for (int i = 0; i < n; i++) den += (ts[i].Value - mean) * (ts[i].Value - mean); + return den > 0 ? num / den : double.NaN; + } + + /// + /// KNN must reject series that are too short for a sensible conditional bootstrap. + /// + [TestMethod] + public void Test_ResampleWithKNN_ThrowsForShortSeries() + { + // Count = 10 — one less than required since we exclude the last index from candidates + // and the underlying KNearestNeighbors needs ≥10 training points. + var ts = MakeAR1(n: 10, phi: 0.5, sigma: 1.0, seed: 1); + Assert.Throws(() => ts.ResampleWithKNN(timeSteps: 5, k: 5, seed: 42)); + } + + #endregion + + #region ResampleWithBlockBootstrap tests + + /// + /// Same seed must produce identical block-bootstrap resamples. + /// + [TestMethod] + public void Test_ResampleWithBlockBootstrap_DeterministicWithSeed() + { + var ts = MakeAR1(n: 100, phi: 0.5, sigma: 1.0, seed: 1); + var a = ts.ResampleWithBlockBootstrap(timeSteps: 50, blockSize: 5, seed: 42); + var b = ts.ResampleWithBlockBootstrap(timeSteps: 50, blockSize: 5, seed: 42); + Assert.AreEqual(a.Count, b.Count); + for (int i = 0; i < a.Count; i++) + Assert.AreEqual(a[i].Value, b[i].Value, 1e-12, + $"BlockBootstrap must be deterministic with a fixed seed; index {i} differs."); + } + + /// + /// Output length must equal the requested timeSteps exactly even when not divisible by blockSize. + /// + [TestMethod] + public void Test_ResampleWithBlockBootstrap_OutputLength() + { + var ts = MakeAR1(n: 100, phi: 0.5, sigma: 1.0, seed: 1); + // Try lengths that are NOT multiples of blockSize=7 to verify the truncation logic. + foreach (int len in new[] { 1, 7, 13, 50, 251 }) + { + var r = ts.ResampleWithBlockBootstrap(timeSteps: len, blockSize: 7, seed: 42); + Assert.AreEqual(len, r.Count, + $"BlockBootstrap output length must match requested timeSteps; expected {len}, got {r.Count}."); + } + } + + /// + /// The marginal mean of block-bootstrap tails should match the input mean within sampling error. + /// + [TestMethod] + public void Test_ResampleWithBlockBootstrap_PreservesMarginalMean() + { + var ts = MakeAR1(n: 200, phi: 0.5, sigma: 1.0, seed: 7); + double inputMean = ts.MeanValue(); + double inputStd = ts.StandardDeviation(); + + const int trials = 200; + const int steps = 100; + double sumOfTailMeans = 0; + for (int seed = 1; seed <= trials; seed++) + { + var r = ts.ResampleWithBlockBootstrap(timeSteps: steps, blockSize: 5, seed: seed); + sumOfTailMeans += r.MeanValue(); + } + double grandMean = sumOfTailMeans / trials; + double standardError = inputStd / Math.Sqrt(steps * trials); + + Assert.IsLessThan(4.0 * standardError + 0.05, Math.Abs(grandMean - inputMean), + $"BlockBootstrap tails should preserve the marginal mean; input={inputMean:F4}, " + + $"grand-mean={grandMean:F4}, 4*SE={4.0 * standardError:F4}."); + } + + /// + /// The marginal variance of block-bootstrap tails should be close to the input variance. + /// Block bootstrap preserves marginal variance well; we require within ±20%. + /// + [TestMethod] + public void Test_ResampleWithBlockBootstrap_PreservesMarginalVariance() + { + var ts = MakeAR1(n: 200, phi: 0.5, sigma: 1.0, seed: 7); + double inputVar = ts.StandardDeviation() * ts.StandardDeviation(); + + const int trials = 200; + const int steps = 100; + double sumOfTailVars = 0; + for (int seed = 1; seed <= trials; seed++) + { + var r = ts.ResampleWithBlockBootstrap(timeSteps: steps, blockSize: 5, seed: seed); + double rStd = r.StandardDeviation(); + sumOfTailVars += rStd * rStd; + } + double avgTailVar = sumOfTailVars / trials; + double ratio = avgTailVar / inputVar; + + Assert.IsTrue(ratio > 0.8 && ratio < 1.2, + $"BlockBootstrap should preserve marginal variance within ±20%; input={inputVar:F4}, " + + $"avg-tail={avgTailVar:F4}, ratio={ratio:F2}."); + } + + #endregion + } } diff --git a/Test_Numerics/Data/Time Series/Test_TimeSeriesDownload.cs b/Test_Numerics/Data/Time Series/Test_TimeSeriesDownload.cs index 3ec1712c..f5f6693d 100644 --- a/Test_Numerics/Data/Time Series/Test_TimeSeriesDownload.cs +++ b/Test_Numerics/Data/Time Series/Test_TimeSeriesDownload.cs @@ -122,24 +122,48 @@ public class Test_TimeSeriesDownload private static async Task Online() => await TimeSeriesDownload.IsConnectedToInternet(); /// - /// Per-service availability cache. Each entry runs its probe at most once per test - /// run; subsequent calls reuse the cached task. Used so that - /// integration tests skip cleanly when the upstream provider is unreachable - /// (e.g. BOM's WDP backend returning 500 / DatasourceError) instead of failing CI. + /// Per-service availability cache. Successful probes are cached for the rest of + /// the run. Failed probes are cached only for , + /// so a single transient flake (e.g. BOM's WDP backend returning 500 / + /// DatasourceError once) does not silently disable every test for that + /// provider for the entire run. /// - private static readonly ConcurrentDictionary>> _serviceAvailability = new(); + private static readonly ConcurrentDictionary _serviceState = new(); /// - /// Memoized "can we hit this service?" probe. Returns false when the host is offline - /// or when the supplied probe throws (any exception is treated as "service down"). + /// How long a "service unreachable" verdict sticks before we re-probe. /// - private static Task AvailableAsync(string serviceName, Func probe) => - _serviceAvailability.GetOrAdd(serviceName, _ => new Lazy>(async () => + private static readonly TimeSpan ProbeFailureTtl = TimeSpan.FromSeconds(30); + + /// + /// "Can we hit this service?" probe with TTL on negative results. Returns false + /// when the host is offline or when the supplied probe throws (any exception is + /// treated as "service down" for the next 30 seconds). + /// + private static async Task AvailableAsync(string serviceName, Func probe) + { + if (_serviceState.TryGetValue(serviceName, out var prior)) + { + if (prior.ok) return true; + if (DateTime.UtcNow - prior.at < ProbeFailureTtl) return false; + } + if (!await Online()) { - if (!await Online()) return false; - try { await probe(); return true; } - catch { return false; } - })).Value; + _serviceState[serviceName] = (false, DateTime.UtcNow); + return false; + } + try + { + await probe(); + _serviceState[serviceName] = (true, DateTime.UtcNow); + return true; + } + catch + { + _serviceState[serviceName] = (false, DateTime.UtcNow); + return false; + } + } /// /// Probes a small windowed CHMN download to confirm Water Survey of Canada services are responsive. diff --git a/Test_Numerics/Distributions/Bivariate Copulas/Test_NormalCopula.cs b/Test_Numerics/Distributions/Bivariate Copulas/Test_NormalCopula.cs index da5ef2c2..6e43b5c0 100644 --- a/Test_Numerics/Distributions/Bivariate Copulas/Test_NormalCopula.cs +++ b/Test_Numerics/Distributions/Bivariate Copulas/Test_NormalCopula.cs @@ -52,6 +52,30 @@ public void Test_PDF() Assert.AreEqual(3.208773, copula.PDF(0.8, 0.2), 1E-6); } + /// + /// Test that invalid boundary log-density evaluations return log(0). + /// + [TestMethod] + public void Test_LogPDF_InvalidBoundary_ReturnsNegativeInfinity() + { + var copula = new NormalCopula(0.8); + + Assert.AreEqual(double.NegativeInfinity, copula.LogPDF(0.0, 0.5)); + } + + /// + /// Test that aggregate pseudo log-likelihood returns log(0) when any term is invalid. + /// + [TestMethod] + public void Test_PseudoLogLikelihood_InvalidBoundary_ReturnsNegativeInfinity() + { + var copula = new NormalCopula(0.8); + + Assert.AreEqual( + double.NegativeInfinity, + copula.PseudoLogLikelihood(new[] { 0.0, 0.2 }, new[] { 0.5, 0.8 })); + } + /// /// Test CDF /// diff --git a/Test_Numerics/Distributions/Multivariate/Test_Dirichlet.cs b/Test_Numerics/Distributions/Multivariate/Test_Dirichlet.cs index 6fc04e62..b8f5321d 100644 --- a/Test_Numerics/Distributions/Multivariate/Test_Dirichlet.cs +++ b/Test_Numerics/Distributions/Multivariate/Test_Dirichlet.cs @@ -215,6 +215,19 @@ public void Test_LogPDF() Assert.AreEqual(Math.Log(D.PDF(x)), D.LogPDF(x), 1e-10); } + /// + /// Test that invalid simplex inputs return the canonical log(0) sentinel. + /// + [TestMethod] + public void Test_LogPDF_InvalidInput_ReturnsNegativeInfinity() + { + var D = new Dirichlet(new double[] { 1.0, 2.0, 3.0 }); + + Assert.AreEqual(double.NegativeInfinity, D.LogPDF(new double[] { 0.5, 0.5 })); + Assert.AreEqual(double.NegativeInfinity, D.LogPDF(new double[] { -0.1, 0.6, 0.5 })); + Assert.AreEqual(double.NegativeInfinity, D.LogPDF(new double[] { 0.3, 0.3, 0.3 })); + } + /// /// Test random sampling: all samples on the simplex and moments converge. /// diff --git a/Test_Numerics/Distributions/Multivariate/Test_Multinomial.cs b/Test_Numerics/Distributions/Multivariate/Test_Multinomial.cs index 19ba5060..25bddfa6 100644 --- a/Test_Numerics/Distributions/Multivariate/Test_Multinomial.cs +++ b/Test_Numerics/Distributions/Multivariate/Test_Multinomial.cs @@ -153,6 +153,19 @@ public void Test_LogPMF() Assert.AreEqual(Math.Log(M.PDF(x)), M.LogPMF(x), 1e-10); } + /// + /// Test that invalid count vectors return the canonical log(0) sentinel. + /// + [TestMethod] + public void Test_LogPMF_InvalidInput_ReturnsNegativeInfinity() + { + var M = new Multinomial(10, new double[] { 0.5, 0.5 }); + + Assert.AreEqual(double.NegativeInfinity, M.LogPMF(new double[] { 3, 3 })); + Assert.AreEqual(double.NegativeInfinity, M.LogPMF(new double[] { -1, 11 })); + Assert.AreEqual(double.NegativeInfinity, M.LogPMF(new double[] { 5, 3, 2 })); + } + /// /// Test random sampling: each sample sums to N and moments converge. /// diff --git a/Test_Numerics/Distributions/Multivariate/Test_MultivariateNormal.cs b/Test_Numerics/Distributions/Multivariate/Test_MultivariateNormal.cs index 686610d0..93d227a5 100644 --- a/Test_Numerics/Distributions/Multivariate/Test_MultivariateNormal.cs +++ b/Test_Numerics/Distributions/Multivariate/Test_MultivariateNormal.cs @@ -61,6 +61,17 @@ public void Test_MultivariateNormalDist() } + /// + /// Test that non-finite log-density evaluations return the canonical log(0) sentinel. + /// + [TestMethod] + public void Test_LogPDF_NonFiniteInput_ReturnsNegativeInfinity() + { + var mvn = new MultivariateNormal(new[] { 0d, 0d }, new[,] { { 1d, 0d }, { 0d, 1d } }); + + Assert.AreEqual(double.NegativeInfinity, mvn.LogPDF(new[] { double.PositiveInfinity, 0d })); + } + /// /// Verification against Fortran routine /// http://www.math.wsu.edu/faculty/genz/software/fort77/mvndstpack.f diff --git a/Test_Numerics/Distributions/Multivariate/Test_MultivariateStudentT.cs b/Test_Numerics/Distributions/Multivariate/Test_MultivariateStudentT.cs index f970b5f7..7210e8dc 100644 --- a/Test_Numerics/Distributions/Multivariate/Test_MultivariateStudentT.cs +++ b/Test_Numerics/Distributions/Multivariate/Test_MultivariateStudentT.cs @@ -171,6 +171,17 @@ public void Test_LogPDF() Assert.AreEqual(-3.4353564596992499, mvt.LogPDF(new[] { 0.0, 0.0 }), 1E-10); } + /// + /// Verify that non-finite log-density evaluations return the canonical log(0) sentinel. + /// + [TestMethod] + public void Test_LogPDF_NonFiniteInput_ReturnsNegativeInfinity() + { + var mvt = CreateStandard2D(); + + Assert.AreEqual(double.NegativeInfinity, mvt.LogPDF(new[] { double.PositiveInfinity, 0.0 })); + } + /// /// Verify that 1D MVT PDF matches univariate Student's t PDF. /// MVT(ν=5, μ=[3], Σ=[[4]]) should equal StudentT(μ=3, σ=2, ν=5). @@ -937,4 +948,4 @@ public void Test_InverseCDF_ConsistentWithSampling() #endregion } -} \ No newline at end of file +} diff --git a/Test_Numerics/Distributions/Univariate/Test_CompetingRisks.cs b/Test_Numerics/Distributions/Univariate/Test_CompetingRisks.cs index f4b16c6b..cbaef530 100644 --- a/Test_Numerics/Distributions/Univariate/Test_CompetingRisks.cs +++ b/Test_Numerics/Distributions/Univariate/Test_CompetingRisks.cs @@ -429,6 +429,25 @@ public void Test_LogPDF_ConsistentWithPDF() "LogPDF should equal log(PDF) at median for max rule"); } + /// + /// Verifies that dependent numerical-differentiation log-density returns log(0) + /// when the density is outside the component support. + /// + [TestMethod] + public void Test_LogPDF_DependentInvalidSupport_ReturnsNegativeInfinity() + { + var cr = new CompetingRisks(new UnivariateDistributionBase[] + { + new Exponential(1.0), + new Exponential(2.0) + }) + { + Dependency = Numerics.Data.Statistics.Probability.DependencyType.PerfectlyPositive + }; + + Assert.AreEqual(double.NegativeInfinity, cr.LogPDF(-1.0)); + } + #region Minimum Rule - 2 Distributions diff --git a/Test_Numerics/Distributions/Univariate/Test_Mixture.cs b/Test_Numerics/Distributions/Univariate/Test_Mixture.cs index b0cc281b..ed94ae02 100644 --- a/Test_Numerics/Distributions/Univariate/Test_Mixture.cs +++ b/Test_Numerics/Distributions/Univariate/Test_Mixture.cs @@ -249,5 +249,16 @@ public void Test_Mixture_InverseCDF_ZeroInflated() } } + /// + /// Test that a mixture with no component support at x returns log(0). + /// + [TestMethod] + public void Test_Mixture_LogPDF_InvalidSupport_ReturnsNegativeInfinity() + { + var mix = new Mixture(new[] { 1.0 }, new[] { new Exponential(1.0) }); + + Assert.AreEqual(double.NegativeInfinity, mix.LogPDF(-1.0)); + } + } } diff --git a/Test_Numerics/Functions/Test_FisherZLink.cs b/Test_Numerics/Functions/Test_FisherZLink.cs new file mode 100644 index 00000000..e79d363e --- /dev/null +++ b/Test_Numerics/Functions/Test_FisherZLink.cs @@ -0,0 +1,118 @@ +using Microsoft.VisualStudio.TestTools.UnitTesting; +using Numerics; +using Numerics.Functions; +using System; + +namespace Functions +{ + /// + /// Unit tests for the class. + /// + [TestClass] + public class Test_FisherZLink + { + private const double RoundTripTol = 1e-12; + private const double DeltaH = 1e-7; + private const double DerivativeTol = 1e-6; + + /// + /// Verifies known link values. + /// + [TestMethod] + public void Link_KnownValues() + { + var link = new FisherZLink(); + + Assert.AreEqual(0d, link.Link(0d), 1e-12); + Assert.AreEqual(0.5d * Math.Log(3d), link.Link(0.5d), 1e-12); + Assert.AreEqual(-0.5d * Math.Log(3d), link.Link(-0.5d), 1e-12); + } + + /// + /// Verifies inverse-link values. + /// + [TestMethod] + public void InverseLink_KnownValues() + { + var link = new FisherZLink(); + + Assert.AreEqual(0d, link.InverseLink(0d), 1e-12); + Assert.AreEqual(0.5d, link.InverseLink(0.5d * Math.Log(3d)), 1e-12); + Assert.AreEqual(-0.5d, link.InverseLink(-0.5d * Math.Log(3d)), 1e-12); + } + + /// + /// Verifies inverse-link values remain finite and approach the correlation bounds for large eta. + /// + [TestMethod] + public void InverseLink_LargeEta_ApproachesBounds() + { + var link = new FisherZLink(); + + double upper = link.InverseLink(10d); + double lower = link.InverseLink(-10d); + + Assert.IsTrue(Tools.IsFinite(upper)); + Assert.IsTrue(Tools.IsFinite(lower)); + Assert.IsLessThan(1d, upper); + Assert.IsGreaterThan(0.99999999d, upper); + Assert.IsGreaterThan(-1d, lower); + Assert.IsLessThan(-0.99999999d, lower); + } + + /// + /// Verifies round-trip behavior over the correlation domain. + /// + [TestMethod] + public void RoundTrip_RecoversInput() + { + var link = new FisherZLink(); + foreach (double x in new[] { -0.99d, -0.75d, -0.25d, 0d, 0.25d, 0.75d, 0.99d }) + { + Assert.AreEqual(x, link.InverseLink(link.Link(x)), RoundTripTol); + } + } + + /// + /// Verifies derivative values and finite-difference consistency. + /// + [TestMethod] + public void DLink_FiniteDifferenceConsistency() + { + var link = new FisherZLink(); + foreach (double x in new[] { -0.8d, -0.25d, 0d, 0.25d, 0.8d }) + { + double expected = 1d / (1d - x * x); + double finiteDifference = (link.Link(x + DeltaH) - link.Link(x - DeltaH)) / (2d * DeltaH); + Assert.AreEqual(expected, link.DLink(x), 1e-12); + Assert.AreEqual(finiteDifference, link.DLink(x), DerivativeTol); + } + } + + /// + /// Verifies domain checks. + /// + [TestMethod] + public void Link_OutsideOpenInterval_Throws() + { + var link = new FisherZLink(); + + Assert.Throws(() => link.Link(-1d)); + Assert.Throws(() => link.Link(1d)); + Assert.Throws(() => link.Link(double.NaN)); + Assert.Throws(() => link.DLink(-1d)); + Assert.Throws(() => link.DLink(1d)); + Assert.Throws(() => link.DLink(double.PositiveInfinity)); + } + + /// + /// Verifies factory and XML construction. + /// + [TestMethod] + public void Factory_CreatesFisherZLink() + { + Assert.IsInstanceOfType(LinkFunctionFactory.Create(LinkFunctionType.FisherZ), typeof(FisherZLink)); + Assert.IsInstanceOfType(LinkFunctionFactory.CreateFromXElement(new FisherZLink().ToXElement()), typeof(FisherZLink)); + } + } +} diff --git a/Test_Numerics/Functions/Test_YeoJohnsonLink.cs b/Test_Numerics/Functions/Test_YeoJohnsonLink.cs new file mode 100644 index 00000000..dc84ad77 --- /dev/null +++ b/Test_Numerics/Functions/Test_YeoJohnsonLink.cs @@ -0,0 +1,205 @@ +using Microsoft.VisualStudio.TestTools.UnitTesting; +using Numerics; +using Numerics.Functions; +using System; +using System.Xml.Linq; + +namespace Functions +{ + /// + /// Unit tests for the class. + /// + [TestClass] + public class Test_YeoJohnsonLink + { + private const double RoundTripTol = 1e-9; + private const double DeltaH = 1e-7; + private const double DerivativeTol = 1e-4; + + /// + /// Verifies the default constructor uses the identity-transform lambda. + /// + [TestMethod] + public void Constructor_Default_LambdaIsOne() + { + var link = new YeoJohnsonLink(); + + Assert.AreEqual(1d, link.Lambda, 1e-12); + } + + /// + /// Verifies the lambda constructor stores the requested value. + /// + [TestMethod] + public void Constructor_Lambda_StoresValue() + { + var link = new YeoJohnsonLink(0.5d); + + Assert.AreEqual(0.5d, link.Lambda, 1e-12); + } + + /// + /// Verifies the lambda constructor rejects non-finite and unsupported lambda values. + /// + [TestMethod] + public void Constructor_Lambda_InvalidValues_Throws() + { + Assert.Throws(() => new YeoJohnsonLink(double.NaN)); + Assert.Throws(() => new YeoJohnsonLink(double.PositiveInfinity)); + Assert.Throws(() => new YeoJohnsonLink(-5.1d)); + Assert.Throws(() => new YeoJohnsonLink(5.1d)); + } + + /// + /// Verifies the values constructor rejects null. + /// + [TestMethod] + public void Constructor_Values_Null_Throws() + { + Assert.Throws(() => new YeoJohnsonLink((double[])null!)); + } + + /// + /// Verifies the values constructor rejects degenerate samples. + /// + [TestMethod] + public void Constructor_Values_SingleElement_Throws() + { + Assert.Throws(() => new YeoJohnsonLink(new[] { 1d })); + } + + /// + /// Verifies the values constructor rejects non-finite samples. + /// + [TestMethod] + public void Constructor_Values_NonFiniteValue_Throws() + { + Assert.Throws(() => new YeoJohnsonLink(new[] { 1d, double.NaN })); + Assert.Throws(() => new YeoJohnsonLink(new[] { 1d, double.NegativeInfinity })); + } + + /// + /// Verifies lambda fitting produces a finite value for a non-degenerate sample. + /// + [TestMethod] + public void Constructor_Values_ProducesFiniteLambda() + { + var link = new YeoJohnsonLink(new[] { -2d, -1d, -0.25d, 0d, 0.5d, 1d, 3d }); + + Assert.IsTrue(Tools.IsFinite(link.Lambda)); + } + + /// + /// Verifies the XML constructor rejects null. + /// + [TestMethod] + public void Constructor_XElement_Null_Throws() + { + Assert.Throws(() => new YeoJohnsonLink((XElement)null!)); + } + + /// + /// Verifies XML construction rejects missing or invalid lambda values. + /// + [TestMethod] + public void Constructor_XElement_InvalidLambda_Throws() + { + Assert.Throws(() => new YeoJohnsonLink(new XElement(nameof(YeoJohnsonLink)))); + Assert.Throws(() => new YeoJohnsonLink(new XElement(nameof(YeoJohnsonLink), new XAttribute("Lambda", "NaN")))); + Assert.Throws(() => new YeoJohnsonLink(new XElement(nameof(YeoJohnsonLink), new XAttribute("Lambda", "6")))); + } + + /// + /// Verifies lambda equal to 1 is the identity transform. + /// + [TestMethod] + public void Link_LambdaOne_IsIdentity() + { + var link = new YeoJohnsonLink(1d); + foreach (double x in new[] { -5d, -1d, 0d, 0.5d, 1d, 5d }) + { + Assert.AreEqual(x, link.Link(x), 1e-10); + Assert.AreEqual(1d, link.DLink(x), 1e-10); + } + } + + /// + /// Verifies lambda zero uses the logarithmic positive-value branch. + /// + [TestMethod] + public void Link_LambdaZero_UsesPositiveLogBranch() + { + var link = new YeoJohnsonLink(0d); + + Assert.AreEqual(Math.Log(3d), link.Link(2d), 1e-12); + Assert.AreEqual(2d, link.InverseLink(Math.Log(3d)), 1e-12); + Assert.AreEqual(1d / 3d, link.DLink(2d), 1e-12); + } + + /// + /// Verifies lambda two uses the logarithmic negative-value branch. + /// + [TestMethod] + public void Link_LambdaTwo_UsesNegativeLogBranch() + { + var link = new YeoJohnsonLink(2d); + + Assert.AreEqual(-Math.Log(3d), link.Link(-2d), 1e-12); + Assert.AreEqual(-2d, link.InverseLink(-Math.Log(3d)), 1e-12); + Assert.AreEqual(1d / 3d, link.DLink(-2d), 1e-12); + } + + /// + /// Verifies round-trip behavior for positive and negative values. + /// + [TestMethod] + public void RoundTrip_PositiveAndNegativeValues() + { + var link = new YeoJohnsonLink(0.5d); + foreach (double x in new[] { -10d, -5d, -1d, -0.1d, 0d, 0.1d, 1d, 5d, 10d }) + { + double recovered = link.InverseLink(link.Link(x)); + Assert.AreEqual(x, recovered, RoundTripTol); + } + } + + /// + /// Verifies the derivative against finite differences. + /// + [TestMethod] + public void DLink_FiniteDifferenceConsistency() + { + var link = new YeoJohnsonLink(0.5d); + foreach (double x in new[] { -5d, -2d, -0.5d, 0.5d, 1d, 5d }) + { + double finiteDifference = (link.Link(x + DeltaH) - link.Link(x - DeltaH)) / (2d * DeltaH); + Assert.AreEqual(finiteDifference, link.DLink(x), DerivativeTol); + } + } + + /// + /// Verifies XML serialization preserves lambda. + /// + [TestMethod] + public void XmlRoundTrip_PreservesLambda() + { + var original = new YeoJohnsonLink(0.75d); + var restored = new YeoJohnsonLink(original.ToXElement()); + + Assert.AreEqual(original.Lambda, restored.Lambda, 1e-12); + } + + /// + /// Verifies factory construction for Yeo-Johnson links. + /// + [TestMethod] + public void Factory_CreatesYeoJohnsonLink() + { + Assert.IsInstanceOfType(LinkFunctionFactory.Create(LinkFunctionType.YeoJohnson), typeof(YeoJohnsonLink)); + + var restored = LinkFunctionFactory.CreateFromXElement(new YeoJohnsonLink(0.25d).ToXElement()); + Assert.IsInstanceOfType(restored, typeof(YeoJohnsonLink)); + Assert.AreEqual(0.25d, ((YeoJohnsonLink)restored).Lambda, 1e-12); + } + } +} diff --git a/Test_Numerics/Sampling/MCMC/Test_MCMCResults_Recompute.cs b/Test_Numerics/Sampling/MCMC/Test_MCMCResults_Recompute.cs new file mode 100644 index 00000000..5a8ec980 --- /dev/null +++ b/Test_Numerics/Sampling/MCMC/Test_MCMCResults_Recompute.cs @@ -0,0 +1,238 @@ +using System.Collections.Generic; +using System.Linq; +using Microsoft.VisualStudio.TestTools.UnitTesting; +using Numerics.Mathematics.Optimization; +using Numerics.Sampling; +using Numerics.Sampling.MCMC; + +namespace Sampling.MCMC +{ + /// + /// Unit tests for . The contract: + /// recomputing parameter results at a new alpha must update the credible-interval + /// percentiles (LowerCI/UpperCI) while preserving alpha-independent diagnostics + /// (Rhat, ESS, Autocorrelation) AND the underlying chain output (Output, MAP). + /// + /// + /// This is the cornerstone of the RMC.BestFit Phase 2 reprocess-don't-clear refactor. + /// A regression that drops one of the three snapshot/restore lines silently corrupts + /// every convergence-diagnostic display the moment a user changes CredibleIntervalWidth + /// on an estimated analysis. + /// + [TestClass] + public class Test_MCMCResults_Recompute + { + /// + /// Builds a minimal populated via the + /// constructor. + /// MarkovChains/AcceptanceRates/MeanLogLikelihood remain null per the constructor's + /// documented behavior. + /// + private static MCMCResults BuildResults(double alpha) + { + // Three parameters (mu, sigma, tau) sampled from synthetic distributions. + // 1000 samples is enough for smooth percentile estimates. + var rng = new MersenneTwister(2026); + var output = new List(capacity: 1000); + for (int i = 0; i < 1000; i++) + { + // Param 0: ~ Normal(10, 1) + double p0 = 10.0 + StandardNormal(rng); + // Param 1: ~ Normal(2, 0.5) + double p1 = 2.0 + 0.5 * StandardNormal(rng); + // Param 2: ~ Uniform(-1, 1) + double p2 = -1.0 + 2.0 * rng.NextDouble(); + output.Add(new ParameterSet(new[] { p0, p1, p2 }, 0)); + } + var map = new ParameterSet(new[] { 10.0, 2.0, 0.0 }, 0); + return new MCMCResults(map, output, alpha); + } + + /// Box-Muller transform, single sample. + private static double StandardNormal(MersenneTwister rng) + { + double u1 = rng.NextDouble(); + double u2 = rng.NextDouble(); + if (u1 < 1e-300) u1 = 1e-300; + return System.Math.Sqrt(-2.0 * System.Math.Log(u1)) * System.Math.Cos(2.0 * System.Math.PI * u2); + } + + /// + /// Seed each parameter's alpha-independent diagnostics with sentinel values. + /// These would normally be populated by the + /// path; for test purposes we set them directly to verify the snapshot/restore. + /// + private static (double[] rhats, double[] esss, double[][,] acfs) SeedDiagnostics(MCMCResults results) + { + int n = results.ParameterResults.Length; + var rhats = new double[n]; + var esss = new double[n]; + var acfs = new double[n][,]; + for (int i = 0; i < n; i++) + { + rhats[i] = 1.0 + 0.01 * (i + 1); // 1.01, 1.02, 1.03 + esss[i] = 800.0 + 10.0 * i; // 800, 810, 820 + acfs[i] = new double[1, 5] { { 1.0, 0.5 - 0.1 * i, 0.25, 0.125, 0.0625 } }; + results.ParameterResults[i].SummaryStatistics.Rhat = rhats[i]; + results.ParameterResults[i].SummaryStatistics.ESS = esss[i]; + results.ParameterResults[i].Autocorrelation = acfs[i]; + } + return (rhats, esss, acfs); + } + + [TestMethod] + public void Recompute_PreservesOutputReference() + { + var results = BuildResults(alpha: 0.10); + var outputRef = results.Output; + + results.RecomputeParameterResults(alpha: 0.05); + + // The Output list must be the same reference — the chain itself is not touched. + Assert.AreSame(outputRef, results.Output, "Output reference must not change."); + Assert.HasCount(1000, results.Output, "Output count must be preserved."); + } + + [TestMethod] + public void Recompute_PreservesMAP() + { + var results = BuildResults(alpha: 0.10); + var valuesBefore = (double[])results.MAP.Values.Clone(); + var fitnessBefore = results.MAP.Fitness; + + results.RecomputeParameterResults(alpha: 0.05); + + Assert.HasCount(valuesBefore.Length, results.MAP.Values, "MAP value count must not change."); + for (int i = 0; i < valuesBefore.Length; i++) + Assert.AreEqual(valuesBefore[i], results.MAP.Values[i], 1e-12, $"MAP value [{i}] must not change."); + Assert.AreEqual(fitnessBefore, results.MAP.Fitness, 1e-12, "MAP fitness must not change."); + } + + [TestMethod] + public void Recompute_PreservesRhat() + { + var results = BuildResults(alpha: 0.10); + var (rhats, _, _) = SeedDiagnostics(results); + + results.RecomputeParameterResults(alpha: 0.05); + + for (int i = 0; i < results.ParameterResults.Length; i++) + { + Assert.AreEqual(rhats[i], results.ParameterResults[i].SummaryStatistics.Rhat, 1e-12, + $"Parameter {i}: Rhat must be preserved across alpha change."); + } + } + + [TestMethod] + public void Recompute_PreservesESS() + { + var results = BuildResults(alpha: 0.10); + var (_, esss, _) = SeedDiagnostics(results); + + results.RecomputeParameterResults(alpha: 0.05); + + for (int i = 0; i < results.ParameterResults.Length; i++) + { + Assert.AreEqual(esss[i], results.ParameterResults[i].SummaryStatistics.ESS, 1e-12, + $"Parameter {i}: ESS must be preserved across alpha change."); + } + } + + [TestMethod] + public void Recompute_PreservesAutocorrelation() + { + var results = BuildResults(alpha: 0.10); + var (_, _, acfs) = SeedDiagnostics(results); + + results.RecomputeParameterResults(alpha: 0.05); + + for (int i = 0; i < results.ParameterResults.Length; i++) + { + var acfAfter = results.ParameterResults[i].Autocorrelation; + Assert.AreEqual(acfs[i].GetLength(0), acfAfter.GetLength(0)); + Assert.AreEqual(acfs[i].GetLength(1), acfAfter.GetLength(1)); + for (int r = 0; r < acfs[i].GetLength(0); r++) + { + for (int c = 0; c < acfs[i].GetLength(1); c++) + { + Assert.AreEqual(acfs[i][r, c], acfAfter[r, c], 1e-12, + $"Parameter {i} ACF[{r},{c}]: must be preserved across alpha change."); + } + } + } + } + + [TestMethod] + public void Recompute_NarrowsCIWhenAlphaIncreases() + { + // alpha=0.10 → 90% CI; alpha=0.20 → 80% CI (narrower band). + var results = BuildResults(alpha: 0.10); + var lower90 = results.ParameterResults.Select(p => p.SummaryStatistics.LowerCI).ToArray(); + var upper90 = results.ParameterResults.Select(p => p.SummaryStatistics.UpperCI).ToArray(); + + results.RecomputeParameterResults(alpha: 0.20); + + for (int i = 0; i < results.ParameterResults.Length; i++) + { + double lower80 = results.ParameterResults[i].SummaryStatistics.LowerCI; + double upper80 = results.ParameterResults[i].SummaryStatistics.UpperCI; + Assert.IsGreaterThan(lower90[i], lower80, + $"Parameter {i}: 80% LowerCI ({lower80}) must be > 90% LowerCI ({lower90[i]}) — band narrows when alpha increases."); + Assert.IsLessThan(upper90[i], upper80, + $"Parameter {i}: 80% UpperCI ({upper80}) must be < 90% UpperCI ({upper90[i]}) — band narrows when alpha increases."); + } + } + + [TestMethod] + public void Recompute_WidensCIWhenAlphaDecreases() + { + // alpha=0.10 → 90% CI; alpha=0.05 → 95% CI (wider band). + var results = BuildResults(alpha: 0.10); + var lower90 = results.ParameterResults.Select(p => p.SummaryStatistics.LowerCI).ToArray(); + var upper90 = results.ParameterResults.Select(p => p.SummaryStatistics.UpperCI).ToArray(); + + results.RecomputeParameterResults(alpha: 0.05); + + for (int i = 0; i < results.ParameterResults.Length; i++) + { + double lower95 = results.ParameterResults[i].SummaryStatistics.LowerCI; + double upper95 = results.ParameterResults[i].SummaryStatistics.UpperCI; + Assert.IsLessThan(lower90[i], lower95, + $"Parameter {i}: 95% LowerCI ({lower95}) must be < 90% LowerCI ({lower90[i]}) — band widens when alpha decreases."); + Assert.IsGreaterThan(upper90[i], upper95, + $"Parameter {i}: 95% UpperCI ({upper95}) must be > 90% UpperCI ({upper90[i]}) — band widens when alpha decreases."); + } + } + + [TestMethod] + public void Recompute_PreservesMeanAndMedian() + { + // Mean and Median are alpha-independent — should be preserved exactly. + var results = BuildResults(alpha: 0.10); + var meansBefore = results.ParameterResults.Select(p => p.SummaryStatistics.Mean).ToArray(); + var mediansBefore = results.ParameterResults.Select(p => p.SummaryStatistics.Median).ToArray(); + + results.RecomputeParameterResults(alpha: 0.05); + + for (int i = 0; i < results.ParameterResults.Length; i++) + { + Assert.AreEqual(meansBefore[i], results.ParameterResults[i].SummaryStatistics.Mean, 1e-10, + $"Parameter {i}: Mean must be preserved."); + Assert.AreEqual(mediansBefore[i], results.ParameterResults[i].SummaryStatistics.Median, 1e-10, + $"Parameter {i}: Median must be preserved."); + } + } + + [TestMethod] + public void Recompute_OnEmptyResults_DoesNotThrow() + { + // ParameterResults == null guard. + var results = new MCMCResults(); + + results.RecomputeParameterResults(alpha: 0.05); + + // Should be a no-op — ParameterResults stays null, no exception. + Assert.IsNull(results.ParameterResults); + } + } +} diff --git a/Test_Numerics/Sampling/MCMC/Test_SNIS.cs b/Test_Numerics/Sampling/MCMC/Test_SNIS.cs index 1f61f018..a0b37331 100644 --- a/Test_Numerics/Sampling/MCMC/Test_SNIS.cs +++ b/Test_Numerics/Sampling/MCMC/Test_SNIS.cs @@ -75,5 +75,59 @@ double logLH(double[] x) Assert.AreEqual(5771.81, results.ParameterResults[1].SummaryStatistics.UpperCI, 0.05 * 5771.81); } + /// + /// This test verifies that SNIS fails fast when every importance weight is invalid. + /// + [TestMethod] + public void Test_SNIS_AllInvalidWeights_ThrowsInvalidOperationException() + { + var priors = new List { new Uniform(0d, 1d) }; + double logLH(double[] x) => double.NegativeInfinity; + + var sampler = new SNIS(priors, logLH) + { + Iterations = 100, + OutputLength = 100, + PRNGSeed = 12345 + }; + + Assert.Throws(() => sampler.Sample()); + } + + /// + /// This test verifies that finite SNIS weights normalize cleanly when invalid samples are present. + /// + [TestMethod] + public void Test_SNIS_MixedFiniteAndInvalidWeights_NormalizesFiniteWeights() + { + var priors = new List { new Uniform(0d, 1d) }; + double logLH(double[] x) => x[0] < 0.5d ? 0d : double.NegativeInfinity; + + var sampler = new SNIS(priors, logLH) + { + Iterations = 100, + OutputLength = 100, + PRNGSeed = 12345 + }; + + sampler.Sample(); + + double sum = 0d; + bool foundZeroWeight = false; + bool foundPositiveWeight = false; + foreach (var parameterSet in sampler.MarkovChains[0]) + { + Assert.IsFalse(double.IsNaN(parameterSet.Weight)); + Assert.IsFalse(double.IsInfinity(parameterSet.Weight)); + sum += parameterSet.Weight; + foundZeroWeight |= parameterSet.Weight == 0d; + foundPositiveWeight |= parameterSet.Weight > 0d; + } + + Assert.IsTrue(foundZeroWeight); + Assert.IsTrue(foundPositiveWeight); + Assert.AreEqual(1d, sum, 1e-12); + } + } } diff --git a/Test_Numerics/Sampling/Test_PivotalBootstrap.cs b/Test_Numerics/Sampling/Test_PivotalBootstrap.cs new file mode 100644 index 00000000..2bdac82a --- /dev/null +++ b/Test_Numerics/Sampling/Test_PivotalBootstrap.cs @@ -0,0 +1,571 @@ +using Microsoft.VisualStudio.TestTools.UnitTesting; +using Numerics; +using Numerics.Data.Statistics; +using Numerics.Distributions; +using Numerics.Functions; +using Numerics.Mathematics.LinearAlgebra; +using Numerics.Mathematics.Optimization; +using Numerics.Sampling; +using System; +using System.Linq; + +namespace Sampling +{ + /// + /// Unit tests for the covariance-aware pivotal bootstrap workflow on . + /// + [TestClass] + public class Test_PivotalBootstrap + { + /// + /// Verifies the pivotal transform uses the replicate covariance to standardize + /// and the parent covariance to re-inflate. + /// + [TestMethod] + public void Transform_IdentityLink_UsesBothCovariances() + { + var parent = Fit(new[] { 10d, 20d }, new double[,] { { 4d, 0d }, { 0d, 9d } }); + var raw = Fit(new[] { 8d, 17d }, new double[,] { { 1d, 0d }, { 0d, 1d } }); + var boot = CreatePivotalBootstrap(parent); + boot.RegularizePivotalCovariances = false; + + boot.TransformPivotalBootstrap(new[] { raw }); + + Assert.HasCount(1, boot.BootstrapParameterSets); + Assert.AreEqual(14d, boot.BootstrapParameterSets[0].Values[0], 1e-12); + Assert.AreEqual(29d, boot.BootstrapParameterSets[0].Values[1], 1e-12); + } + + /// + /// Verifies a full covariance matrix is used in the two-covariance transform. + /// + [TestMethod] + public void Transform_FullCovariance_UsesCholeskyAlgebra() + { + var parent = Fit(new[] { 10d, 20d }, new double[,] { { 4d, 1.2d }, { 1.2d, 9d } }); + var raw = Fit(new[] { 8d, 17d }, new double[,] { { 1d, 0.25d }, { 0.25d, 2d } }); + var boot = CreatePivotalBootstrap(parent); + boot.RegularizePivotalCovariances = false; + + boot.TransformPivotalBootstrap(new[] { raw }); + + double[] expected = ExpectedIdentityPivotalValues(parent, raw); + Assert.AreEqual(expected[0], boot.BootstrapParameterSets[0].Values[0], 1e-12); + Assert.AreEqual(expected[1], boot.BootstrapParameterSets[0].Values[1], 1e-12); + } + + /// + /// Verifies callers have full control over pivotal links through the link factory. + /// + [TestMethod] + public void Transform_CustomLinkFactory_ControlsLinkDefinition() + { + var parent = Fit(new[] { 10d }, new double[,] { { 4d } }); + var raw = Fit(new[] { 8d }, new double[,] { { 1d } }); + var boot = CreatePivotalBootstrap(parent); + bool factoryWasCalled = false; + + boot.RegularizePivotalCovariances = false; + boot.PivotalLinkFactory = context => + { + factoryWasCalled = true; + Assert.AreEqual(1, context.ParameterCount); + CollectionAssert.AreEqual(new[] { 8d }, context.GetRawParameterValues(0)); + return new ILinkFunction[] { new LinearScaleLink(2d) }; + }; + + boot.TransformPivotalBootstrap(new[] { raw }); + + Assert.IsTrue(factoryWasCalled); + Assert.IsInstanceOfType(boot.PivotalLinks[0], typeof(LinearScaleLink)); + Assert.AreEqual(14d, boot.BootstrapParameterSets[0].Values[0], 1e-12); + } + + /// + /// Verifies standard Numerics links can be mixed in a pivotal transformation. + /// + [TestMethod] + public void Transform_LogFisherZAndYeoJohnsonLinks_RetainsFiniteDraws() + { + var parent = Fit( + new[] { 1d, 2d, 0.25d, -0.5d }, + new double[,] + { + { 0.04d, 0d, 0d, 0d }, + { 0d, 0.09d, 0d, 0d }, + { 0d, 0d, 0.01d, 0d }, + { 0d, 0d, 0d, 0.16d } + }); + var rawFits = new[] + { + Fit( + new[] { 0.9d, 1.8d, 0.10d, -0.7d }, + new double[,] + { + { 0.01d, 0d, 0d, 0d }, + { 0d, 0.04d, 0d, 0d }, + { 0d, 0d, 0.0025d, 0d }, + { 0d, 0d, 0d, 0.09d } + }), + Fit( + new[] { 1.1d, 2.1d, 0.35d, -0.3d }, + new double[,] + { + { 0.01d, 0d, 0d, 0d }, + { 0d, 0.04d, 0d, 0d }, + { 0d, 0d, 0.0025d, 0d }, + { 0d, 0d, 0d, 0.09d } + }) + }; + var boot = CreatePivotalBootstrap(parent); + boot.RegularizePivotalCovariances = false; + boot.PivotalLinkFactory = context => new ILinkFunction[] + { + null, + new LogLink(), + new FisherZLink(), + new YeoJohnsonLink(context.GetRawParameterValues(3)) + }; + boot.PivotalParameterValidator = values => values[1] > 0d && values[2] > -1d && values[2] < 1d; + + boot.TransformPivotalBootstrap(rawFits); + + Assert.HasCount(2, boot.BootstrapParameterSets); + Assert.IsNull(boot.PivotalLinks[0]); + Assert.IsInstanceOfType(boot.PivotalLinks[1], typeof(LogLink)); + Assert.IsInstanceOfType(boot.PivotalLinks[2], typeof(FisherZLink)); + Assert.IsInstanceOfType(boot.PivotalLinks[3], typeof(YeoJohnsonLink)); + foreach (double value in boot.BootstrapParameterSets[0].Values) + Assert.IsTrue(Tools.IsFinite(value)); + } + + /// + /// Verifies raw and pivotal confidence intervals are exposed separately after pivotal transformation. + /// + [TestMethod] + public void Transform_WithStatisticFunction_StoresRawAndPivotalStatistics() + { + var parent = Fit(new[] { 10d }, new double[,] { { 4d } }); + var rawFits = new[] + { + Fit(new[] { 8d }, new double[,] { { 1d } }), + Fit(new[] { 9d }, new double[,] { { 1d } }), + Fit(new[] { 10d }, new double[,] { { 1d } }), + Fit(new[] { 11d }, new double[,] { { 1d } }), + Fit(new[] { 12d }, new double[,] { { 1d } }) + }; + var boot = CreatePivotalBootstrap(parent); + boot.RegularizePivotalCovariances = false; + boot.StatisticFunction = ps => new[] { ps.Values[0] * 2d }; + + boot.TransformPivotalBootstrap(rawFits); + BootstrapResults pivotal = boot.GetConfidenceIntervals(BootstrapCIMethod.Percentile, 0.2d); + BootstrapResults raw = boot.GetRawPivotalConfidenceIntervals(0.2d); + + Assert.HasCount(1, pivotal.StatisticResults); + Assert.HasCount(1, raw.StatisticResults); + Assert.AreEqual(20d, pivotal.StatisticResults[0].PopulationEstimate, 1e-12); + Assert.AreEqual(20d, raw.StatisticResults[0].PopulationEstimate, 1e-12); + Assert.IsGreaterThan(pivotal.ParameterResults[0].LowerCI, raw.ParameterResults[0].LowerCI); + } + + /// + /// Verifies pivotal parameter ensembles can be generated without a statistic function. + /// + [TestMethod] + public void Transform_WithoutStatisticFunction_ProducesParameterIntervalsOnly() + { + var parent = Fit(new[] { 10d }, new double[,] { { 4d } }); + var raw = Fit(new[] { 8d }, new double[,] { { 1d } }); + var boot = CreatePivotalBootstrap(parent); + boot.RegularizePivotalCovariances = false; + + boot.TransformPivotalBootstrap(new[] { raw }); + BootstrapResults results = boot.GetConfidenceIntervals(BootstrapCIMethod.Percentile); + + Assert.HasCount(1, results.ParameterResults); + Assert.HasCount(0, results.StatisticResults); + Assert.AreEqual(0, boot.BootstrapStatistics.GetLength(1)); + } + + /// + /// Verifies invalid pivotal draws can be dropped. + /// + [TestMethod] + public void Transform_InvalidDrawPolicyDrop_DropsInvalidPivotalDraws() + { + var parent = Fit(new[] { 10d }, new double[,] { { 4d } }); + var raw = Fit(new[] { 8d }, new double[,] { { 1d } }); + var boot = CreatePivotalBootstrap(parent); + boot.RegularizePivotalCovariances = false; + boot.PivotalParameterValidator = _ => false; + boot.PivotalInvalidDrawPolicy = PivotalBootstrapInvalidDrawPolicy.Drop; + + boot.TransformPivotalBootstrap(new[] { raw }); + + Assert.HasCount(0, boot.BootstrapParameterSets); + Assert.AreEqual(1, boot.PivotalDiagnostics!.InvalidPivotalReplicates); + Assert.AreEqual(0, boot.PivotalDiagnostics.RetainedPivotalReplicates); + } + + /// + /// Verifies invalid pivotal draws can fall back to raw bootstrap fits. + /// + [TestMethod] + public void Transform_InvalidDrawPolicyUseRaw_UsesRawParameters() + { + var parent = Fit(new[] { 10d }, new double[,] { { 4d } }); + var raw = Fit(new[] { 8d }, new double[,] { { 1d } }); + var boot = CreatePivotalBootstrap(parent); + boot.RegularizePivotalCovariances = false; + boot.PivotalParameterValidator = _ => false; + boot.PivotalInvalidDrawPolicy = PivotalBootstrapInvalidDrawPolicy.UseRaw; + + boot.TransformPivotalBootstrap(new[] { raw }); + + Assert.HasCount(1, boot.BootstrapParameterSets); + Assert.AreEqual(8d, boot.BootstrapParameterSets[0].Values[0], 1e-12); + } + + /// + /// Verifies invalid pivotal draws can fall back to the parent fit. + /// + [TestMethod] + public void Transform_InvalidDrawPolicyUseParent_UsesParentParameters() + { + var parent = Fit(new[] { 10d }, new double[,] { { 4d } }); + var raw = Fit(new[] { 8d }, new double[,] { { 1d } }); + var boot = CreatePivotalBootstrap(parent); + boot.RegularizePivotalCovariances = false; + boot.PivotalParameterValidator = _ => false; + boot.PivotalInvalidDrawPolicy = PivotalBootstrapInvalidDrawPolicy.UseParent; + + boot.TransformPivotalBootstrap(new[] { raw }); + + Assert.HasCount(1, boot.BootstrapParameterSets); + Assert.AreEqual(10d, boot.BootstrapParameterSets[0].Values[0], 1e-12); + } + + /// + /// Verifies raw replicate filtering occurs before pivotal transformation and is reported in diagnostics. + /// + [TestMethod] + public void Transform_ReplicateFilter_RejectsRawFitsAndUpdatesDiagnostics() + { + var parent = Fit(new[] { 10d }, new double[,] { { 4d } }); + var rawFits = new[] + { + Fit(new[] { 8d }, new double[,] { { 1d } }), + Fit(new[] { 9d }, new double[,] { { 1d } }), + Fit(new[] { 12d }, new double[,] { { 1d } }) + }; + var boot = CreatePivotalBootstrap(parent); + boot.RegularizePivotalCovariances = false; + boot.PivotalReplicateFilter = fit => fit.Parameters.Values[0] >= 9d; + + boot.TransformPivotalBootstrap(rawFits); + + Assert.HasCount(2, boot.RawBootstrapParameterSets); + Assert.HasCount(2, boot.BootstrapParameterSets); + Assert.AreEqual(1, boot.PivotalDiagnostics!.RejectedRawReplicates); + } + + /// + /// Verifies the pivotal run method uses the covariance-aware fit delegate, not the regular fit delegate. + /// + [TestMethod] + public void RunPivotalBootstrap_IgnoresRegularFitFunction() + { + var parent = Fit(new[] { 10d }, new double[,] { { 4d } }); + var samples = new[] { 8d, 9d, 10d, 11d }; + var boot = CreatePivotalBootstrap(parent); + boot.RegularizePivotalCovariances = false; + boot.Replicates = samples.Length; + boot.MaxRetries = 1; + boot.ResampleFunction = (data, ps, rng) => data; + boot.FitFunction = _ => throw new InvalidOperationException("Regular fit should not be used by pivotal bootstrap."); + int index = -1; + boot.FitWithCovarianceFunction = _ => + { + int next = System.Threading.Interlocked.Increment(ref index); + return Fit(new[] { samples[next] }, new double[,] { { 1d } }); + }; + + boot.RunPivotalBootstrap(); + + Assert.HasCount(samples.Length, boot.RawBootstrapFits); + Assert.HasCount(samples.Length, boot.BootstrapParameterSets); + } + + /// + /// Verifies regular bootstrap ignores pivotal-only properties and delegates. + /// + [TestMethod] + public void Run_RegularBootstrap_IgnoresPivotalOnlyProperties() + { + var boot = new Bootstrap(new[] { 1d }, new ParameterSet(new[] { 10d }, double.NaN)); + boot.Replicates = 3; + boot.MaxRetries = 1; + boot.ResampleFunction = (data, ps, rng) => data; + boot.FitFunction = _ => new ParameterSet(new[] { 11d }, double.NaN); + boot.StatisticFunction = ps => new[] { ps.Values[0] }; + boot.FitWithCovarianceFunction = _ => throw new InvalidOperationException("Pivotal fit should not be used by regular bootstrap."); + boot.PivotalLinkFactory = _ => throw new InvalidOperationException("Pivotal links should not be used by regular bootstrap."); + boot.PivotalReplicateFilter = _ => throw new InvalidOperationException("Pivotal filters should not be used by regular bootstrap."); + boot.OriginalCovariance = new Matrix(new[,] { { 1d } }); + + boot.Run(); + + Assert.HasCount(3, boot.BootstrapParameterSets); + Assert.HasCount(0, boot.RawBootstrapParameterSets); + } + + /// + /// Verifies missing covariance-aware fit delegate produces a clear failure. + /// + [TestMethod] + public void RunPivotalBootstrap_WithoutFitWithCovarianceFunction_Throws() + { + var parent = Fit(new[] { 10d }, new double[,] { { 4d } }); + var boot = CreatePivotalBootstrap(parent); + boot.ResampleFunction = (data, ps, rng) => data; + boot.FitWithCovarianceFunction = null; + + var exception = Assert.Throws(() => boot.RunPivotalBootstrap()); + StringAssert.Contains(exception.Message, nameof(Bootstrap.FitWithCovarianceFunction)); + } + + /// + /// Verifies missing original covariance produces a clear failure. + /// + [TestMethod] + public void RunPivotalBootstrap_WithoutOriginalCovariance_Throws() + { + var boot = new Bootstrap(new[] { 1d }, new ParameterSet(new[] { 10d }, double.NaN)); + boot.ResampleFunction = (data, ps, rng) => data; + boot.FitWithCovarianceFunction = _ => Fit(new[] { 10d }, new double[,] { { 1d } }); + + var exception = Assert.Throws(() => boot.RunPivotalBootstrap()); + StringAssert.Contains(exception.Message, nameof(Bootstrap.OriginalCovariance)); + } + + /// + /// Verifies incompatible confidence interval methods are rejected after a pivotal run. + /// + [TestMethod] + public void GetConfidenceIntervals_BCaAfterPivotalRun_Throws() + { + var parent = Fit(new[] { 10d }, new double[,] { { 4d } }); + var raw = Fit(new[] { 8d }, new double[,] { { 1d } }); + var boot = CreatePivotalBootstrap(parent); + boot.RegularizePivotalCovariances = false; + boot.TransformPivotalBootstrap(new[] { raw }); + + var exception = Assert.Throws(() => boot.GetConfidenceIntervals(BootstrapCIMethod.BCa)); + StringAssert.Contains(exception.Message, "Only percentile"); + } + + /// + /// Verifies raw pivotal confidence intervals require a pivotal run first. + /// + [TestMethod] + public void GetRawPivotalConfidenceIntervals_BeforePivotalRun_Throws() + { + var parent = Fit(new[] { 10d }, new double[,] { { 4d } }); + var boot = CreatePivotalBootstrap(parent); + + Assert.Throws(() => boot.GetRawPivotalConfidenceIntervals()); + } + + /// + /// Verifies repeated pivotal runs with the same seed retain reproducibility. + /// + [TestMethod] + public void RunPivotalBootstrap_WithSameSeed_IsReproducible() + { + Bootstrap first = CreateSeededNormalPivotalBootstrap(); + Bootstrap second = CreateSeededNormalPivotalBootstrap(); + + first.RunPivotalBootstrap(); + second.RunPivotalBootstrap(); + + Assert.HasCount(first.BootstrapParameterSets.Length, second.BootstrapParameterSets); + for (int i = 0; i < first.BootstrapParameterSets.Length; i++) + { + Assert.AreEqual(first.BootstrapParameterSets[i].Values[0], second.BootstrapParameterSets[i].Values[0], 1e-12); + Assert.AreEqual(first.BootstrapParameterSets[i].Values[1], second.BootstrapParameterSets[i].Values[1], 1e-12); + } + } + + /// + /// Verifies the normal location-scale pivotal bootstrap produces plausible objective-Bayes-style quantile intervals. + /// + [TestMethod] + public void Run_NormalLocationScale_MatchesObjectiveBayesQuantileIntervals() + { + const int sampleSize = 100; + const int replicates = 2500; + var parentDistribution = new Normal(3d, 0.7d); + var parent = new BootstrapFit( + new ParameterSet(parentDistribution.GetParameters, double.NaN), + new Matrix(parentDistribution.ParameterCovariance(sampleSize, ParameterEstimationMethod.MaximumLikelihood))); + double[] originalData = new double[sampleSize]; + double[] probabilities = { 0.5d, 0.95d }; + var boot = new Bootstrap(originalData, parent); + + boot.Replicates = replicates; + boot.PRNGSeed = 12345; + boot.ResampleFunction = (data, ps, rng) => + { + var distribution = new Normal(ps.Values[0], ps.Values[1]); + return distribution.GenerateRandomValues(sampleSize, rng.Next()); + }; + boot.FitWithCovarianceFunction = FitNormal; + boot.PivotalLinkFactory = _ => new ILinkFunction[] { new IdentityLink(), new LogLink() }; + boot.PivotalParameterValidator = values => values[1] > 0d; + boot.StatisticFunction = ps => + { + var distribution = new Normal(ps.Values[0], ps.Values[1]); + return probabilities.Select(distribution.InverseCDF).ToArray(); + }; + + boot.RunPivotalBootstrap(); + BootstrapResults quantileIntervals = boot.GetConfidenceIntervals(BootstrapCIMethod.Percentile, 0.1d); + double[,] objectiveBayesIntervals = parentDistribution.MonteCarloConfidenceIntervals( + sampleSize, + 12000, + probabilities, + new[] { 0.05d, 0.95d }); + + Assert.IsGreaterThan(0.98d * replicates, boot.BootstrapParameterSets.Length); + for (int i = 0; i < probabilities.Length; i++) + { + Assert.AreEqual(objectiveBayesIntervals[i, 0], quantileIntervals.StatisticResults[i].LowerCI, 0.12d * parentDistribution.Sigma); + Assert.AreEqual(objectiveBayesIntervals[i, 1], quantileIntervals.StatisticResults[i].UpperCI, 0.12d * parentDistribution.Sigma); + } + } + + /// + /// Creates a covariance-aware bootstrap configured with a fixed parent fit. + /// + /// The parent fit. + /// A bootstrap configured for pivotal transformations. + private static Bootstrap CreatePivotalBootstrap(BootstrapFit parent) + { + return new Bootstrap(Array.Empty(), parent); + } + + /// + /// Creates a covariance-aware fit from parameter values and covariance entries. + /// + /// The fitted parameter values. + /// The covariance matrix. + /// The covariance-aware fit. + private static BootstrapFit Fit(double[] values, double[,] covariance) + { + return new BootstrapFit(new ParameterSet(values, double.NaN), new Matrix(covariance)); + } + + /// + /// Fits a normal distribution by maximum likelihood and returns parameter covariance. + /// + /// The sample to fit. + /// The covariance-aware normal fit. + private static BootstrapFit FitNormal(double[] sample) + { + var distribution = new Normal(); + ((IEstimation)distribution).Estimate(sample, ParameterEstimationMethod.MaximumLikelihood); + if (!distribution.ParametersValid) + throw new InvalidOperationException("The normal fit produced invalid parameters."); + + return new BootstrapFit( + new ParameterSet(distribution.GetParameters, double.NaN), + new Matrix(distribution.ParameterCovariance(sample.Length, ParameterEstimationMethod.MaximumLikelihood))); + } + + /// + /// Creates a reproducible normal location-scale pivotal bootstrap for reproducibility checks. + /// + /// A configured pivotal bootstrap instance. + private static Bootstrap CreateSeededNormalPivotalBootstrap() + { + const int sampleSize = 25; + var distribution = new Normal(3d, 0.7d); + var parent = new BootstrapFit( + new ParameterSet(distribution.GetParameters, double.NaN), + new Matrix(distribution.ParameterCovariance(sampleSize, ParameterEstimationMethod.MaximumLikelihood))); + var boot = new Bootstrap(new double[sampleSize], parent); + boot.Replicates = 50; + boot.PRNGSeed = 8675309; + boot.ResampleFunction = (data, ps, rng) => new Normal(ps.Values[0], ps.Values[1]).GenerateRandomValues(sampleSize, rng.Next()); + boot.FitWithCovarianceFunction = FitNormal; + boot.PivotalLinkFactory = _ => new ILinkFunction[] { null, new LogLink() }; + boot.PivotalParameterValidator = values => values[1] > 0d; + return boot; + } + + /// + /// Computes the expected identity-link pivotal value for a single raw fit. + /// + /// The parent fit. + /// The raw bootstrap fit. + /// The expected pivotal parameter values. + private static double[] ExpectedIdentityPivotalValues(BootstrapFit parent, BootstrapFit raw) + { + var parentCholesky = new CholeskyDecomposition(parent.Covariance); + var rawCholesky = new CholeskyDecomposition(raw.Covariance); + var difference = new double[parent.ParameterCount]; + for (int i = 0; i < difference.Length; i++) + difference[i] = parent.Parameters.Values[i] - raw.Parameters.Values[i]; + + double[] z = rawCholesky.Forward(new Vector(difference)).ToArray(); + double[] reinflated = parentCholesky.L * z; + var expected = new double[difference.Length]; + for (int i = 0; i < expected.Length; i++) + expected[i] = parent.Parameters.Values[i] + reinflated[i]; + return expected; + } + + /// + /// A linear scaling link used to verify the pivotal link factory accepts custom instances. + /// + [Serializable] + private sealed class LinearScaleLink : ILinkFunction + { + private readonly double _scale; + + /// + /// Initializes a new instance of the class. + /// + /// The nonzero linear scale. + public LinearScaleLink(double scale) + { + _scale = scale; + } + + /// + public double Link(double x) + { + return _scale * x; + } + + /// + public double InverseLink(double eta) + { + return eta / _scale; + } + + /// + public double DLink(double x) + { + return _scale; + } + + /// + public System.Xml.Linq.XElement ToXElement() + { + return new System.Xml.Linq.XElement(nameof(LinearScaleLink)); + } + } + } +} diff --git a/Test_Numerics/Utilities/Test_Tools.cs b/Test_Numerics/Utilities/Test_Tools.cs index 110f9315..1287b891 100644 --- a/Test_Numerics/Utilities/Test_Tools.cs +++ b/Test_Numerics/Utilities/Test_Tools.cs @@ -354,6 +354,36 @@ public void Test_LogSumExp() Assert.AreEqual(1000.70815,result2, 1E-04); } + /// + /// Testing max helpers when every candidate is negative infinity. + /// + [TestMethod] + public void Test_MaxHelpers_AllNegativeInfinity() + { + List values = new List { double.NegativeInfinity, double.NegativeInfinity }; + List indicators = new List { 1, 1 }; + + Tools.MinMax(values, out double min, out double max); + + Assert.AreEqual(double.NegativeInfinity, min); + Assert.AreEqual(double.NegativeInfinity, max); + Assert.AreEqual(0, Tools.ArgMax(values)); + Assert.AreEqual(double.NegativeInfinity, Tools.Max(values)); + Assert.AreEqual(double.NegativeInfinity, Tools.Max(values, indicators)); + } + + /// + /// Testing log-sum-exponential when every log input is negative infinity. + /// + [TestMethod] + public void Test_LogSumExp_AllNegativeInfinity() + { + List values = new List { double.NegativeInfinity, double.NegativeInfinity }; + + Assert.AreEqual(double.NegativeInfinity, Tools.LogSumExp(double.NegativeInfinity, double.NegativeInfinity)); + Assert.AreEqual(double.NegativeInfinity, Tools.LogSumExp(values)); + } + /// /// Testing integer sequence. /// diff --git a/docs/data/time-series.md b/docs/data/time-series.md index 62296722..1948eeb2 100644 --- a/docs/data/time-series.md +++ b/docs/data/time-series.md @@ -393,7 +393,7 @@ This prevents unreliable interpolation across long gaps while filling short data ### Block Bootstrap -The **block bootstrap** preserves temporal dependence by resampling contiguous blocks rather than individual observations. Given a block size $b$, the method randomly selects blocks of $b$ consecutive values (with replacement) and concatenates them: +The **block bootstrap** (Künsch 1989) preserves temporal dependence by resampling contiguous blocks rather than individual observations. Given a block size $b$, every contiguous run of $b$ consecutive values in the input is enumerated, blocks are drawn uniformly at random with replacement, and concatenated until the requested number of time steps is reached: ```cs // Generate a 1000-step synthetic series preserving 30-day temporal structure @@ -401,11 +401,11 @@ var resampled = ts.ResampleWithBlockBootstrap( timeSteps: 1000, blockSize: 30, seed: 42); ``` -Unlike the standard bootstrap (which destroys autocorrelation), the block bootstrap retains the short-range dependence structure within each block. +The block bootstrap preserves the marginal mean and variance and the within-block autocorrelation structure of the input series. Discontinuities can appear at block boundaries. ### k-Nearest Neighbors -The **k-nearest neighbors** (k-NN) resampling method generates synthetic time series that preserve the multivariate dependence structure. At each step, it finds the $k$ nearest neighbors of the current state in the historical record (using Euclidean distance on standardized values) and randomly selects one as the next value: +The **k-nearest neighbors** (k-NN) bootstrap of Lall & Sharma (1996) is a *conditional* resampler: at each step it draws the next synthetic value from the empirical conditional distribution $p(x_{t+1} \mid x_t)$. The current state $x_t$ is standardized, the $k$ historical observations whose standardized values are closest in Euclidean distance are identified, one of those neighbor indices $j$ is selected uniformly at random, and the trajectory advances by taking the value that *historically came after* the chosen neighbor — i.e., $x_{j+1}$: ```cs // Generate synthetic series using 5-nearest neighbors @@ -413,6 +413,13 @@ var synthetic = ts.ResampleWithKNN( timeSteps: 500, k: 5, seed: 42); ``` +The implementation is **univariate** (embedding dimension $d = 1$); higher-dimensional embedding and joint multivariate resampling are not currently exposed. Lall & Sharma suggest $k = \lfloor \sqrt{n} \rfloor$ as a reasonable default. The series must contain at least 11 observations. + +**References** + +- Künsch, H.R. (1989). The jackknife and the bootstrap for general stationary observations. *Annals of Statistics* **17**(3), 1217–1241. +- Lall, U., Sharma, A. (1996). A nearest neighbor bootstrap for resampling hydrologic time series. *Water Resources Research* **32**(3), 679–693. + ## Sorting and Filtering ### Sorting diff --git a/paper/paper.bib b/paper/paper.bib index b3dcfaca..7226cb62 100644 --- a/paper/paper.bib +++ b/paper/paper.bib @@ -89,7 +89,8 @@ @incollection{neal2011 publisher = {CRC Press}, pages = {113--162}, year = {2011}, - chapter = {5} + chapter = {5}, + doi = {10.1201/b10905-6} } @article{vehtari2021, @@ -108,7 +109,8 @@ @book{efron1993 title = {An Introduction to the Bootstrap}, publisher = {Chapman \& Hall}, year = {1993}, - isbn = {978-0412042317} + isbn = {978-0412042317}, + doi = {10.1201/9780429246593} } @article{storn1997, @@ -167,7 +169,8 @@ @manual{hosking2019lmom title = {lmom: {L}-moments}, year = {2019}, note = {R package version 2.8}, - url = {https://CRAN.R-project.org/package=lmom} + url = {https://CRAN.R-project.org/package=lmom}, + doi = {10.32614/cran.package.lmom} } @article{stephenson2002evd, @@ -178,7 +181,8 @@ @article{stephenson2002evd number = {2}, pages = {31--32}, year = {2002}, - url = {https://CRAN.R-project.org/package=evd} + url = {https://CRAN.R-project.org/package=evd}, + doi = {10.32614/cran.package.evd} } @book{press2007, @@ -195,7 +199,8 @@ @book{rao2000 title = {Flood Frequency Analysis}, publisher = {CRC Press}, year = {2000}, - isbn = {978-0849300837} + isbn = {978-0849300837}, + doi = {10.1201/9780429128813} } @manual{viglione2024nsrfa, @@ -203,7 +208,8 @@ @manual{viglione2024nsrfa title = {nsRFA: Non-supervised Regional Frequency Analysis}, year = {2024}, note = {R package version 0.7-17}, - url = {https://CRAN.R-project.org/package=nsRFA} + url = {https://CRAN.R-project.org/package=nsRFA}, + doi = {10.32614/cran.package.nsrfa} } @manual{ribatet2022spatialextremes, @@ -211,7 +217,8 @@ @manual{ribatet2022spatialextremes title = {SpatialExtremes: Modelling Spatial Extremes}, year = {2022}, note = {R package version 2.1-0}, - url = {https://CRAN.R-project.org/package=SpatialExtremes} + url = {https://CRAN.R-project.org/package=SpatialExtremes}, + doi = {10.32614/cran.package.SpatialExtremes} } @article{rinnooy1987,