From e2c0032b7e10e7d2d33ecb124cf4531affe6d5dc Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Wed, 24 Jun 2026 11:15:47 -0500 Subject: [PATCH 1/2] feat(waterdata): add get_queryables + a live queryables monitor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add `waterdata.get_queryables(collection)`, returning the OGC queryable properties of a Water Data collection (`daily`, `continuous`, `monitoring-locations`, ...) as a tidy `(DataFrame, BaseMetadata)` — one row per filterable property with its type, title, and description. Add `tests/waterdata_queryables_test.py`: offline parsing / error tests plus a live monitor that compares each collection's advertised queryables against a committed snapshot (`tests/data/waterdata_queryables.json`). The monitor fails when the upstream API adds / removes / renames a queryable — the signal to regenerate the snapshot and enable any new queryables on the matching getter. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01Sjb14HkwuCydKSKMsaXsgd --- dataretrieval/waterdata/__init__.py | 2 + dataretrieval/waterdata/api.py | 65 ++++ tests/data/waterdata_queryables.json | 513 +++++++++++++++++++++++++++ tests/waterdata_queryables_test.py | 118 ++++++ 4 files changed, 698 insertions(+) create mode 100644 tests/data/waterdata_queryables.json create mode 100644 tests/waterdata_queryables_test.py diff --git a/dataretrieval/waterdata/__init__.py b/dataretrieval/waterdata/__init__.py index 7d3fce45..99b6e178 100644 --- a/dataretrieval/waterdata/__init__.py +++ b/dataretrieval/waterdata/__init__.py @@ -25,6 +25,7 @@ get_latest_daily, get_monitoring_locations, get_peaks, + get_queryables, get_reference_table, get_samples, get_samples_summary, @@ -62,6 +63,7 @@ "get_monitoring_locations", "get_nearest_continuous", "get_peaks", + "get_queryables", "get_ratings", "get_reference_table", "get_samples", diff --git a/dataretrieval/waterdata/api.py 
b/dataretrieval/waterdata/api.py
index e79d08f7..91448128 100644
--- a/dataretrieval/waterdata/api.py
+++ b/dataretrieval/waterdata/api.py
@@ -16,6 +16,7 @@
 import httpx
 import pandas as pd
 
+from dataretrieval.ogc.engine import OGC_API_URL
 from dataretrieval.ogc.filters import FILTER_LANG
 from dataretrieval.utils import (
     HTTPX_DEFAULTS,
@@ -2198,6 +2199,77 @@ def get_reference_table(
     )
 
 
+def get_queryables(collection: str) -> tuple[pd.DataFrame, BaseMetadata]:
+    """List the queryable properties of a Water Data API collection.
+
+    Every OGC collection (``daily``, ``continuous``, ``monitoring-locations``,
+    ...) advertises the set of properties that can be filtered on -- exposed as
+    the typed keyword arguments of the matching ``get_*`` function, and usable
+    directly in a CQL2 ``filter``. This returns that set, so the available
+    filters can be discovered programmatically and monitored for upstream
+    additions.
+
+    Parameters
+    ----------
+    collection : string
+        The collection id, e.g. ``"daily"``, ``"continuous"``,
+        ``"monitoring-locations"``, or ``"time-series-metadata"``. See
+        :data:`dataretrieval.waterdata.types.WATERDATA_SERVICES` for the data
+        collections; reference collections (e.g. ``"parameter-codes"``) work
+        too. The id is percent-encoded into the URL as one path segment.
+
+    Returns
+    -------
+    df : ``pandas.DataFrame``
+        One row per queryable, sorted by name, with columns ``queryable`` (the
+        property name), ``type``, ``title``, and ``description``.
+    md : :class:`dataretrieval.utils.BaseMetadata`
+        Metadata describing the request (URL, query time, response headers).
+
+    Raises
+    ------
+    DataRetrievalError
+        On an HTTP error response; the typed subclass matching the status is
+        raised (e.g. an unknown ``collection`` yields a 404).
+
+    Examples
+    --------
+    .. doctest::
+        :skipif: True  # network
+
+        >>> from dataretrieval import waterdata
+        >>> df, md = waterdata.get_queryables("daily")
+        >>> df.set_index("queryable").loc["state_name", "type"]
+        'string'
+    """
+    # Function-scope import: keeps this change self-contained.
+    from urllib.parse import quote
+
+    # Percent-encode the id so it stays a single path segment; an unescaped
+    # "/", "?" or "#" would silently retarget the request.
+    segment = quote(collection, safe="")
+    url = f"{OGC_API_URL}/collections/{segment}/queryables"
+    response = _get(url, headers=_default_headers(), **HTTPX_DEFAULTS)
+    _raise_for_non_200(response)
+    # The OGC queryables document is a JSON Schema whose ``properties`` map each
+    # filterable property name to a ``{title, type, description}`` definition.
+    # ``or {}`` also covers a document with ``"properties": null``.
+    properties: dict[str, Any] = response.json().get("properties") or {}
+    df = pd.DataFrame(
+        [
+            {
+                "queryable": name,
+                "type": prop.get("type"),
+                "title": prop.get("title"),
+                "description": (prop.get("description") or "").strip(),
+            }
+            for name, prop in sorted(properties.items())
+        ],
+        columns=["queryable", "type", "title", "description"],
+    )
+    return df, BaseMetadata(response)
+
+
 def get_codes(code_service: CODE_SERVICES) -> tuple[pd.DataFrame, BaseMetadata]:
     """Return codes from a Samples code service.
 
diff --git a/tests/data/waterdata_queryables.json b/tests/data/waterdata_queryables.json new file mode 100644 index 00000000..a9d9862c --- /dev/null +++ b/tests/data/waterdata_queryables.json @@ -0,0 +1,513 @@ +{ + "channel-measurements": [ + "channel_area", + "channel_area_unit", + "channel_evenness", + "channel_flow", + "channel_flow_unit", + "channel_location_direction", + "channel_location_distance", + "channel_location_distance_unit", + "channel_material", + "channel_measurement_type", + "channel_name", + "channel_stability", + "channel_velocity", + "channel_velocity_unit", + "channel_width", + "channel_width_unit", + "field_visit_id", + "geometry", + "horizontal_velocity_description", + "id", + "last_modified", + "longitudinal_velocity_description", + "measurement_number", + "measurement_type", + "monitoring_location_id", + "time", + "vertical_velocity_description" + ], + "combined-metadata": [ + "agency_code", + "agency_name", + "altitude", + "altitude_accuracy", + "altitude_method_code", + "altitude_method_name", + "aquifer_code", + "aquifer_type_code", + "basin_code", + "begin", + "computation_identifier", + "construction_date", + "contributing_drainage_area", + "country_code", + "country_name", + "county_code", + "county_name", + "data_gap_interval", + "data_type", + "depth_source_code", + "district_code", + "drainage_area", + "end", + "geometry", + "hole_constructed_depth", + "horizontal_position_method_code", + "horizontal_position_method_name", + "horizontal_positional_accuracy", + "horizontal_positional_accuracy_code", + "hydrologic_unit_code", + "id", + "last_modified", + "minor_civil_division_code", + "monitoring_location_id", + "monitoring_location_name", + "monitoring_location_number", + "national_aquifer_code", + "original_horizontal_datum", + "original_horizontal_datum_name", + "parameter_code", + "parameter_description", + "parameter_name", + "parent_time_series_id", + "primary", + "reading_type", + "site_type", + "site_type_code", + 
"state_code", + "state_name", + "statistic_id", + "sublocation_identifier", + "thresholds", + "time_zone_abbreviation", + "unit_of_measure", + "uses_daylight_savings", + "vertical_datum", + "vertical_datum_name", + "web_description", + "well_constructed_depth" + ], + "continuous": [ + "agency_code", + "agency_name", + "altitude", + "altitude_accuracy", + "altitude_method_code", + "altitude_method_name", + "approval_status", + "aquifer_code", + "aquifer_type_code", + "basin_code", + "construction_date", + "contributing_drainage_area", + "country_code", + "country_name", + "county_code", + "county_name", + "data_gap_interval", + "depth_source_code", + "district_code", + "drainage_area", + "geometry", + "hole_constructed_depth", + "horizontal_position_method_code", + "horizontal_position_method_name", + "horizontal_positional_accuracy", + "horizontal_positional_accuracy_code", + "hydrologic_unit_code", + "id", + "last_modified", + "minor_civil_division_code", + "monitoring_location_id", + "monitoring_location_name", + "monitoring_location_number", + "national_aquifer_code", + "original_horizontal_datum", + "original_horizontal_datum_name", + "parameter_code", + "qualifier", + "site_type", + "site_type_code", + "state_code", + "state_name", + "statistic_id", + "time", + "time_series_id", + "time_zone_abbreviation", + "unit_of_measure", + "uses_daylight_savings", + "value", + "vertical_datum", + "vertical_datum_name", + "well_constructed_depth" + ], + "daily": [ + "agency_code", + "agency_name", + "altitude", + "altitude_accuracy", + "altitude_method_code", + "altitude_method_name", + "approval_status", + "aquifer_code", + "aquifer_type_code", + "basin_code", + "construction_date", + "contributing_drainage_area", + "country_code", + "country_name", + "county_code", + "county_name", + "data_gap_interval", + "depth_source_code", + "district_code", + "drainage_area", + "geometry", + "hole_constructed_depth", + "horizontal_position_method_code", + 
"horizontal_position_method_name", + "horizontal_positional_accuracy", + "horizontal_positional_accuracy_code", + "hydrologic_unit_code", + "id", + "last_modified", + "minor_civil_division_code", + "monitoring_location_id", + "monitoring_location_name", + "monitoring_location_number", + "national_aquifer_code", + "original_horizontal_datum", + "original_horizontal_datum_name", + "parameter_code", + "qualifier", + "site_type", + "site_type_code", + "state_code", + "state_name", + "statistic_id", + "time", + "time_series_id", + "time_zone_abbreviation", + "unit_of_measure", + "uses_daylight_savings", + "value", + "vertical_datum", + "vertical_datum_name", + "well_constructed_depth" + ], + "field-measurements": [ + "agency_code", + "agency_name", + "altitude", + "altitude_accuracy", + "altitude_method_code", + "altitude_method_name", + "approval_status", + "aquifer_code", + "aquifer_type_code", + "basin_code", + "construction_date", + "contributing_drainage_area", + "control_condition", + "country_code", + "country_name", + "county_code", + "county_name", + "day", + "depth_source_code", + "district_code", + "drainage_area", + "field_measurements_series_id", + "field_visit_id", + "geometry", + "hole_constructed_depth", + "horizontal_position_method_code", + "horizontal_position_method_name", + "horizontal_positional_accuracy", + "horizontal_positional_accuracy_code", + "hydrologic_unit_code", + "id", + "last_modified", + "measurement_rated", + "measuring_agency", + "minor_civil_division_code", + "monitoring_location_id", + "monitoring_location_name", + "monitoring_location_number", + "month", + "national_aquifer_code", + "observing_procedure", + "observing_procedure_code", + "original_horizontal_datum", + "original_horizontal_datum_name", + "parameter_code", + "qualifier", + "reading_type", + "site_type", + "site_type_code", + "state_code", + "state_name", + "time", + "time_of_day", + "time_zone_abbreviation", + "unit_of_measure", + "uses_daylight_savings", + "value", 
+ "vertical_datum", + "vertical_datum_name", + "vertical_datum_site", + "well_constructed_depth", + "year" + ], + "field-measurements-metadata": [ + "begin", + "end", + "geometry", + "id", + "last_modified", + "monitoring_location_id", + "parameter_code", + "parameter_description", + "parameter_name", + "reading_type" + ], + "latest-continuous": [ + "agency_code", + "agency_name", + "altitude", + "altitude_accuracy", + "altitude_method_code", + "altitude_method_name", + "approval_status", + "aquifer_code", + "aquifer_type_code", + "basin_code", + "construction_date", + "contributing_drainage_area", + "country_code", + "country_name", + "county_code", + "county_name", + "data_gap_interval", + "depth_source_code", + "district_code", + "drainage_area", + "geometry", + "hole_constructed_depth", + "horizontal_position_method_code", + "horizontal_position_method_name", + "horizontal_positional_accuracy", + "horizontal_positional_accuracy_code", + "hydrologic_unit_code", + "id", + "last_modified", + "minor_civil_division_code", + "monitoring_location_id", + "monitoring_location_name", + "monitoring_location_number", + "national_aquifer_code", + "original_horizontal_datum", + "original_horizontal_datum_name", + "parameter_code", + "qualifier", + "site_type", + "site_type_code", + "state_code", + "state_name", + "statistic_id", + "time", + "time_series_id", + "time_zone_abbreviation", + "unit_of_measure", + "uses_daylight_savings", + "value", + "vertical_datum", + "vertical_datum_name", + "well_constructed_depth" + ], + "latest-daily": [ + "agency_code", + "agency_name", + "altitude", + "altitude_accuracy", + "altitude_method_code", + "altitude_method_name", + "approval_status", + "aquifer_code", + "aquifer_type_code", + "basin_code", + "construction_date", + "contributing_drainage_area", + "country_code", + "country_name", + "county_code", + "county_name", + "data_gap_interval", + "depth_source_code", + "district_code", + "drainage_area", + "geometry", + 
"hole_constructed_depth", + "horizontal_position_method_code", + "horizontal_position_method_name", + "horizontal_positional_accuracy", + "horizontal_positional_accuracy_code", + "hydrologic_unit_code", + "id", + "last_modified", + "minor_civil_division_code", + "monitoring_location_id", + "monitoring_location_name", + "monitoring_location_number", + "national_aquifer_code", + "original_horizontal_datum", + "original_horizontal_datum_name", + "parameter_code", + "qualifier", + "site_type", + "site_type_code", + "state_code", + "state_name", + "statistic_id", + "time", + "time_series_id", + "time_zone_abbreviation", + "unit_of_measure", + "uses_daylight_savings", + "value", + "vertical_datum", + "vertical_datum_name", + "well_constructed_depth" + ], + "monitoring-locations": [ + "agency_code", + "agency_name", + "altitude", + "altitude_accuracy", + "altitude_method_code", + "altitude_method_name", + "aquifer_code", + "aquifer_type_code", + "basin_code", + "construction_date", + "contributing_drainage_area", + "country_code", + "country_name", + "county_code", + "county_name", + "depth_source_code", + "district_code", + "drainage_area", + "geometry", + "hole_constructed_depth", + "horizontal_position_method_code", + "horizontal_position_method_name", + "horizontal_positional_accuracy", + "horizontal_positional_accuracy_code", + "hydrologic_unit_code", + "id", + "minor_civil_division_code", + "monitoring_location_name", + "monitoring_location_number", + "national_aquifer_code", + "original_horizontal_datum", + "original_horizontal_datum_name", + "revision_created", + "revision_modified", + "revision_note", + "site_type", + "site_type_code", + "state_code", + "state_name", + "time_zone_abbreviation", + "uses_daylight_savings", + "vertical_datum", + "vertical_datum_name", + "well_constructed_depth" + ], + "peaks": [ + "agency_code", + "agency_name", + "altitude", + "altitude_accuracy", + "altitude_method_code", + "altitude_method_name", + "aquifer_code", + 
"aquifer_type_code", + "basin_code", + "construction_date", + "contributing_drainage_area", + "country_code", + "country_name", + "county_code", + "county_name", + "data_gap_interval", + "day", + "depth_source_code", + "district_code", + "drainage_area", + "geometry", + "hole_constructed_depth", + "horizontal_position_method_code", + "horizontal_position_method_name", + "horizontal_positional_accuracy", + "horizontal_positional_accuracy_code", + "hydrologic_unit_code", + "id", + "last_modified", + "minor_civil_division_code", + "monitoring_location_id", + "monitoring_location_name", + "monitoring_location_number", + "month", + "national_aquifer_code", + "original_horizontal_datum", + "original_horizontal_datum_name", + "parameter_code", + "peak_since", + "qualifier", + "site_type", + "site_type_code", + "state_code", + "state_name", + "time", + "time_of_day", + "time_series_id", + "time_zone_abbreviation", + "unit_of_measure", + "uses_daylight_savings", + "value", + "vertical_datum", + "vertical_datum_name", + "water_year", + "well_constructed_depth", + "year" + ], + "time-series-metadata": [ + "begin", + "begin_utc", + "computation_identifier", + "computation_period_identifier", + "data_gap_interval", + "end", + "end_utc", + "geometry", + "hydrologic_unit_code", + "id", + "last_modified", + "monitoring_location_id", + "parameter_code", + "parameter_description", + "parameter_name", + "parent_time_series_id", + "primary", + "state_name", + "statistic_id", + "sublocation_identifier", + "thresholds", + "unit_of_measure", + "web_description" + ] +} diff --git a/tests/waterdata_queryables_test.py b/tests/waterdata_queryables_test.py new file mode 100644 index 00000000..fde3ab4d --- /dev/null +++ b/tests/waterdata_queryables_test.py @@ -0,0 +1,118 @@ +"""Tests for :func:`dataretrieval.waterdata.get_queryables`, plus a live monitor +that flags upstream changes to the Water Data API's queryable sets. 
+
+The live monitor (:func:`test_queryables_match_snapshot`) compares the
+queryables each collection advertises against a committed snapshot
+(``tests/data/waterdata_queryables.json``). When it fails, the upstream API has
+added / removed / renamed a queryable: regenerate the snapshot and enable any
+new queryables on the matching getter. Regenerate with::
+
+    python - <<'PY'
+    import httpx, json
+    from typing import get_args
+    from dataretrieval.waterdata.types import WATERDATA_SERVICES
+    base = "https://api.waterdata.usgs.gov/ogcapi/v0"
+    snap = {}
+    for c in get_args(WATERDATA_SERVICES):
+        r = httpx.get(f"{base}/collections/{c}/queryables", timeout=30)
+        r.raise_for_status()
+        snap[c] = sorted(r.json().get("properties", {}))
+    with open("tests/data/waterdata_queryables.json", "w") as f:
+        json.dump(snap, f, indent=2, sort_keys=True)
+        print(file=f)
+    PY
+"""
+
+import json
+import re
+from pathlib import Path
+
+import pytest
+
+import dataretrieval
+from dataretrieval import waterdata
+from dataretrieval.utils import BaseMetadata
+from tests.conftest import flaky_api
+
+# The OGC queryables endpoint for any Water Data collection.
+QUERYABLES_RE = re.compile(
+    r"^https://api\.waterdata\.usgs\.gov/ogcapi/v0/collections/[^/]+/queryables$"
+)
+
+# A minimal queryables document (the JSON Schema shape the real endpoint returns).
+_FAKE_QUERYABLES = { + "type": "object", + "title": "Daily", + "$schema": "https://json-schema.org/draft/2019-09/schema", + "properties": { + "state_name": { + "title": "State name", + "type": "string", + "description": "The name of the state.\n", + }, + "parameter_code": { + "title": "Parameter code", + "type": "string", + "description": "5-digit codes.\n", + }, + }, +} + +_SNAPSHOT_PATH = Path(__file__).parent / "data" / "waterdata_queryables.json" +_SNAPSHOT = json.loads(_SNAPSHOT_PATH.read_text()) + + +# --- get_queryables unit tests (mocked) ------------------------------------ + + +def test_get_queryables_parses_properties(httpx_mock): + """Properties become one tidy row each, sorted by name, with the + description whitespace-stripped; returns ``(DataFrame, BaseMetadata)``.""" + httpx_mock.add_response(method="GET", url=QUERYABLES_RE, json=_FAKE_QUERYABLES) + + df, md = waterdata.get_queryables("daily") + + assert isinstance(md, BaseMetadata) + assert list(df.columns) == ["queryable", "type", "title", "description"] + # Sorted by name (parameter_code before state_name). + assert df["queryable"].tolist() == ["parameter_code", "state_name"] + row = df.set_index("queryable").loc["state_name"] + assert row["type"] == "string" + assert row["title"] == "State name" + assert row["description"] == "The name of the state." # trailing \n stripped + + +def test_get_queryables_unknown_collection_raises(httpx_mock): + """An HTTP error (e.g. 
a 404 for an unknown collection) is surfaced as the + typed ``DataRetrievalError``, not a bare DataFrame.""" + httpx_mock.add_response( + method="GET", + url=QUERYABLES_RE, + status_code=404, + json={"code": "404", "description": "Collection not found"}, + ) + + with pytest.raises(dataretrieval.DataRetrievalError): + waterdata.get_queryables("not-a-collection") + + +# --- live queryables monitor ----------------------------------------------- + + +@flaky_api +@pytest.mark.parametrize("collection", sorted(_SNAPSHOT)) +def test_queryables_match_snapshot(collection): + """Each collection's live queryables match the committed snapshot. + + A failure means the upstream API changed a collection's queryables. + Regenerate ``tests/data/waterdata_queryables.json`` (see this module's + docstring) and enable any newly added queryables on the matching getter. + """ + df, _ = waterdata.get_queryables(collection) + live = set(df["queryable"]) + expected = set(_SNAPSHOT[collection]) + assert live == expected, ( + f"{collection} queryables changed upstream: " + f"added={sorted(live - expected)}, removed={sorted(expected - live)}. " + f"Regenerate {_SNAPSHOT_PATH.name} and enable any new queryables." + ) From 2060c6c5aec6cc8e99bed807f1df64f9634197b1 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Wed, 24 Jun 2026 11:43:20 -0500 Subject: [PATCH 2/2] feat(waterdata): enable arbitrary queryables as passthrough filters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The OGC data getters (`get_daily`, `get_continuous`, `get_peaks`, ...) exposed ~11 of each collection's ~50 queryables as named params; the rest — mostly the shared monitoring-location attributes (`state_name`, `county_code`, `site_type`, `altitude`, ...) now filterable on the data endpoints — were reachable only via the raw `filter` CQL. 
Accept any queryable as a passthrough kwarg: each OGC getter gains `**queryables`, and the shared `_get_args` flattens it so an extra filter such as `state_name="Wisconsin"` is normalized and sent exactly like a named param. The service itself validates names (an unknown one returns HTTP 400 → typed error), so no client-side queryable list is bundled. The passthrough is provisional (see the PR description for the trade-off vs. explicit per-property keyword arguments). Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01Sjb14HkwuCydKSKMsaXsgd --- dataretrieval/waterdata/api.py | 88 ++++++++++++++++++++++++++++++ dataretrieval/waterdata/utils.py | 10 ++++ tests/waterdata_queryables_test.py | 62 +++++++++++++++++++++ 3 files changed, 160 insertions(+) diff --git a/dataretrieval/waterdata/api.py b/dataretrieval/waterdata/api.py index 91448128..7d45a06a 100644 --- a/dataretrieval/waterdata/api.py +++ b/dataretrieval/waterdata/api.py @@ -75,6 +75,7 @@ def get_daily( filter: str | None = None, filter_lang: FILTER_LANG | None = None, convert_type: bool = True, + **queryables: Any, ) -> tuple[pd.DataFrame, BaseMetadata]: """Daily data provide one data value to represent water conditions for the day. @@ -207,6 +208,13 @@ def get_daily( and the lexicographic-comparison pitfall. convert_type : boolean, optional If True, converts columns to appropriate types. + **queryables : string or iterable of strings, optional + Any other queryable property of this collection, passed through as a + server-side filter (e.g. ``state_name="Wisconsin"``, + ``site_type_code="ST"``). See :func:`get_queryables` for a collection's + queryable properties; an unknown name is rejected by the service with a + ``DataRetrievalError`` (HTTP 400). This passthrough is provisional and + may be superseded by explicit per-property keyword arguments. 
Returns ------- @@ -296,6 +304,7 @@ def get_continuous( filter: str | None = None, filter_lang: FILTER_LANG | None = None, convert_type: bool = True, + **queryables: Any, ) -> tuple[pd.DataFrame, BaseMetadata]: """ Continuous data provide instantaneous water conditions. @@ -422,6 +431,13 @@ def get_continuous( and the lexicographic-comparison pitfall. convert_type : boolean, optional If True, converts columns to appropriate types. + **queryables : string or iterable of strings, optional + Any other queryable property of this collection, passed through as a + server-side filter (e.g. ``state_name="Wisconsin"``, + ``site_type_code="ST"``). See :func:`get_queryables` for a collection's + queryable properties; an unknown name is rejected by the service with a + ``DataRetrievalError`` (HTTP 400). This passthrough is provisional and + may be superseded by explicit per-property keyword arguments. Returns ------- @@ -521,6 +537,7 @@ def get_monitoring_locations( filter: str | None = None, filter_lang: FILTER_LANG | None = None, convert_type: bool = True, + **queryables: Any, ) -> tuple[pd.DataFrame, BaseMetadata]: """Location information is basic information about the monitoring location including the name, identifier, agency responsible for data collection, and @@ -739,6 +756,13 @@ def get_monitoring_locations( and the lexicographic-comparison pitfall. convert_type : boolean, optional If True, converts columns to appropriate types. + **queryables : string or iterable of strings, optional + Any other queryable property of this collection, passed through as a + server-side filter (e.g. ``state_name="Wisconsin"``, + ``site_type_code="ST"``). See :func:`get_queryables` for a collection's + queryable properties; an unknown name is rejected by the service with a + ``DataRetrievalError`` (HTTP 400). This passthrough is provisional and + may be superseded by explicit per-property keyword arguments. 
Returns ------- @@ -809,6 +833,7 @@ def get_time_series_metadata( filter: str | None = None, filter_lang: FILTER_LANG | None = None, convert_type: bool = True, + **queryables: Any, ) -> tuple[pd.DataFrame, BaseMetadata]: """Daily data and continuous measurements are grouped into time series, which represent a collection of observations of a single parameter, @@ -976,6 +1001,13 @@ def get_time_series_metadata( and the lexicographic-comparison pitfall. convert_type : boolean, optional If True, converts columns to appropriate types. + **queryables : string or iterable of strings, optional + Any other queryable property of this collection, passed through as a + server-side filter. Only names that :func:`get_queryables` lists for + ``"time-series-metadata"`` are accepted (``site_type_code``, for + example, is not); an unknown name is rejected by the service with a + ``DataRetrievalError`` (HTTP 400). This passthrough is provisional and + may be superseded by explicit per-property keyword arguments. Returns ------- @@ -1081,6 +1113,7 @@ def get_combined_metadata( filter: str | None = None, filter_lang: FILTER_LANG | None = None, convert_type: bool = True, + **queryables: Any, ) -> tuple[pd.DataFrame, BaseMetadata]: """Get combined monitoring-location and time-series metadata. @@ -1183,6 +1216,13 @@ def get_combined_metadata( and the lexicographic-comparison pitfall. convert_type : boolean, optional If True, converts columns to appropriate types. + **queryables : string or iterable of strings, optional + Any other queryable property of this collection, passed through as a + server-side filter (e.g. ``state_name="Wisconsin"``, + ``site_type_code="ST"``). See :func:`get_queryables` for a collection's + queryable properties; an unknown name is rejected by the service with a + ``DataRetrievalError`` (HTTP 400). This passthrough is provisional and + may be superseded by explicit per-property keyword arguments. 
Returns ------- @@ -1278,6 +1318,7 @@ def get_latest_continuous( filter: str | None = None, filter_lang: FILTER_LANG | None = None, convert_type: bool = True, + **queryables: Any, ) -> tuple[pd.DataFrame, BaseMetadata]: """This endpoint provides the most recent observation for each time series of continuous data. Continuous data are collected via automated sensors @@ -1407,6 +1448,13 @@ def get_latest_continuous( and the lexicographic-comparison pitfall. convert_type : boolean, optional If True, converts columns to appropriate types. + **queryables : string or iterable of strings, optional + Any other queryable property of this collection, passed through as a + server-side filter (e.g. ``state_name="Wisconsin"``, + ``site_type_code="ST"``). See :func:`get_queryables` for a collection's + queryable properties; an unknown name is rejected by the service with a + ``DataRetrievalError`` (HTTP 400). This passthrough is provisional and + may be superseded by explicit per-property keyword arguments. Returns ------- @@ -1479,6 +1527,7 @@ def get_latest_daily( filter: str | None = None, filter_lang: FILTER_LANG | None = None, convert_type: bool = True, + **queryables: Any, ) -> tuple[pd.DataFrame, BaseMetadata]: """Daily data provide one data value to represent water conditions for the day. @@ -1610,6 +1659,13 @@ def get_latest_daily( and the lexicographic-comparison pitfall. convert_type : boolean, optional If True, converts columns to appropriate types. + **queryables : string or iterable of strings, optional + Any other queryable property of this collection, passed through as a + server-side filter (e.g. ``state_name="Wisconsin"``, + ``site_type_code="ST"``). See :func:`get_queryables` for a collection's + queryable properties; an unknown name is rejected by the service with a + ``DataRetrievalError`` (HTTP 400). This passthrough is provisional and + may be superseded by explicit per-property keyword arguments. 
Returns ------- @@ -1683,6 +1739,7 @@ def get_field_measurements( filter: str | None = None, filter_lang: FILTER_LANG | None = None, convert_type: bool = True, + **queryables: Any, ) -> tuple[pd.DataFrame, BaseMetadata]: """Field measurements are physically measured values collected during a visit to the monitoring location. Field measurements consist of measurements @@ -1805,6 +1862,13 @@ def get_field_measurements( and the lexicographic-comparison pitfall. convert_type : boolean, optional If True, converts columns to appropriate types. + **queryables : string or iterable of strings, optional + Any other queryable property of this collection, passed through as a + server-side filter (e.g. ``state_name="Wisconsin"``, + ``site_type_code="ST"``). See :func:`get_queryables` for a collection's + queryable properties; an unknown name is rejected by the service with a + ``DataRetrievalError`` (HTTP 400). This passthrough is provisional and + may be superseded by explicit per-property keyword arguments. Returns ------- @@ -1874,6 +1938,7 @@ def get_field_measurements_metadata( filter: str | None = None, filter_lang: FILTER_LANG | None = None, convert_type: bool = True, + **queryables: Any, ) -> tuple[pd.DataFrame, BaseMetadata]: """Get field-measurement metadata: one row per (location, parameter) series. @@ -1927,6 +1992,13 @@ def get_field_measurements_metadata( and the lexicographic-comparison pitfall. convert_type : boolean, optional If True, converts columns to appropriate types. + **queryables : string or iterable of strings, optional + Any other queryable property of this collection, passed through as a + server-side filter. Only names that :func:`get_queryables` lists for + ``"field-measurements-metadata"`` are accepted (``state_name``, for + example, is not); an unknown name is rejected by the service with a + ``DataRetrievalError`` (HTTP 400). This passthrough is provisional and + may be superseded by explicit per-property keyword arguments. 
Returns ------- @@ -1999,6 +2071,7 @@ def get_peaks( filter: str | None = None, filter_lang: FILTER_LANG | None = None, convert_type: bool = True, + **queryables: Any, ) -> tuple[pd.DataFrame, BaseMetadata]: """Get the annual peak streamflow / stage record for a monitoring location. @@ -2057,6 +2130,13 @@ def get_peaks( and the lexicographic-comparison pitfall. convert_type : boolean, optional If True, converts columns to appropriate types. + **queryables : string or iterable of strings, optional + Any other queryable property of this collection, passed through as a + server-side filter (e.g. ``state_name="Wisconsin"``, + ``site_type_code="ST"``). See :func:`get_queryables` for a collection's + queryable properties; an unknown name is rejected by the service with a + ``DataRetrievalError`` (HTTP 400). This passthrough is provisional and + may be superseded by explicit per-property keyword arguments. Returns ------- @@ -2981,6 +3061,7 @@ def get_channel( filter: str | None = None, filter_lang: FILTER_LANG | None = None, convert_type: bool = True, + **queryables: Any, ) -> tuple[pd.DataFrame, BaseMetadata]: """ Channel measurements taken as part of streamflow field measurements. @@ -3110,6 +3191,13 @@ def get_channel( and the lexicographic-comparison pitfall. convert_type : boolean, optional If True, converts columns to appropriate types. + **queryables : string or iterable of strings, optional + Any other queryable property of this collection, passed through as a + server-side filter. Only names that :func:`get_queryables` lists for + ``"channel-measurements"`` are accepted (``state_name``, for + example, is not); an unknown name is rejected by the service with a + ``DataRetrievalError`` (HTTP 400). This passthrough is provisional and + may be superseded by explicit per-property keyword arguments. 
Returns ------- diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py index 65f9ea2f..72889fc3 100644 --- a/dataretrieval/waterdata/utils.py +++ b/dataretrieval/waterdata/utils.py @@ -166,7 +166,17 @@ def _get_args( params such as ``water_year``, ``thresholds``, ``boundingBox``) so they keep their element types. See :func:`engine._get_args` for the full normalization contract. + + A getter's ``**queryables`` passthrough kwargs are collected by ``locals()`` + under the ``queryables`` key; they are flattened in here, so an extra + server-side filter such as ``state_name="Wisconsin"`` is normalized and sent + exactly like a named param. See + :func:`dataretrieval.waterdata.get_queryables` for each collection's + filterable properties (the service rejects an unknown one with a 400). """ + queryables = local_vars.pop("queryables", None) + if queryables: + local_vars.update(queryables) return _engine_get_args(local_vars, exclude, no_normalize=_NO_NORMALIZE_PARAMS) diff --git a/tests/waterdata_queryables_test.py b/tests/waterdata_queryables_test.py index fde3ab4d..3d26d08d 100644 --- a/tests/waterdata_queryables_test.py +++ b/tests/waterdata_queryables_test.py @@ -26,6 +26,7 @@ import json import re from pathlib import Path +from urllib.parse import parse_qs, urlsplit import pytest @@ -96,6 +97,67 @@ def test_get_queryables_unknown_collection_raises(httpx_mock): waterdata.get_queryables("not-a-collection") +# --- passthrough queryables (mocked) --------------------------------------- + +_DAILY_ITEMS_RE = re.compile( + r"^https://api\.waterdata\.usgs\.gov/ogcapi/v0/collections/daily/items" +) +_DAILY_SCHEMA_RE = re.compile( + r"^https://api\.waterdata\.usgs\.gov/ogcapi/v0/collections/daily/schema$" +) +_EMPTY_FEATURES = { + "type": "FeatureCollection", + "features": [], + "numberReturned": 0, + "numberMatched": 0, + "links": [], +} + + +def _mock_daily(httpx_mock): + """Mock the two endpoints a ``get_daily`` call touches: the items query and + 
the schema fetch (used for output typing).""" + httpx_mock.add_response(method="GET", url=_DAILY_SCHEMA_RE, json={"properties": {}}) + httpx_mock.add_response(method="GET", url=_DAILY_ITEMS_RE, json=_EMPTY_FEATURES) + + +def _items_query(httpx_mock): + """Parsed query string of the ``/items`` request the getter sent.""" + req = next(r for r in httpx_mock.get_requests() if "/items" in str(r.url)) + return parse_qs(urlsplit(str(req.url)).query) + + +def test_passthrough_queryables_sent_as_filters(httpx_mock): + """An OGC getter forwards queryables that aren't in its explicit signature + (e.g. ``state_name``, ``site_type_code``) to the service as query filters, + alongside the named params.""" + _mock_daily(httpx_mock) + + waterdata.get_daily( + monitoring_location_id="USGS-05427718", + state_name="Wisconsin", + site_type_code="ST", + ) + + qs = _items_query(httpx_mock) + assert qs["state_name"] == ["Wisconsin"] + assert qs["site_type_code"] == ["ST"] + assert qs["monitoring_location_id"] == ["USGS-05427718"] + + +def test_passthrough_list_queryable_is_comma_joined(httpx_mock): + """A list-valued passthrough queryable is normalized and comma-joined like a + named multi-value param.""" + _mock_daily(httpx_mock) + + waterdata.get_daily( + monitoring_location_id="USGS-05427718", + site_type_code=["ST", "LK"], + ) + + assert _items_query(httpx_mock)["site_type_code"] == ["ST,LK"] + + # --- live queryables monitor -----------------------------------------------