Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions tests/data/nwis_site_seriescatalog.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#
#
# US Geological Survey
# retrieved: 2026-06-24 13:30:23 -04:00 (caas01)
#
# The Site File stores location and general information about groundwater,
# surface water, and meteorological sites
# for sites in USA.
#
# File-format description: http://help.waterdata.usgs.gov/faq/about-tab-delimited-output
# Automated-retrieval info: http://waterservices.usgs.gov/rest/Site-Service.html
#
# Contact: gs-w_support_nwisweb@usgs.gov
#
# The following selected fields are included in this output:
#
# agency_cd -- Agency
# site_no -- Site identification number
# station_nm -- Site name
# site_tp_cd -- Site type
# dec_lat_va -- Decimal latitude
# dec_long_va -- Decimal longitude
# coord_acy_cd -- Latitude-longitude accuracy
# dec_coord_datum_cd -- Decimal Latitude-longitude datum
# alt_va -- Altitude of Gage/land surface
# alt_acy_va -- Altitude accuracy
# alt_datum_cd -- Altitude datum
# huc_cd -- Hydrologic unit code
# data_type_cd -- Data type
# parm_cd -- Parameter code
# stat_cd -- Statistical code
# ts_id -- Internal timeseries ID
# loc_web_ds -- Additional measurement description
# medium_grp_cd -- Medium group code
# parm_grp_cd -- Parameter group code
# srs_id -- SRS ID
# access_cd -- Access code
# begin_date -- Begin date
# end_date -- End date
# count_nu -- Record count
#
agency_cd site_no station_nm site_tp_cd dec_lat_va dec_long_va coord_acy_cd dec_coord_datum_cd alt_va alt_acy_va alt_datum_cd huc_cd data_type_cd parm_cd stat_cd ts_id loc_web_ds medium_grp_cd parm_grp_cd srs_id access_cd begin_date end_date count_nu
5s 15s 50s 7s 16s 16s 1s 10s 8s 3s 10s 16s 2s 5s 5s 5n 30s 3s 3s 5n 4n 20d 20d 5n
USGS 01491000 CHOPTANK RIVER NEAR GREENSBORO, MD ST 38.99719444 -75.7858056 S NAD83 2.73 .1 NAVD88 02060005 ad 0 wat 0 0 2006 2025 20
USGS 01491000 CHOPTANK RIVER NEAR GREENSBORO, MD ST 38.99719444 -75.7858056 S NAD83 2.73 .1 NAVD88 02060005 dv 00010 00001 68074 [Discontinued] wat 1645597 0 1988-10-01 2012-05-09 894
USGS 01491000 CHOPTANK RIVER NEAR GREENSBORO, MD ST 38.99719444 -75.7858056 S NAD83 2.73 .1 NAVD88 02060005 dv 00010 00001 327630 wat 1645597 0 2023-04-21 2026-06-23 1155
204 changes: 76 additions & 128 deletions tests/nwis_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import pandas as pd
import pytest

from dataretrieval.exceptions import DataRetrievalError
from dataretrieval.nwis import (
NWIS_Metadata,
_read_rdb,
Expand All @@ -24,17 +23,16 @@
preformat_peaks_response,
what_sites,
)
from tests.conftest import flaky_api

START_DATE = "2018-01-24"
END_DATE = "2018-01-25"

DATETIME_COL = "datetime"
SITENO_COL = "site_no"

# Several tests in this module hit the live NWIS services, so retry a transient
# upstream failure rather than failing CI (see ``conftest.flaky_api``).
pytestmark = flaky_api
# Legacy NWIS endpoints these tests mock — this module makes no live calls.
_SITE_RE = re.compile(r"^https://waterservices\.usgs\.gov/nwis/site(\?.*)?$")
_IV_RE = re.compile(r"^https://waterservices\.usgs\.gov/nwis/iv(\?.*)?$")


def _load_mock_json(file_name):
Expand All @@ -44,6 +42,16 @@ def _load_mock_json(file_name):
return json.load(f)


def _load_fixture(file_name):
    """Return the UTF-8 text of a raw fixture file (e.g. an RDB response).

    The file is looked up in the ``data`` directory that sits next to this
    test module.
    """
    fixture_path = Path(__file__).parent / "data" / file_name
    return fixture_path.read_text(encoding="utf-8")


def _mock_site(httpx_mock, fixture="waterservices_site.txt"):
    """Register a mocked GET response for the legacy NWIS ``site`` endpoint.

    The response body is the text of the named RDB fixture file.
    """
    body = _load_fixture(fixture)
    httpx_mock.add_response(method="GET", url=_SITE_RE, text=body)


def _test_iv_service(httpx_mock):
"""Mocked test of instantaneous value service"""
start = START_DATE
Expand Down Expand Up @@ -73,39 +81,6 @@ def test_iv_service_answer(httpx_mock):
], f"iv service returned incorrect index: {df.index.names}"


def test_nwis_service_live():
    """Live sanity check of NWIS service, tolerant of transient NWIS outages."""
    site = "01491000"
    # Substrings in the error text that are treated as a transient outage.
    outage_markers = [
        "500",
        "502",
        "503",
        "Service Unavailable",
        "Received HTML response instead of JSON",
    ]
    try:
        # Smallest possible query: only the most recent record is requested.
        get_iv(sites=site)
    except (DataRetrievalError, ValueError) as e:
        # Typed retrieval errors and legacy ValueErrors: skip only when the
        # message matches a known outage marker, otherwise let it propagate.
        error_text = str(e)
        is_outage = False
        for marker in outage_markers:
            if marker in error_text:
                is_outage = True
                break
        if is_outage:
            pytest.skip(
                f"Service is currently unavailable (transient NWIS outage): {e}"
            )
        raise
    except Exception as e:
        # Fallback for any other exception whose message points at a JSON
        # decoding problem; everything else is re-raised.
        message = str(e)
        if "Expecting value" in message or "JSON" in message:
            pytest.skip(
                f"Service returned invalid response (likely transient outage): {e}"
            )
        raise


def test_preformat_peaks_response():
# make a data frame with a "peak_dt" datetime column
# it will have some nan and none values
Expand All @@ -119,14 +94,6 @@ def test_preformat_peaks_response():
assert df["datetime"].isna().sum() == 0


# tests using real queries to USGS webservices
# these specific queries represent some edge-cases and the tests to address
# incomplete date-time information


# Removed defunct gwlevels tests.


class TestDeprecationWarnings:
"""Verify per-function DeprecationWarning fires with the right replacement.

Expand Down Expand Up @@ -257,78 +224,80 @@ def test_get_record_defunct_service_water_use(self):


class TestTZ:
"""Tests relating to GitHub Issue #60."""
"""Tests relating to GitHub Issue #60 — merging IV results across sites
yields a proper datetime index. Mocked against fixture responses."""

@pytest.fixture(scope="class")
def sites(self):
# Fetch once per class, at test time (not at collection) so a transient
# upstream failure is retried by the module ``flaky`` marker instead of
# aborting collection — a class-body call cannot be rerun.
sites, _ = what_sites(stateCd="MD")
return sites
def _mock(self, httpx_mock):
_mock_site(httpx_mock)
httpx_mock.add_response(
method="GET", url=_IV_RE, json=_load_mock_json("nwis_iv_mock.json")
)

def test_multiple_tz_01(self, sites):
"""Test based on GitHub Issue #60 - error merging different time zones."""
# this test fails before issue #60 is fixed
def test_multiple_tz_01(self, httpx_mock):
"""Issue #60 - merging IV across sites yields a datetime index."""
self._mock(httpx_mock)
sites, _ = what_sites(stateCd="MD")
iv, _ = get_iv(sites=sites.site_no.values[:25].tolist())
# assert that the datetime column exists
assert "datetime" in iv.index.names
# assert that it is a datetime type
assert isinstance(iv.index[0][1], datetime.datetime)

def test_multiple_tz_02(self, sites):
"""Test based on GitHub Issue #60 - confirm behavior for same tz."""
# this test passes before issue #60 is fixed
def test_multiple_tz_02(self, httpx_mock):
"""Issue #60 - the same-tz path also yields a datetime index."""
self._mock(httpx_mock)
sites, _ = what_sites(stateCd="MD")
iv, _ = get_iv(sites=sites.site_no.values[:20].tolist())
# assert that the datetime column exists
assert "datetime" in iv.index.names
# assert that it is a datetime type
assert isinstance(iv.index[0][1], datetime.datetime)


class TestSiteseriesCatalogOutput:
"""Tests relating to GitHub Issue #34."""
"""Tests relating to GitHub Issue #34 — ``seriesCatalogOutput`` adds the
data-inventory columns (begin_date / end_date / count_nu). Mocked against
fixture responses (the chosen fixture, not the request param, decides which
columns come back)."""

def test_seriesCatalogOutput_get_record(self):
"""Test setting seriesCatalogOutput to true with get_record."""
_SERIESCATALOG = "nwis_site_seriescatalog.txt"

def test_seriesCatalogOutput_get_record(self, httpx_mock):
"""seriesCatalogOutput=True with get_record exposes inventory columns."""
_mock_site(httpx_mock, self._SERIESCATALOG)
data = get_record(
huc="20", parameterCd="00060", service="site", seriesCatalogOutput="True"
)
# assert that expected data columns are present
assert "begin_date" in data.columns
assert "end_date" in data.columns
assert "count_nu" in data.columns

def test_seriesCatalogOutput_get_info(self):
"""Test setting seriesCatalogOutput to true with get_info."""
def test_seriesCatalogOutput_get_info(self, httpx_mock):
"""seriesCatalogOutput=TRUE with get_info exposes inventory columns."""
_mock_site(httpx_mock, self._SERIESCATALOG)
data, _ = get_info(huc="20", parameterCd="00060", seriesCatalogOutput="TRUE")
# assert that expected data columns are present
assert "begin_date" in data.columns
assert "end_date" in data.columns
assert "count_nu" in data.columns

def test_seriesCatalogOutput_bool(self):
"""Test setting seriesCatalogOutput with a boolean."""
def test_seriesCatalogOutput_bool(self, httpx_mock):
"""A boolean seriesCatalogOutput is accepted and exposes inventory columns."""
_mock_site(httpx_mock, self._SERIESCATALOG)
data, _ = get_info(huc="20", parameterCd="00060", seriesCatalogOutput=True)
# assert that expected data columns are present
assert "begin_date" in data.columns
assert "end_date" in data.columns
assert "count_nu" in data.columns

def test_expandedrdb_get_record(self):
"""Test default expanded_rdb format with get_record."""
def test_expandedrdb_get_record(self, httpx_mock):
"""The default expanded-rdb format omits the inventory columns."""
_mock_site(httpx_mock)
data = get_record(
huc="20", parameterCd="00060", service="site", seriesCatalogOutput="False"
)
# assert that seriesCatalogOutput columns are not present
assert "begin_date" not in data.columns
assert "end_date" not in data.columns
assert "count_nu" not in data.columns

def test_expandedrdb_get_info(self):
"""Test default expanded_rdb format with get_info."""
def test_expandedrdb_get_info(self, httpx_mock):
"""get_info default omits the inventory columns."""
_mock_site(httpx_mock)
data, _ = get_info(huc="20", parameterCd="00060")
# assert that seriesCatalogOutput columns are not present
assert "begin_date" not in data.columns
assert "end_date" not in data.columns
assert "count_nu" not in data.columns
Expand All @@ -353,67 +322,46 @@ def test_empty_timeseries(httpx_mock):


class TestMetaData:
"""Tests of NWIS metadata setting,

Notes
-----
"""Tests of NWIS metadata setting (originally GitHub Issue #73).

- Originally based on GitHub Issue #73.
- Modified to expose site_info as a property, not a callable.
``site_info`` is a property that lazily re-queries ``what_sites``; mocked
here against the ``site`` endpoint so it is exercised offline.
"""

def test_set_metadata_info_site(self):
"""Test metadata info is set when site parameter is supplied."""
# mock the query response
response = mock.MagicMock()
# make metadata call
md = NWIS_Metadata(response, sites="01491000")
# assert that site_info is implemented
def test_set_metadata_info_site(self, httpx_mock):
"""site_info is populated when ``sites`` is supplied."""
_mock_site(httpx_mock)
md = NWIS_Metadata(mock.MagicMock(), sites="01491000")
assert md.site_info

def test_set_metadata_info_site_no(self):
"""Test metadata info is set when site_no parameter is supplied."""
# mock the query response
response = mock.MagicMock()
# make metadata call
md = NWIS_Metadata(response, site_no="01491000")
# assert that site_info is implemented
def test_set_metadata_info_site_no(self, httpx_mock):
"""site_info is populated when ``site_no`` is supplied."""
_mock_site(httpx_mock)
md = NWIS_Metadata(mock.MagicMock(), site_no="01491000")
assert md.site_info

def test_set_metadata_info_stateCd(self):
"""Test metadata info is set when stateCd parameter is supplied."""
# mock the query response
response = mock.MagicMock()
# make metadata call
md = NWIS_Metadata(response, stateCd="RI")
# assert that site_info is implemented
def test_set_metadata_info_stateCd(self, httpx_mock):
"""site_info is populated when ``stateCd`` is supplied."""
_mock_site(httpx_mock)
md = NWIS_Metadata(mock.MagicMock(), stateCd="RI")
assert md.site_info

def test_set_metadata_info_huc(self):
"""Test metadata info is set when huc parameter is supplied."""
# mock the query response
response = mock.MagicMock()
# make metadata call
md = NWIS_Metadata(response, huc="01")
# assert that site_info is implemented
def test_set_metadata_info_huc(self, httpx_mock):
"""site_info is populated when ``huc`` is supplied."""
_mock_site(httpx_mock)
md = NWIS_Metadata(mock.MagicMock(), huc="01")
assert md.site_info

def test_set_metadata_info_bbox(self):
"""Test metadata info is set when bbox parameter is supplied."""
# mock the query response
response = mock.MagicMock()
# make metadata call
md = NWIS_Metadata(response, bBox="-92.8,44.2,-88.9,46.0")
# assert that site_info is implemented
def test_set_metadata_info_bbox(self, httpx_mock):
"""site_info is populated when ``bBox`` is supplied."""
_mock_site(httpx_mock)
md = NWIS_Metadata(mock.MagicMock(), bBox="-92.8,44.2,-88.9,46.0")
assert md.site_info

def test_set_metadata_info_countyCd(self):
"""Test metadata info is set when countyCd parameter is supplied."""
# mock the query response
response = mock.MagicMock()
# make metadata call
md = NWIS_Metadata(response, countyCd="01001")
# assert that site_info is implemented
def test_set_metadata_info_countyCd(self, httpx_mock):
"""site_info is populated when ``countyCd`` is supplied."""
_mock_site(httpx_mock)
md = NWIS_Metadata(mock.MagicMock(), countyCd="01001")
assert md.site_info


Expand Down
Loading