diff --git a/Changelog.rst b/Changelog.rst index f861c8f804..4a5c86cd36 100644 --- a/Changelog.rst +++ b/Changelog.rst @@ -3,6 +3,13 @@ Version NEXTVERSION **2026-06-??** +* New methods to convert to `xarray`: `cf.Field.to_xarray`, + `cf.FieldList.to_xarray`, `cf.Domain.to_xarray`, and + `cf.DomainList.to_xarray` + (https://github.com/NCAS-CMS/cf-python/issues/933) +* New output format for `cf.write` that creates an `xarray` dataset in + memory: ``'XARRAY'`` + (https://github.com/NCAS-CMS/cf-python/issues/933) * New keyword parameter to `cf.Field.regrids` and `cf.Field.regridc`: ``mtol`` (https://github.com/NCAS-CMS/cf-python/issues/949) * Fix bug in `cf.read` that prevented some OPeNDAP URLS being read @@ -10,6 +17,7 @@ Version NEXTVERSION * Fix bug that caused weighted `cf.collapse` and `cf.weights` to fail when a cell meausures has a size 1 axis (https://github.com/NCAS-CMS/cf-python/issues/952) +* New optional dependency: ``xarray>=2026.2.0`` ---- diff --git a/cf/mixin/fielddomain.py b/cf/mixin/fielddomain.py index f26b1c3de1..c7cdf02f6e 100644 --- a/cf/mixin/fielddomain.py +++ b/cf/mixin/fielddomain.py @@ -3884,6 +3884,43 @@ def set_coordinate_reference( return self.set_construct(ref, key=key, copy=False) + def to_xarray(self, group=True): + """Convert the {{class}} to an `xarray` dataset. + + {{cf_xarray description}} + + Note that ``ds = f.to_xarray()`` is identical to ``ds = + cf.write(f, fmt='XARRAY')``; and multiple {{class_lower}}s may + be written to the same `xarray` dataset with + `cf.{{class}}List.to_xarray`, or with `cf.write` (e.g. ``ds = + cf.write([f, g], fmt='XARRAY')``). Also, `cf.write` allows a + mixture of fields and domains to be written to the + same `xarray` dataset. + + .. versionadded:: NEXTVERSION + + .. seealso:: `cf.{{class}}List.to_xarray`, `cf.write` + + :Parameters: + + group: `bool`, optional + + If False then create a "flat" dataset, i.e.
one with + only the root group, regardless of any group structure + specified by the netCDF interfaces of the + {{class_lower}} and its components. If True (the + default) then any sub-groups will be created and + populated. + + :Returns: + + {{Returns xarray}} + + """ + from cf.read_write import write + + return write(self, fmt="XARRAY", group=group) + # ---------------------------------------------------------------- # Aliases # ---------------------------------------------------------------- diff --git a/cf/mixin/fielddomainlist.py b/cf/mixin/fielddomainlist.py index 1e754ed357..8d529c274f 100644 --- a/cf/mixin/fielddomainlist.py +++ b/cf/mixin/fielddomainlist.py @@ -331,3 +331,35 @@ def select_by_rank(self, *ranks): """ return type(self)(f for f in self if f.match_by_rank(*ranks)) + + def to_xarray(self, group=True): + """Convert the list elements to an `xarray` dataset. + + {{cf_xarray description}} + + Note that ``ds = fl.to_xarray()`` is identical to ``ds = + cf.write(fl, fmt='XARRAY')``. Also, `cf.write` allows a + mixture of fields and domains to be written to the + same `xarray` dataset. + + .. versionadded:: NEXTVERSION + + .. seealso:: `cf.write` + + :Parameters: + + group: `bool`, optional + If False then create a "flat" dataset, i.e. one with + only the root group, regardless of any group structure + specified by the netCDF interfaces of the list + elements and their components. If True (the default) + then any sub-groups will be created and populated.
+ + :Returns: + + {{Returns xarray}} + + """ + from cf.read_write import write + + return write(self, fmt="XARRAY", group=group) diff --git a/cf/test/test_xarray.py b/cf/test/test_xarray.py new file mode 100644 index 0000000000..619f453c5e --- /dev/null +++ b/cf/test/test_xarray.py @@ -0,0 +1,178 @@ +import datetime +import faulthandler +import unittest + +faulthandler.enable() # to debug seg faults and timeouts + +import xarray as xr + +import cf + + +class xarrayTest(unittest.TestCase): + """Unit test for converting to xarray.""" + + def setUp(self): + """Preparations called immediately before each test method.""" + # Disable log messages to silence expected warnings + cf.log_level("DISABLE") + # Note: to enable all messages for given methods, lines or + # calls (those without a 'verbose' option to do the same) + # e.g. to debug them, wrap them (for methods, start-to-end + # internally) as follows: + # + # cf.log_level('DEBUG') + # < ... test code ... > + # cf.log_level('DISABLE') + + def test_Field_to_xarray(self): + """Test Field.to_xarray.""" + fields = cf.example_fields() + + # Write each field to a different xarray dataset + for f in fields: + ds = f.to_xarray() + self.assertIsInstance(ds, xr.Dataset) + str(ds) + self.assertIn("Conventions", ds.attrs) + + # Write all fields to one xarray dataset + ds = cf.write(fields, fmt="XARRAY") + self.assertIsInstance(ds, xr.Dataset) + str(ds) + + def test_Domain_to_xarray(self): + """Test Domain.to_xarray.""" + domains = [f.domain for f in cf.example_fields()] + + # Write each domain to a different xarray dataset + for d in domains: + ds = d.to_xarray() + self.assertIsInstance(ds, xr.Dataset) + str(ds) + + # Write all domains to one xarray dataset + ds = cf.write(domains, fmt="XARRAY") + self.assertIsInstance(ds, xr.Dataset) + str(ds) + + def test_FieldList_to_xarray(self): + """Test FieldList.to_xarray.""" + fields = cf.example_fields() + ds = fields.to_xarray() + self.assertIsInstance(ds, xr.Dataset) + str(ds) + + def
test_DomainList_to_xarray(self): + """Test DomainList.to_xarray.""" + domains = cf.DomainList([f.domain for f in cf.example_fields()]) + ds = domains.to_xarray() + self.assertIsInstance(ds, xr.Dataset) + str(ds) + + def test_FieldList_to_xarray_from_disk(self): + """Test FieldList.to_xarray from datasets read from disk.""" + for dataset in ( + "example_field_0.nc", + "example_field_0.zarr2", + "example_field_0.zarr3", + "gathered.nc", + "DSG_timeSeries_contiguous.nc", + "DSG_timeSeries_indexed.nc", + "DSG_timeSeriesProfile_indexed_contiguous.nc", + "parent.nc", + "external.nc", + "external_missing.nc", + "combined.nc", + "geometry_1.nc", + "geometry_2.nc", + "geometry_3.nc", + "geometry_4.nc", + "geometry_interior_ring.nc", + "geometry_interior_ring_2.nc", + "string_char.nc", + "subsampled_2.nc", + "ugrid_1.nc", + "ugrid_2.nc", + "ugrid_3.nc", + "test_file.nc", + "extra_data.pp", + "file1.pp", + "umfile.pp", + "wgdos_packed.pp", + ): + f = cf.read(dataset) + ds = f.to_xarray() + self.assertIsInstance(ds, xr.Dataset) + str(ds) + + def test_Field_to_xarray_groups(self): + """Test Field.to_xarray with groups.""" + f = cf.example_field(0) + g = f.copy() + + ds = f.to_xarray() + self.assertIsInstance(ds, xr.Dataset) + + f.nc_set_variable("/forecast/model/q2") + ds = f.to_xarray() + self.assertIsInstance(ds, xr.DataTree) + self.assertIn("q2", ds["/forecast/model"]) + str(ds) + + # group=True + ds = cf.write([f, g], fmt="XARRAY") + self.assertIsInstance(ds, xr.DataTree) + str(ds) + + self.assertIn("q", ds) + self.assertIn("q2", ds["/forecast/model"]) + + # group=False + ds = f.to_xarray(group=False) + self.assertIsInstance(ds, xr.Dataset) + self.assertIn("q2", ds) + str(ds) + + ds = cf.write([f, g], fmt="XARRAY", group=False) + self.assertIsInstance(ds, xr.Dataset) + str(ds) + + self.assertIn("q", ds) + self.assertIn("q2", ds) + + def test_FieldList_to_xarray_groups(self): + """Test FieldList.to_xarray with groups.""" + f = cf.example_fields(0) + + ds = f.to_xarray() +
self.assertIsInstance(ds, xr.Dataset) + + f[0].nc_set_variable("/forecast/model/q2") + ds = f.to_xarray() + self.assertIsInstance(ds, xr.DataTree) + self.assertIn("q2", ds["/forecast/model"]) + str(ds) + + ds = f.to_xarray(group=False) + self.assertIsInstance(ds, xr.Dataset) + self.assertIn("q2", ds) + str(ds) + + def test_Field_to_xarray_aggregation(self): + """Test Field.to_xarray with aggregated data.""" + f = cf.read("example_field_0.nc")[0] + dsf = f.to_xarray() + + self.assertEqual(f.shape[0], 5) + g = cf.aggregate([f[:3], f[3:]])[0] + dsg = g.to_xarray() + + self.assertTrue(dsg.equals(dsf)) + + +if __name__ == "__main__": + print("Run date:", datetime.datetime.now()) + cf.environment() + print("") + unittest.main(verbosity=2) diff --git a/docs/source/class/cf.Domain.rst b/docs/source/class/cf.Domain.rst index e445416244..bcdd1a7ef7 100644 --- a/docs/source/class/cf.Domain.rst +++ b/docs/source/class/cf.Domain.rst @@ -262,6 +262,20 @@ NetCDF ~cf.Domain.nc_set_global_attribute ~cf.Domain.nc_set_global_attributes +.. _Domain-xarray: + +xarray +------ + +.. rubric:: Methods + +.. autosummary:: + :nosignatures: + :toctree: ../method/ + :template: method.rst + + ~cf.Domain.to_xarray + Groups ^^^^^^ diff --git a/docs/source/class/cf.Field.rst b/docs/source/class/cf.Field.rst index 9e9b02c296..07a4995918 100644 --- a/docs/source/class/cf.Field.rst +++ b/docs/source/class/cf.Field.rst @@ -440,6 +440,20 @@ NetCDF ~cf.Field.nc_dataset_chunksizes ~cf.Field.nc_set_dataset_chunksizes +.. _Field-xarray: + +xarray +------ + +.. rubric:: Methods + +.. autosummary:: + :nosignatures: + :toctree: ../method/ + :template: method.rst + + ~cf.Field.to_xarray + Groups ^^^^^^ diff --git a/docs/source/class/cf.FieldList.rst b/docs/source/class/cf.FieldList.rst index dfc462f727..34ffc334a1 100644 --- a/docs/source/class/cf.FieldList.rst +++ b/docs/source/class/cf.FieldList.rst @@ -41,6 +41,20 @@ Comparison ~cf.FieldList.equals +.. _FieldList-xarray: + +xarray +------ + +.. 
rubric:: Methods + +.. autosummary:: + :nosignatures: + :toctree: ../method/ + :template: method.rst + + ~cf.FieldList.to_xarray + Miscellaneous ------------- diff --git a/docs/source/installation.rst b/docs/source/installation.rst index 976c717123..5f0b2931df 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -273,6 +273,12 @@ environments for which these features are not required. For reading and writing Zarr datasets. +.. rubric:: xarray + +* `xarray <https://docs.xarray.dev>`_, version 2026.2.0 or newer. + + For converting fields and domains to `xarray` datasets in memory. + .. rubric:: Regridding * `esmpy `_, version 8.7.0 or diff --git a/setup.py b/setup.py index 8aa39b8d03..7f3f844a0d 100755 --- a/setup.py +++ b/setup.py @@ -181,65 +181,44 @@ def compile(): * read field constructs and domain constructs from netCDF, CDL, Zarr, Kerchunk, PP and UM datasets with a choice of netCDF backends, - * read files from OPeNDAP servers and S3 object stores, - * be fully flexible with respect to dataset storage chunking, - -* create new field constructs in memory, - +* create new field and domain constructs in memory, * write and append field and domain constructs to netCDF and Zarr v3 datasets on disk, with control over HDF5 internal file metadata, - -* read, create, and manipulate UGRID mesh topologies, - +* read, write, and manipulate UGRID mesh topologies, * read, write, and manipulate HEALPix grids, - * read, write, and create coordinates defined by geometry cells, - -* read netCDF and CDL datasets containing hierarchical groups, - +* read and write netCDF4 string data-type variables, +* read, write, and create netCDF and CDL datasets containing hierarchical groups, +* read, write, and create data that have been compressed by convention + (i.e.
ragged or gathered arrays, or coordinate arrays compressed by + subsampling), whilst presenting a view of the data in its + uncompressed form, +* read and write data that are quantized to eliminate false + precision, +* convert field and domain constructs to `xarray` datasets in memory, * inspect field constructs, - * test whether two field constructs are the same, - * modify field construct metadata and data, - * create subspaces of field constructs, - * incorporate, and create, metadata stored in external files, - -* read, write, and create data that have been compressed by convention - (i.e. ragged or gathered arrays, or coordinate arrays compressed by - subsampling), whilst presenting a view of the data in its - uncompressed form, - * combine field constructs arithmetically, - * manipulate field construct data by arithmetical and trigonometrical operations, - * perform weighted statistical collapses on field constructs, including those with geometry cells, UGRID mesh topologies, and HEALPix grids, - * perform histogram, percentile and binning operations on field constructs, - * regrid structured grid, UGRID, HEALPix, and DSG field constructs with (multi-)linear, nearest neighbour, first- and second-order conservative and higher order patch recovery methods, including 3-d regridding, and large-grid support, - * apply convolution filters to field constructs, - * create running means from field constructs, +* apply differential operators to field constructs, and +* create derived quantities (such as relative vorticity). -* apply differential operators to field constructs, - -* create derived quantities (such as relative vorticity), - -* read and write data that are quantized to eliminate false - precision. """ @@ -274,6 +253,9 @@ def compile(): "zarr": [ "zarr>=3.1.3", ], + "xarray": [ + "xarray>=2026.2.0", + ], } setup(