Source code for IS2view.convert

"""
convert.py
Written by Tyler Sutterley (06/2024)
Utilities for converting gridded ICESat-2 files from native netCDF4 to zarr

PYTHON DEPENDENCIES:
    h5netcdf: Pythonic interface to netCDF4 via h5py
        https://h5netcdf.org/
    numpy: Scientific Computing Tools For Python
        https://numpy.org
        https://numpy.org/doc/stable/user/numpy-for-matlab-users.html
    xarray: N-D labeled arrays and datasets in Python
        https://docs.xarray.dev/en/stable/

UPDATE HISTORY:
    Updated 06/2024: use wrapper to importlib for optional dependencies
    Updated 08/2023: use h5netcdf as the netCDF4 driver for xarray
    Updated 07/2023: use logging instead of warnings for import attempts
    Updated 06/2023: using pathlib to define and expand paths
    Updated 11/2022: output variables and attributes in top-level group
        use netCDF4 directly due to changes in xarray backends
    Written 07/2022
"""

import logging
import pathlib
import numpy as np
from IS2view.utilities import import_dependency

# attempt imports
h5netcdf = import_dependency("h5netcdf")
xr = import_dependency("xarray")

# default groups to skip
_default_skip_groups = (
    "METADATA",
    "orbit_info",
    "quality_assessment",
)


class convert:
    # silence numpy "invalid value" floating point warnings
    # NOTE: executed once when the class is defined and changes the
    # process-wide numpy error state
    np.seterr(invalid="ignore")

    def __init__(self, filename=None, output=None):
        """Utilities for converting gridded ICESat-2 files from native netCDF4

        Parameters
        ----------
        filename: str, obj or NoneType, default None
            input netCDF4 filename or io.BytesIO object
        output: str, obj or NoneType, default None
            output zarr store

            When ``None``, ``nc_to_zarr`` derives the output path from
            the input filename by replacing its suffix with ``.zarr``
        """
        self.filename = filename
        self.output = output

    # PURPOSE: derive the default zarr output path from the input file
    def _default_output(self):
        """
        Build the default output path from the input filename

        Returns
        -------
        pathlib.Path
            input filename with its suffix replaced by ``.zarr``

        Raises
        ------
        AttributeError
            if the input is not a path and has no ``filename`` attribute
        """
        if isinstance(self.filename, (str, pathlib.Path)):
            filename = pathlib.Path(self.filename)
        else:
            # non-path inputs are expected to carry the name of the
            # original file in a ``filename`` attribute
            filename = pathlib.Path(self.filename.filename)
        # split extension from netCDF4 file
        return filename.with_suffix(".zarr")

    # PURPOSE: convert the netCDF4 file to zarr copying all file data
    def nc_to_zarr(self, **kwds):
        """
        convert a netCDF4 file to zarr copying all file data

        Parameters
        ----------
        **kwds: dict
            keyword arguments for output

            - ``filename``: input netCDF4 file (default ``self.filename``)
            - ``output``: output zarr store (default ``self.output``)
            - ``skip_groups``: names of top-level groups to skip
        """
        kwds.setdefault("filename", self.filename)
        kwds.setdefault("output", self.output)
        kwds.setdefault("skip_groups", _default_skip_groups)
        # update filenames
        self.filename = kwds["filename"]
        self.output = kwds["output"]
        # output zarr file
        # only derive a path from the input when no output was supplied so
        # that file-like inputs without a name work with an explicit output
        if self.output is None:
            self.output = self._default_output()
        # log input and output files
        logging.info(self.filename)
        logging.info(self.output)
        # find each group within the input netCDF4 file
        with h5netcdf.File(self.filename) as source:
            # NOTE(review): the datasets below are intentionally not closed;
            # closing them may also close the shared ``source`` handle that
            # the remaining groups still need -- confirm before changing
            # copy variables and attributes from the top-level group
            # copy everything from the netCDF4 file to the zarr file
            ds = xr.open_dataset(xr.backends.h5netcdf_.H5NetCDFStore(source))
            ds.to_zarr(store=self.output, mode="a")
            # for each group
            for group, nc in source.groups.items():
                # skip over specific groups
                if group in kwds["skip_groups"]:
                    continue
                # log the netCDF4 group being copied
                logging.info(group)
                # copy everything from the netCDF4 group to the zarr file
                ds = xr.open_dataset(xr.backends.h5netcdf_.H5NetCDFStore(nc))
                ds.to_zarr(store=self.output, mode="a", group=group)