Source code for IS2view.convert

"""
convert.py
Written by Tyler Sutterley (06/2024)
Utilities for converting gridded ICESat-2 files from native netCDF4 to zarr

PYTHON DEPENDENCIES:
    h5netcdf: Pythonic interface to netCDF4 via h5py
        https://h5netcdf.org/
    numpy: Scientific Computing Tools For Python
        https://numpy.org
        https://numpy.org/doc/stable/user/numpy-for-matlab-users.html
    xarray: N-D labeled arrays and datasets in Python
        https://docs.xarray.dev/en/stable/

UPDATE HISTORY:
    Updated 06/2024: use wrapper to importlib for optional dependencies
    Updated 08/2023: use h5netcdf as the netCDF4 driver for xarray
    Updated 07/2023: use logging instead of warnings for import attempts
    Updated 06/2023: using pathlib to define and expand paths
    Updated 11/2022: output variables and attributes in top-level group
        use netCDF4 directly due to changes in xarray backends
    Written 07/2022
"""
import logging
import pathlib
import numpy as np
from IS2view.utilities import import_dependency

# attempt imports
# h5netcdf and xarray are loaded through import_dependency rather than a
# plain import statement (UPDATE HISTORY: "wrapper to importlib for
# optional dependencies")
h5netcdf = import_dependency('h5netcdf')
xr = import_dependency('xarray')

# default groups to skip
# names of netCDF4 groups that nc_to_zarr does not copy to the zarr output
# unless the caller supplies a different `skip_groups` keyword argument
_default_skip_groups = ('METADATA', 'orbit_info', 'quality_assessment',)

class convert():
    # ignore numpy warnings for invalid floating-point operations
    # NOTE: this runs once, when the class body is executed at import time
    np.seterr(invalid='ignore')

    def __init__(self, filename=None, output=None):
        """Utilities for converting gridded ICESat-2 files from native netCDF4

        Parameters
        ----------
        filename: str, obj or NoneType, default None
            input netCDF4 filename or io.BytesIO object
        output: str, obj or NoneType, default None
            output zarr store

            If ``None``, ``nc_to_zarr`` uses the input file name with a
            ``.zarr`` suffix
        """
        self.filename = filename
        self.output = output

    # PURPOSE: find the path of the input file
    def _input_path(self):
        """
        Path of the input netCDF4 file

        Only used for building the default output name

        Returns
        -------
        path: pathlib.Path
            ``self.filename`` as a path, or the ``filename`` attribute
            of a file-like input object

        Raises
        ------
        AttributeError
            if the input is not a path and has no ``filename`` attribute
        """
        if isinstance(self.filename, (str, pathlib.Path)):
            return pathlib.Path(self.filename)
        # file-like inputs are expected to carry their name
        # in a `filename` attribute
        try:
            name = self.filename.filename
        except AttributeError as exc:
            raise AttributeError(
                'Cannot build a default output name: the input has no '
                '`filename` attribute. Set `output` explicitly.'
            ) from exc
        return pathlib.Path(name)

    # PURPOSE: convert the netCDF4 file to zarr copying all file data
    def nc_to_zarr(self, **kwds):
        """
        convert a netCDF4 file to zarr copying all file data

        The top-level group is written to the root of the zarr store
        and each remaining group is written to a zarr group of the
        same name

        Parameters
        ----------
        **kwds: dict
            keyword arguments for output

            - ``filename``: input netCDF4 file (default ``self.filename``)
            - ``output``: output zarr store (default ``self.output``)
            - ``skip_groups``: names of the groups to not copy
              (``None`` copies all groups)

        Raises
        ------
        AttributeError
            if no output is set and the input is a file-like object
            without a ``filename`` attribute
        """
        kwds.setdefault('filename', self.filename)
        kwds.setdefault('output', self.output)
        kwds.setdefault('skip_groups', _default_skip_groups)
        # update filenames
        self.filename = kwds['filename']
        self.output = kwds['output']
        # an explicit None means that no groups are skipped
        skip_groups = kwds['skip_groups']
        if skip_groups is None:
            skip_groups = ()
        # output zarr file
        # the input path is only needed for building the default name,
        # so file-like inputs without a name work if an output is given
        if self.output is None:
            self.output = self._input_path().with_suffix('.zarr')
        # log input and output files
        logging.info(self.filename)
        logging.info(self.output)
        # find each group within the input netCDF4 file
        with h5netcdf.File(self.filename) as source:
            # NOTE(review): the datasets opened below are not closed
            # explicitly as they wrap the handle owned by this `with`
            # block. Closing a dataset may also close `source`:
            # confirm before adding explicit closes
            # copy variables and attributes from the top-level group
            ds = xr.open_dataset(xr.backends.h5netcdf_.H5NetCDFStore(source))
            ds.to_zarr(store=self.output, mode='a')
            # for each group
            for group, nc in source.groups.items():
                # skip over specific groups
                if group in skip_groups:
                    continue
                logging.info(group)
                # copy everything from the netCDF4 group to the zarr file
                ds = xr.open_dataset(xr.backends.h5netcdf_.H5NetCDFStore(nc))
                ds.to_zarr(store=self.output, mode='a', group=group)