"""
convert.py
Written by Tyler Sutterley (06/2024)
Utilities for converting gridded ICESat-2 files from native netCDF4
PYTHON DEPENDENCIES:
h5netcdf: Pythonic interface to netCDF4 via h5py
https://h5netcdf.org/
numpy: Scientific Computing Tools For Python
https://numpy.org
https://numpy.org/doc/stable/user/numpy-for-matlab-users.html
xarray: N-D labeled arrays and datasets in Python
https://docs.xarray.dev/en/stable/
UPDATE HISTORY:
Updated 06/2024: use wrapper to importlib for optional dependencies
Updated 08/2023: use h5netcdf as the netCDF4 driver for xarray
Updated 07/2023: use logging instead of warnings for import attempts
Updated 06/2023: using pathlib to define and expand paths
Updated 11/2022: output variables and attributes in top-level group
use netCDF4 directly due to changes in xarray backends
Written 07/2022
"""
import logging
import pathlib
import numpy as np
from IS2view.utilities import import_dependency
# attempt imports
h5netcdf = import_dependency("h5netcdf")
xr = import_dependency("xarray")
# top-level netCDF4 groups that are left out of the conversion by default
_default_skip_groups = ("METADATA", "orbit_info", "quality_assessment")
class convert:
    # evaluated once, when the class body is executed: tell numpy to
    # ignore "invalid value" floating-point errors instead of warning
    np.seterr(invalid="ignore")

    def __init__(self, filename=None, output=None):
        """Utilities for converting gridded ICESat-2 files from native netCDF4

        Parameters
        ----------
        filename: str, obj or NoneType, default None
            input netCDF4 filename or io.BytesIO object
        output: str, obj or NoneType, default None
            output zarr store

            If ``None`` when :py:meth:`nc_to_zarr` is called, the input
            file path with its suffix replaced by ``.zarr`` is used
        """
        self.filename = filename
        self.output = output

    # PURPOSE: build the default zarr output path for an input file
    @staticmethod
    def _default_output(filename):
        """
        Build the default zarr output path for an input file

        Parameters
        ----------
        filename: str, pathlib.Path or obj
            input netCDF4 filename, or a file-like object carrying
            the original path in a ``filename`` attribute

        Returns
        -------
        output: pathlib.Path
            input path with its suffix replaced by ``.zarr``

        Raises
        ------
        AttributeError
            if ``filename`` is neither a path nor an object with
            a ``filename`` attribute
        """
        if isinstance(filename, (str, pathlib.Path)):
            path = pathlib.Path(filename)
        else:
            # file-like inputs cannot be converted to a path directly
            path = pathlib.Path(filename.filename)
        # split extension from netCDF4 file and replace with zarr
        return path.with_suffix(".zarr")

    # PURPOSE: convert the netCDF4 file to zarr copying all file data
    def nc_to_zarr(self, **kwds):
        """
        convert a netCDF4 file to zarr copying all file data

        Parameters
        ----------
        filename: str, obj or NoneType, default ``self.filename``
            input netCDF4 filename or io.BytesIO object
        output: str, obj or NoneType, default ``self.output``
            output zarr store

            If ``None``, derived from the input file path by
            replacing its suffix with ``.zarr``
        skip_groups: tuple, default ``_default_skip_groups``
            names of top-level netCDF4 groups that are not copied

        Notes
        -----
        ``self.filename`` and ``self.output`` are updated with the
        values that were used for the conversion
        """
        kwds.setdefault("filename", self.filename)
        kwds.setdefault("output", self.output)
        kwds.setdefault("skip_groups", _default_skip_groups)
        # update filenames
        self.filename = kwds["filename"]
        self.output = kwds["output"]
        # output zarr file
        # only derive a path from the input when no output was given, so
        # that file-like inputs without a filename attribute can be used
        # together with an explicit output
        if self.output is None:
            self.output = self._default_output(self.filename)
        # log input and output files
        logging.info(self.filename)
        logging.info(self.output)
        # find each group within the input netCDF4 file
        with h5netcdf.File(self.filename) as source:
            # copy variables and attributes from the top-level group
            # of the netCDF4 file to the root of the zarr file
            ds = xr.open_dataset(xr.backends.h5netcdf_.H5NetCDFStore(source))
            ds.to_zarr(store=self.output, mode="a")
            # for each group
            for group, nc in source.groups.items():
                # skip over specific groups
                if group in kwds["skip_groups"]:
                    continue
                # log the name of the netCDF4 group being copied
                logging.info(group)
                # copy everything from the netCDF4 group to the zarr file
                # mode "a" adds the group to the store written above
                ds = xr.open_dataset(xr.backends.h5netcdf_.H5NetCDFStore(nc))
                ds.to_zarr(store=self.output, mode="a", group=group)