id,node_id,number,title,user,state,locked,assignee,milestone,comments,created_at,updated_at,closed_at,author_association,active_lock_reason,draft,pull_request,body,reactions,performed_via_github_app,state_reason,repo,type 1210753839,I_kwDOAMm_X85IKqMv,6506,HDF Netcdf4 error when writing to_netcdf,25382032,closed,0,,,2,2022-04-21T09:21:25Z,2023-09-12T15:12:28Z,2023-09-12T15:12:28Z,NONE,,,,"### What is your issue? I have a dataarray of significant wave height data for the Mediterranean Sea and I'm trying to compute the monthly quantiles. I have used the following code for some models and it was working correctly: ``` dict_ds_months = dict(ds.groupby('time.month')) for month in dict_ds_months.keys(): ds_month = dict_ds_months[month] q_month = ds_month.chunk({'time': -1}).quantile(quantiles, dim='time').rename(quantile='quantiles') q_month.to_netcdf(fn_q_month) ``` Now I have a new model to analyse and it is failing: ``` HDF5-DIAG: Error detected in HDF5 (1.12.1) thread 1: #000: H5Dio.c line 179 in H5Dread(): can't read data major: Dataset minor: Read failed #001: H5VLcallback.c line 2011 in H5VL_dataset_read(): dataset read failed major: Virtual Object Layer minor: Read failed #002: H5VLcallback.c line 1978 in H5VL__dataset_read(): dataset read failed major: Virtual Object Layer minor: Read failed #003: H5VLnative_dataset.c line 166 in H5VL__native_dataset_read(): can't read data major: Dataset minor: Read failed #004: H5Dio.c line 545 in H5D__read(): can't read data major: Dataset minor: Read failed #005: H5Dchunk.c line 2549 in H5D__chunk_read(): unable to read raw data chunk major: Low-level I/O minor: Read failed #006: H5Dchunk.c line 3904 in H5D__chunk_lock(): data pipeline read failed major: Dataset minor: Filter operation failed #007: H5Z.c line 1400 in H5Z_pipeline(): filter returned failure during read major: Data filters minor: Read failed #008: H5Zdeflate.c line 118 in H5Z__filter_deflate(): inflate() failed major: Data filters minor: Unable to initialize object Traceback (most recent call last): File ""/home/wavewatch/lloarca/dicca_climate_change.git/b03_bias_adjustment_waves.py"", line 198, in get_quantiles(fns=dir_cordex_wave_rcp85.glob(f'WW3_{info_model[-1]}_*.nc'), data=info_model[2], period='rcp85', File ""/home/wavewatch/lloarca/dicca_climate_change.git/b03_bias_adjustment_waves.py"", line 78, in get_quantiles q_month.to_netcdf(fn_q_month) File ""/opt/anaconda3/lib/python3.8/site-packages/xarray/core/dataarray.py"", line 2847, in to_netcdf return dataset.to_netcdf(*args, **kwargs) File ""/opt/anaconda3/lib/python3.8/site-packages/xarray/core/dataset.py"", line 1901, in to_netcdf return to_netcdf( File ""/opt/anaconda3/lib/python3.8/site-packages/xarray/backends/api.py"", line 1081, in to_netcdf writes = writer.sync(compute=compute) File ""/opt/anaconda3/lib/python3.8/site-packages/xarray/backends/common.py"", line 166, in sync delayed_store = da.store( File ""/opt/anaconda3/lib/python3.8/site-packages/dask/array/core.py"", line 1043, in store compute_as_if_collection(Array, store_dsk, store_keys, **kwargs) File ""/opt/anaconda3/lib/python3.8/site-packages/dask/base.py"", line 315, in compute_as_if_collection return schedule(dsk2, keys, **kwargs) File ""/opt/anaconda3/lib/python3.8/site-packages/dask/threaded.py"", line 79, in get results = get_async( File ""/opt/anaconda3/lib/python3.8/site-packages/dask/local.py"", line 517, in get_async raise_exception(exc, tb) File ""/opt/anaconda3/lib/python3.8/site-packages/dask/local.py"", line 325, in reraise raise exc File ""/opt/anaconda3/lib/python3.8/site-packages/dask/local.py"", line 223, in execute_task result = _execute_task(task, data) File ""/opt/anaconda3/lib/python3.8/site-packages/dask/core.py"", line 121, in _execute_task return func(*(_execute_task(a, cache) for a in args)) File ""/opt/anaconda3/lib/python3.8/site-packages/dask/array/core.py"", line 106, in getter c = np.asarray(c) File ""/opt/anaconda3/lib/python3.8/site-packages/xarray/core/indexing.py"", line 358, in __array__ return np.asarray(self.array, dtype=dtype) File ""/opt/anaconda3/lib/python3.8/site-packages/xarray/core/indexing.py"", line 522, in __array__ return np.asarray(self.array, dtype=dtype) File ""/opt/anaconda3/lib/python3.8/site-packages/xarray/core/indexing.py"", line 423, in __array__ return np.asarray(array[self.key], dtype=None) File ""/opt/anaconda3/lib/python3.8/site-packages/xarray/coding/variables.py"", line 70, in __array__ return self.func(self.array) File ""/opt/anaconda3/lib/python3.8/site-packages/xarray/coding/variables.py"", line 137, in _apply_mask data = np.asarray(data, dtype=dtype) File ""/opt/anaconda3/lib/python3.8/site-packages/xarray/core/indexing.py"", line 423, in __array__ return np.asarray(array[self.key], dtype=None) File ""/opt/anaconda3/lib/python3.8/site-packages/xarray/backends/netCDF4_.py"", line 93, in __getitem__ return indexing.explicit_indexing_adapter( File ""/opt/anaconda3/lib/python3.8/site-packages/xarray/core/indexing.py"", line 712, in explicit_indexing_adapter result = raw_indexing_method(raw_key.tuple) File ""/opt/anaconda3/lib/python3.8/site-packages/xarray/backends/netCDF4_.py"", line 106, in _getitem array = getitem(original_array, key) File ""src/netCDF4/_netCDF4.pyx"", line 4406, in netCDF4._netCDF4.Variable.__getitem__ File ""src/netCDF4/_netCDF4.pyx"", line 5350, in netCDF4._netCDF4.Variable._get File ""src/netCDF4/_netCDF4.pyx"", line 1927, in netCDF4._netCDF4._ensure_nc_success RuntimeError: NetCDF: HDF error ``` I have tried updating xarray, netcdf4, h5netcdf; changing the engine, changing the encoding. Any thoughts? thanks!","{""url"": ""https://api.github.com/repos/pydata/xarray/issues/6506/reactions"", ""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",,completed,13221727,issue 935818279,MDU6SXNzdWU5MzU4MTgyNzk=,5567,quantile to_netcdf loading original data,25382032,closed,0,,,4,2021-07-02T14:23:27Z,2021-07-05T15:48:50Z,2021-07-05T15:48:50Z,NONE,,,,"I have a dataset from a RCM which I have opened using: ` ds = xr.open_mfdataset(dir_hindcast.glob('WW3_data*.nc')) ` which is of shape (lon=180, lat=336, time=105193). I then perform the analysis: ``` quantiles = np.hstack([np.arange(1, 98, 1), [98, 99, 99.1, 99.3, 99.5, 99.7, 99.9, 99.99, 99.999]]) / 100 ds = ds.chunk({'time': -1}).quantile(quantiles, dim='time').rename(quantile='quantiles') ``` ds then has shape (quantiles=106, lon=180, lat=336). I then want to save this data to file: `ds.to_netcdf(dir_data / 'quantiles_hindc.nc')` It gives me the following error: raceback (most recent call last): File ""03_bias_adjustment_waves_nc.py"", line 54, in get_quantiles ds.to_netcdf(dir_data / 'quantiles_hindc.nc') File ""/opt/anaconda3/lib/python3.7/site-packages/xarray/core/dataarray.py"", line 808, in load ds = self._to_temp_dataset().load(**kwargs) File ""/opt/anaconda3/lib/python3.7/site-packages/xarray/core/dataset.py"", line 654, in load evaluated_data = da.compute(*lazy_data.values(), **kwargs) File ""/opt/anaconda3/lib/python3.7/site-packages/dask/base.py"", line 452, in compute results = schedule(dsk, keys, **kwargs) File ""/opt/anaconda3/lib/python3.7/site-packages/dask/threaded.py"", line 84, in get **kwargs File ""/opt/anaconda3/lib/python3.7/site-packages/dask/local.py"", line 486, in get_async raise_exception(exc, tb) File ""/opt/anaconda3/lib/python3.7/site-packages/dask/local.py"", line 316, in reraise raise exc File ""/opt/anaconda3/lib/python3.7/site-packages/dask/local.py"", line 222, in execute_task result = _execute_task(task, data) File ""/opt/anaconda3/lib/python3.7/site-packages/dask/core.py"", line 121, in _execute_task return func(*(_execute_task(a, cache) for a in args)) File ""/opt/anaconda3/lib/python3.7/site-packages/dask/core.py"", line 121, in return func(*(_execute_task(a, cache) for a in args)) File ""/opt/anaconda3/lib/python3.7/site-packages/dask/core.py"", line 121, in _execute_task return func(*(_execute_task(a, cache) for a in args)) File ""/opt/anaconda3/lib/python3.7/site-packages/dask/core.py"", line 121, in return func(*(_execute_task(a, cache) for a in args)) File ""/opt/anaconda3/lib/python3.7/site-packages/dask/core.py"", line 121, in _execute_task return func(*(_execute_task(a, cache) for a in args)) File ""/opt/anaconda3/lib/python3.7/site-packages/dask/array/core.py"", line 4527, in concatenate_axes return concatenate3(transposelist(arrays, axes, extradims=extradims)) File ""/opt/anaconda3/lib/python3.7/site-packages/dask/array/core.py"", line 4510, in concatenate3 result = np.empty(shape=shape, dtype=dtype(deepfirst(arrays))) MemoryError: Unable to allocate 47.4 GiB for an array with shape (180, 336, 105193) and data type float64 I don't know why it is still keeping the original data on memory when I have replaced it with the quantiles dataset. I have also tried assigning it to a different variable ds_quantiles and doing ds.close() beforehand but it still gives the same error. Any ideas? Thanks","{""url"": ""https://api.github.com/repos/pydata/xarray/issues/5567/reactions"", ""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",,completed,13221727,issue