home / github / issues

Menu
  • GraphQL API
  • Search all tables

issues: 2066807588

This data as json

id node_id number title user state locked assignee milestone comments created_at updated_at closed_at author_association active_lock_reason draft pull_request body reactions performed_via_github_app state_reason repo type
2066807588 I_kwDOAMm_X857MPsk 8590 ValueError: conflicting sizes for dimension in xr.open_dataset("reference://"...) VS. no error in xr.open_dataset(direct_file_path) for h5 90292403 closed 0     3 2024-01-05T06:34:34Z 2024-04-11T06:54:44Z 2024-04-11T06:54:44Z NONE      

What is your issue?

Hi all, for a project I am trying to read a dataset with xarray through its JSON reference ("reference://") mechanism. Metadata for this file (an ATL03 h5 file) can be found here: https://nsidc.org/sites/default/files/icesat2_atl03_data_dict_v005.pdf

  1. Reading a group whose variables have 2 or fewer dimensions produces no issues, e.g. the group "gt1l/heights" (documented as /gtx/heights in the PDF): ds = xr.open_dataset("reference://", engine="zarr", backend_kwargs={ "consolidated": False, "storage_options": {"fo": JSON_PATH}, "group": "gt1l/heights" })
  2. Reading a group with a variable that has 3+ dimensions causes the following error. The group "ancillary_data/calibrations/dead_time_radiometric_signal_loss/gt1l" contains the variable rad_corr, which has 3 dimensions. ds = xr.open_dataset("reference://", engine="zarr", backend_kwargs={ "consolidated": False, "storage_options": {"fo": JSON_PATH}, "group": "ancillary_data/calibrations/dead_time_radiometric_signal_loss/gt1l" }) ``` { "name": "ValueError", "message": "conflicting sizes for dimension 'phony_dim_1': length 498 on 'width' and length 160 on {'phony_dim_0': 'dead_time', 'phony_dim_1': 'rad_corr', 'phony_dim_2': 'rad_corr'}", "stack": "--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In[2], line 1 ----> 1 ds = xr.open_dataset(\"reference://\", engine=\"zarr\", backend_kwargs={ 2 \"consolidated\": False, 3 \"storage_options\": {\"fo\": JSON_PATH}, 4 \"group\": group_path 5 })

File ~/opt/anaconda3/envs/kerchunkc/lib/python3.8/site-packages/xarray/backends/api.py:539, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, backend_kwargs, **kwargs) 527 decoders = _resolve_decoders_kwargs( 528 decode_cf, 529 open_backend_dataset_parameters=backend.open_dataset_parameters, (...) 535 decode_coords=decode_coords, 536 ) 538 overwrite_encoded_chunks = kwargs.pop(\"overwrite_encoded_chunks\", None) --> 539 backend_ds = backend.open_dataset( 540 filename_or_obj, 541 drop_variables=drop_variables, 542 **decoders, 543 **kwargs, 544 ) 545 ds = _dataset_from_backend_dataset( 546 backend_ds, 547 filename_or_obj, (...) 555 **kwargs, 556 ) 557 return ds

File ~/opt/anaconda3/envs/kerchunkc/lib/python3.8/site-packages/xarray/backends/zarr.py:862, in ZarrBackendEntrypoint.open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, group, mode, synchronizer, consolidated, chunk_store, storage_options, stacklevel) 860 store_entrypoint = StoreBackendEntrypoint() 861 with close_on_error(store): --> 862 ds = store_entrypoint.open_dataset( 863 store, 864 mask_and_scale=mask_and_scale, 865 decode_times=decode_times, 866 concat_characters=concat_characters, 867 decode_coords=decode_coords, 868 drop_variables=drop_variables, 869 use_cftime=use_cftime, 870 decode_timedelta=decode_timedelta, 871 ) 872 return ds

File ~/opt/anaconda3/envs/kerchunkc/lib/python3.8/site-packages/xarray/backends/store.py:43, in StoreBackendEntrypoint.open_dataset(self, store, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta) 29 encoding = store.get_encoding() 31 vars, attrs, coord_names = conventions.decode_cf_variables( 32 vars, 33 attrs, (...) 40 decode_timedelta=decode_timedelta, 41 ) ---> 43 ds = Dataset(vars, attrs=attrs) 44 ds = ds.set_coords(coord_names.intersection(vars)) 45 ds.set_close(store.close)

File ~/opt/anaconda3/envs/kerchunkc/lib/python3.8/site-packages/xarray/core/dataset.py:604, in Dataset.__init__(self, data_vars, coords, attrs) 601 if isinstance(coords, Dataset): 602 coords = coords.variables --> 604 variables, coord_names, dims, indexes, _ = merge_data_and_coords( 605 data_vars, coords, compat=\"broadcast_equals\" 606 ) 608 self._attrs = dict(attrs) if attrs is not None else None 609 self._close = None

File ~/opt/anaconda3/envs/kerchunkc/lib/python3.8/site-packages/xarray/core/merge.py:575, in merge_data_and_coords(data_vars, coords, compat, join) 573 objects = [data_vars, coords] 574 explicit_coords = coords.keys() --> 575 return merge_core( 576 objects, 577 compat, 578 join, 579 explicit_coords=explicit_coords, 580 indexes=Indexes(indexes, coords), 581 )

File ~/opt/anaconda3/envs/kerchunkc/lib/python3.8/site-packages/xarray/core/merge.py:761, in merge_core(objects, compat, join, combine_attrs, priority_arg, explicit_coords, indexes, fill_value) 756 prioritized = _get_priority_vars_and_indexes(aligned, priority_arg, compat=compat) 757 variables, out_indexes = merge_collected( 758 collected, prioritized, compat=compat, combine_attrs=combine_attrs 759 ) --> 761 dims = calculate_dimensions(variables) 763 coord_names, noncoord_names = determine_coords(coerced) 764 if explicit_coords is not None:

File ~/opt/anaconda3/envs/kerchunkc/lib/python3.8/site-packages/xarray/core/variable.py:3208, in calculate_dimensions(variables) 3206 last_used[dim] = k 3207 elif dims[dim] != size: -> 3208 raise ValueError( 3209 f\"conflicting sizes for dimension {dim!r}: \" 3210 f\"length {size} on {k!r} and length {dims[dim]} on {last_used!r}\" 3211 ) 3212 return dims

ValueError: conflicting sizes for dimension 'phony_dim_1': length 498 on 'width' and length 160 on {'phony_dim_0': 'dead_time', 'phony_dim_1': 'rad_corr', 'phony_dim_2': 'rad_corr'}" }

```

  3. Now, contrast this with reading the same group (the one containing the 3+ dimension variable) using the direct h5 file path. This does not produce an error: ds = xr.open_dataset("/Users/katrinasharonin/Downloads/ATL03_20230816235231_08822014_006_01.h5", group="ancillary_data/calibrations/dead_time_radiometric_signal_loss/gt1l")

The JSON reference file is attached: ATL03_REF_NONUTM.json

{
    "url": "https://api.github.com/repos/pydata/xarray/issues/8590/reactions",
    "total_count": 1,
    "+1": 1,
    "-1": 0,
    "laugh": 0,
    "hooray": 0,
    "confused": 0,
    "heart": 0,
    "rocket": 0,
    "eyes": 0
}
  completed 13221727 issue

Links from other tables

  • 2 rows from issues_id in issues_labels
  • 0 rows from issue in issue_comments
Powered by Datasette · Queries took 0.661ms · About: xarray-datasette