issues: 1538181776
This data as json
id | node_id | number | title | user | state | locked | assignee | milestone | comments | created_at | updated_at | closed_at | author_association | active_lock_reason | draft | pull_request | body | reactions | performed_via_github_app | state_reason | repo | type |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1538181776 | I_kwDOAMm_X85brsqQ | 7452 | Vectorizing behaviour of apply_ufunc with dask="allowed" and vectorize=True depends on whether dask array is loaded | 33802211 | open | 0 | 0 | 2023-01-18T15:19:41Z | 2023-03-06T17:55:24Z | NONE | What happened? I have an `xr.DataArray` backed by a chunked dask array, to which I apply a 1-D function along the `time` dimension using `xr.apply_ufunc`. Now, whether this works as intended depends on whether I preload the array:
- If I run this code with Is this a bug? Do dask arrays need some other arguments set in xr.apply_ufunc to resolve this issue? I didn't find the solution to this issue in the documentation. What did you expect to happen?Same vectorizing behaviour whether the array is loaded or not. Minimal Complete Verifiable Example```Python from cftime import Datetime360Day import xarray as xr import numpy as np def cf360day_to_days(da_tcrd): year, month, day = np.array([[c.year, c.month, c.day] for c in da_tcrd]).T return (year - year[0]) * 360 + (month - 1) * 30 + day def minimal_1d(dur_data, dur_time): print("dur_data shape:\n", dur_data.shape) dur_end_day = cf360day_to_days(dur_time) data_mask = np.isfinite(dur_data) dur_end_day = dur_end_day[data_mask] return dur_end_day y0 = 1981 times = [Datetime360Day(y // 360 + y0, y % 360 // 30 + 1, y % 30 + 1) for y in np.arange(400)] # included as this datatype has caused problems before data = xr.DataArray( data=np.random.randn(4,4,400), dims=["q", "structure", "time"], coords={ "q": ['0.01', '0.05', '0.10', '0.20'], "structure": ['A', 'B', 'C', 'D'], "time": times} ).chunk({"time": -1, "q": 1, "structure": 1}) res = xr.apply_ufunc( minimal_1d, data, #.load(), data["time"], input_core_dims=[["time"], ["time"]], output_core_dims=(("time", ),), dask="allowed", vectorize=False, output_dtypes=[float], ) ``` MVCE confirmation
Relevant log output```Python dur_data shape: (4, 4, 400) IndexError Traceback (most recent call last) Input In [181], in <cell line: 15>() 2 times = [Datetime360Day(y // 360 + 1981, 3 y % 360 // 30 + 1, 4 y % 30 + 1) 5 for y in np.arange(400)] 6 data = xr.DataArray( 7 data=np.random.randn(4,4,400), 8 dims=["q", "structure", "time"], (...) 12 "time": times} 13 ).chunk({"time": -1, "q": 1, "structure": 1}) ---> 15 res = xr.apply_ufunc( 16 minimal_1d, 17 data, #.load(), 18 data["time"], 19 input_core_dims=[["time"], ["time"]], 20 output_core_dims=(("time", ),), 21 dask="allowed", vectorize=False, 22 output_dtypes=[float], 23 ) File ~/.conda/envs/new-ada-env/lib/python3.8/site-packages/xarray/core/computation.py:1159, in apply_ufunc(func, input_core_dims, output_core_dims, exclude_dims, vectorize, join, dataset_join, dataset_fill_value, keep_attrs, kwargs, dask, output_dtypes, output_sizes, meta, dask_gufunc_kwargs, args) 1157 # feed DataArray apply_variable_ufunc through apply_dataarray_vfunc 1158 elif any(isinstance(a, DataArray) for a in args): -> 1159 return apply_dataarray_vfunc( 1160 variables_vfunc, 1161 args, 1162 signature=signature, 1163 join=join, 1164 exclude_dims=exclude_dims, 1165 keep_attrs=keep_attrs, 1166 ) 1167 # feed Variables directly through apply_variable_ufunc 1168 elif any(isinstance(a, Variable) for a in args): File ~/.conda/envs/new-ada-env/lib/python3.8/site-packages/xarray/core/computation.py:286, in apply_dataarray_vfunc(func, signature, join, exclude_dims, keep_attrs, args) 281 result_coords = build_output_coords( 282 args, signature, exclude_dims, combine_attrs=keep_attrs 283 ) 285 data_vars = [getattr(a, "variable", a) for a in args] --> 286 result_var = func(data_vars) 288 if signature.num_outputs > 1: 289 out = tuple( 290 DataArray(variable, coords, name=name, fastpath=True) 291 for variable, coords in zip(result_var, result_coords) 292 ) File ~/.conda/envs/new-ada-env/lib/python3.8/site-packages/xarray/core/computation.py:727, in 
apply_variable_ufunc(func, signature, exclude_dims, dask, output_dtypes, vectorize, keep_attrs, dask_gufunc_kwargs, args) 722 if vectorize: 723 func = _vectorize( 724 func, signature, output_dtypes=output_dtypes, exclude_dims=exclude_dims 725 ) --> 727 result_data = func(input_data) 729 if signature.num_outputs == 1: 730 result_data = (result_data,) Input In [174], in minimal_1d(dur_data, dur_time) 65 dur_end_day = cf360day_to_days(dur_time) 66 data_mask = np.isfinite(dur_data) ---> 67 dur_end_day = dur_end_day[data_mask] 69 return dur_end_day IndexError: too many indices for array: array is 1-dimensional, but 3 were indexed ``` Anything else we need to know?No response Environment
INSTALLED VERSIONS
------------------
commit: None
python: 3.8.15 | packaged by conda-forge | (default, Nov 22 2022, 08:49:35)
[GCC 10.4.0]
python-bits: 64
OS: Linux
OS-release: 3.10.0-1160.45.1.el7.x86_64
machine: x86_64
processor: x86_64
byteorder: little
LC_ALL: None
LANG: en_GB.UTF-8
LOCALE: ('en_GB', 'UTF-8')
libhdf5: 1.12.2
libnetcdf: 4.8.1
xarray: 2022.3.0
pandas: 1.2.3
numpy: 1.23.5
scipy: 1.9.3
netCDF4: 1.6.2
pydap: None
h5netcdf: None
h5py: None
Nio: None
zarr: None
cftime: 1.6.0
nc_time_axis: 1.4.1
PseudoNetCDF: None
rasterio: 1.3.4
cfgrib: None
iris: 3.1.0
bottleneck: 1.3.2
dask: 2022.05.0
distributed: 2022.5.0
matplotlib: 3.6.2
cartopy: 0.21.1
seaborn: 0.11.1
numbagg: None
fsspec: 2022.11.0
cupy: None
pint: None
sparse: None
setuptools: 59.8.0
pip: 22.3.1
conda: None
pytest: None
IPython: 8.4.0
sphinx: None
|
{ "url": "https://api.github.com/repos/pydata/xarray/issues/7452/reactions", "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 } |
13221727 | issue |