issues: 279909699
id | node_id | number | title | user | state | locked | assignee | milestone | comments | created_at | updated_at | closed_at | author_association | active_lock_reason | draft | pull_request | performed_via_github_app | state_reason | repo | type |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
279909699 | MDU6SXNzdWUyNzk5MDk2OTk= | 1765 | Error when using .apply_ufunc with .groupby_bins | 14314623 | closed | 0 |  |  | 7 | 2017-12-06T21:17:34Z | 2020-03-25T15:31:02Z | 2020-03-25T15:31:01Z | CONTRIBUTOR |  |  |  |  | completed | 13221727 | issue |

body:

I am trying to create a function that applies a `.groupby_bins` operation over specified dimensions of an xarray dataset. E.g. I want to be able to sum temperature, salinity, and other values grouped by oceanic oxygen concentrations. I want to be flexible about which dimensions I apply the `groupby_bins` operation over. For instance, I would like to apply it in every depth column (resulting in an array of shape `(x, y, time)`), but also over all spatial dimensions, resulting in a timeseries. I currently run into a strange error when I try the following.

#### Code Sample, a copy-pastable example if possible
```python
import numpy as np
import xarray as xr
import dask.array as dsa


def _func(data, bin_data, bins):
    """Group unlabeled array 'data' according to values in 'bin_data'
    using bins defined in 'bins' and sum all values."""
    labels = bins[1:]
    da_data = xr.DataArray(data, name='data')
    da_bin_data = xr.DataArray(bin_data, name='bin_data')
    binned = da_data.groupby_bins(da_bin_data, bins, labels=labels,
                                  include_lowest=True).sum()
    return binned


def wrapper(obj, bin_obj, bins, dims):
    obj = obj.copy()
    bin_obj = bin_obj.copy()
    n_bins = len(bins) - 1
    ...
```
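The record truncates the body of `wrapper` at this point. For context, here is a minimal sketch of how such a wrapper might hand `_func` to `xr.apply_ufunc`; every argument choice below is an assumption for illustration, not the reporter's actual code:

```python
import numpy as np
import xarray as xr

def wrapper_sketch(obj, bin_obj, bins, dims):
    # Hypothetical reconstruction: collapse `dims` inside _func and emit
    # one summed value per bin along a new core dimension.
    n_bins = len(bins) - 1
    return xr.apply_ufunc(
        _func, obj, bin_obj,
        kwargs={'bins': bins},
        vectorize=True,                        # loop _func over the remaining dims
        input_core_dims=[dims, dims],          # dims reduced away inside _func
        output_core_dims=[['bin_data_bins']],  # one result per bin
        dask='parallelized',
        output_dtypes=[np.float64],
        output_sizes={'bin_data_bins': n_bins},
    )
```

Note that in current xarray releases `output_sizes` is passed through `dask_gufunc_kwargs` rather than as a top-level argument; the form above matches the xarray versions of this era.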
#### Groupby bins problem with small bins?

```python
x_raw = np.arange(20)
y_raw = np.arange(10)
z_raw = np.arange(15)

x = xr.DataArray(dsa.from_array(x_raw, chunks=(-1)), dims=['x'],
                 coords={'x': ('x', x_raw)})
y = xr.DataArray(dsa.from_array(y_raw, chunks=(-1)), dims=['y'],
                 coords={'y': ('y', y_raw)})
z = xr.DataArray(dsa.from_array(z_raw, chunks=(-1)), dims=['z'],
                 coords={'z': ('z', z_raw)})

da = xr.DataArray(dsa.ones([20, 10, 15], chunks=[-1, -1, -1]),
                  dims=['x', 'y', 'z'],
                  coords={'x': x, 'y': y, 'z': z})
da
```
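For contrast, a sketch of the same binning on eager NumPy-backed arrays, which mirrors `_func` without dask in the loop (the variable names here are illustrative, not from the issue); since `pd.cut` then receives ordinary writable ndarrays, this path would be expected to succeed:

```python
import numpy as np
import xarray as xr

# Eager (non-dask) version of the reproduction above.
data_np = np.ones([20, 10, 15])
bins = np.arange(0, 30, 1)
da_np = xr.DataArray(data_np, dims=['x', 'y', 'z'], name='data')
grp = xr.DataArray(data_np, dims=['x', 'y', 'z'], name='bin_data')
print(da_np.groupby_bins(grp, bins, labels=bins[1:],
                         include_lowest=True).sum())
```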
```python
bins = np.arange(0, 30, 1)
# apply private function on unlabeled array
binned_data = _func(da.data, da.data, bins)
print(binned_data.compute())
```
```
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-13-05a5150badbf> in <module>()
      1 binned_full = wrapper(da, da, bins, dims=['x','y','z'])
----> 2 print(binned_full.data.compute())

~/miniconda/envs/standard/lib/python3.6/site-packages/dask/base.py in compute(self, **kwargs)
    133         dask.base.compute
    134         """
--> 135         (result,) = compute(self, traverse=False, **kwargs)
    136         return result
    137

~/miniconda/envs/standard/lib/python3.6/site-packages/dask/base.py in compute(*args, **kwargs)
    331     postcomputes = [a.__dask_postcompute__() if is_dask_collection(a)
    332                     else (None, a) for a in args]
--> 333     results = get(dsk, keys, **kwargs)
    334     results_iter = iter(results)
    335     return tuple(a if f is None else f(next(results_iter), *a)

~/miniconda/envs/standard/lib/python3.6/site-packages/dask/threaded.py in get(dsk, result, cache, num_workers, **kwargs)
     73     results = get_async(pool.apply_async, len(pool._pool), dsk, result,
     74                         cache=cache, get_id=_thread_get_id,
---> 75                         pack_exception=pack_exception, **kwargs)
     76
     77     # Cleanup pools associated to dead threads

~/miniconda/envs/standard/lib/python3.6/site-packages/dask/local.py in get_async(apply_async, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, **kwargs)
    519                         _execute_task(task, data)  # Re-execute locally
    520                     else:
--> 521                         raise_exception(exc, tb)
    522                 res, worker_id = loads(res_info)
    523                 state['cache'][key] = res

~/miniconda/envs/standard/lib/python3.6/site-packages/dask/compatibility.py in reraise(exc, tb)
     58     if exc.__traceback__ is not tb:
     59         raise exc.with_traceback(tb)
---> 60     raise exc
     61
     62 else:

~/miniconda/envs/standard/lib/python3.6/site-packages/dask/local.py in execute_task(key, task_info, dumps, loads, get_id, pack_exception)
    288     try:
    289         task, data = loads(task_info)
--> 290         result = _execute_task(task, data)
    291         id = get_id()
    292         result = dumps((result, id))

~/miniconda/envs/standard/lib/python3.6/site-packages/dask/local.py in _execute_task(arg, cache, dsk)
    269         func, args = arg[0], arg[1:]
    270         args2 = [_execute_task(a, cache) for a in args]
--> 271         return func(*args2)
    272     elif not ishashable(arg):
    273         return arg

~/miniconda/envs/standard/lib/python3.6/site-packages/numpy/lib/function_base.py in __call__(self, *args, **kwargs)
   2737             vargs.extend([kwargs[_n] for _n in names])
   2738
-> 2739         return self._vectorize_call(func=func, args=vargs)
   2740
   2741     def _get_ufunc_and_otypes(self, func, args):

~/miniconda/envs/standard/lib/python3.6/site-packages/numpy/lib/function_base.py in _vectorize_call(self, func, args)
   2803         """Vectorized call to ...

~/miniconda/envs/standard/lib/python3.6/site-packages/numpy/lib/function_base.py in _vectorize_call_with_signature(self, func, args)
   2844
   2845         for index in np.ndindex(*broadcast_shape):
-> 2846             results = func(*(arg[index] for arg in args))
   2847
   2848             n_results = len(results) if isinstance(results, tuple) else 1

<ipython-input-5-6d60d3b0b704> in _func(data, bin_data, bins)
      7
      8     binned = da_data.groupby_bins(da_bin_data, bins, labels=labels,
----> 9                                   include_lowest=True).sum()
     10     return binned
     11

~/Work/CODE/PYTHON/xarray/xarray/core/common.py in groupby_bins(self, group, bins, right, labels, precision, include_lowest, squeeze)
    466                            cut_kwargs={'right': right, 'labels': labels,
    467                                        'precision': precision,
--> 468                                        'include_lowest': include_lowest})
    469
    470     def rolling(self, min_periods=None, center=False, **windows):

~/Work/CODE/PYTHON/xarray/xarray/core/groupby.py in __init__(self, obj, group, squeeze, grouper, bins, cut_kwargs)
    225
    226         if bins is not None:
--> 227             binned = pd.cut(group.values, bins, **cut_kwargs)
    228             new_dim_name = group.name + '_bins'
    229             group = DataArray(binned, group.coords, name=new_dim_name)

~/miniconda/envs/standard/lib/python3.6/site-packages/pandas/core/reshape/tile.py in cut(x, bins, right, labels, retbins, precision, include_lowest)
    134                               precision=precision,
    135                               include_lowest=include_lowest,
--> 136                               dtype=dtype)
    137
    138     return _postprocess_for_cut(fac, bins, retbins, x_is_series,

~/miniconda/envs/standard/lib/python3.6/site-packages/pandas/core/reshape/tile.py in _bins_to_cuts(x, bins, right, labels, precision, include_lowest, dtype, duplicates)
    227         return result, bins
    228
--> 229     unique_bins = algos.unique(bins)
    230     if len(unique_bins) < len(bins) and len(bins) != 2:
    231         if duplicates == 'raise':

~/miniconda/envs/standard/lib/python3.6/site-packages/pandas/core/algorithms.py in unique(values)
    362
    363     table = htable(len(values))
--> 364     uniques = table.unique(values)
    365     uniques = _reconstruct_data(uniques, dtype, original)
    366

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.unique()

~/miniconda/envs/standard/lib/python3.6/site-packages/pandas/_libs/hashtable.cpython-36m-darwin.so in View.MemoryView.memoryview_cwrapper()

~/miniconda/envs/standard/lib/python3.6/site-packages/pandas/_libs/hashtable.cpython-36m-darwin.so in View.MemoryView.memoryview.__cinit__()

ValueError: buffer source array is read-only
```

This error only gets triggered upon computation.

#### Problem description

I am not sure if this is a bug or a user error on my side. I am still trying to get used to `apply_ufunc`. I am not sure if the rewrapping in `xr.DataArray`s in `_func` ...

#### Output of `xr.show_versions()`
reactions:

```json
{
  "url": "https://api.github.com/repos/pydata/xarray/issues/1765/reactions",
  "total_count": 0,
  "+1": 0,
  "-1": 0,
  "laugh": 0,
  "hooray": 0,
  "confused": 0,
  "heart": 0,
  "rocket": 0,
  "eyes": 0
}
```