issue_comments: 740941441

This data as json

html_url	issue_url	id	node_id	user	created_at	updated_at	author_association	body	reactions	performed_via_github_app	issue
https://github.com/pydata/xarray/issues/4663#issuecomment-740941441	https://api.github.com/repos/pydata/xarray/issues/4663	740941441	MDEyOklzc3VlQ29tbWVudDc0MDk0MTQ0MQ==	2448579	2020-12-08T19:58:51Z	2020-12-08T20:02:55Z	MEMBER	Thanks for the great example! This looks like a duplicate of https://github.com/pydata/xarray/issues/2801. If you agree, can we move the conversation there? I like using our `raise_if_dask_computes` context since it points out where the compute is happening ``` python import dask.array as da import numpy as np from xarray.tests import raise_if_dask_computes Use a custom array type to know when data is being evaluated class Array(): `def __init__(self, x): self.shape = (x.shape[0],) self.ndim = x.ndim self.dtype = 'bool' self.x = x def __getitem__(self, idx): if idx[0].stop > 0: print('Evaluating') return (self.x > .5).__getitem__(idx)` with raise_if_dask_computes(max_computes=1): ds = xr.Dataset(dict( a=('x', da.from_array(Array(np.random.rand(100)))), b=(('x', 'y'), da.random.random((100, 10))), c=(('x', 'y'), da.random.random((100, 10))), d=(('x', 'y'), da.random.random((100, 10))), )) ds.sel(x=ds.a) ``` ```python RuntimeError Traceback (most recent call last) <ipython-input-76-8efd3a1c3fe5> in <module> 26 d=(('x', 'y'), da.random.random((100, 10))), 27 )) ---> 28 ds.sel(x=ds.a) /project/mrgoodbar/dcherian/python/xarray/xarray/core/dataset.py in sel(self, indexers, method, tolerance, drop, indexers_kwargs) 2211 self, indexers=indexers, method=method, tolerance=tolerance 2212 ) -> 2213 result = self.isel(indexers=pos_indexers, drop=drop) 2214 return result._overwrite_indexes(new_indexes) 2215 /project/mrgoodbar/dcherian/python/xarray/xarray/core/dataset.py in isel(self, indexers, drop, missing_dims, indexers_kwargs) 2058 indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel") 2059 if any(is_fancy_indexer(idx) for idx in indexers.values()): -> 2060 return self._isel_fancy(indexers, drop=drop, missing_dims=missing_dims) 2061 2062 # 
Much faster algorithm for when all indexers are ints, slices, one-dimensional /project/mrgoodbar/dcherian/python/xarray/xarray/core/dataset.py in _isel_fancy(self, indexers, drop, missing_dims) 2122 indexes[name] = new_index 2123 elif var_indexers: -> 2124 new_var = var.isel(indexers=var_indexers) 2125 else: 2126 new_var = var.copy(deep=False) /project/mrgoodbar/dcherian/python/xarray/xarray/core/variable.py in isel(self, indexers, missing_dims, indexers_kwargs) 1118 1119 key = tuple(indexers.get(dim, slice(None)) for dim in self.dims) -> 1120 return self[key] 1121 1122 def squeeze(self, dim=None): /project/mrgoodbar/dcherian/python/xarray/xarray/core/variable.py in __getitem__(self, key) 766 array `x.values` directly. 767 """ --> 768 dims, indexer, new_order = self._broadcast_indexes(key) 769 data = as_indexable(self._data)[indexer] 770 if new_order: /project/mrgoodbar/dcherian/python/xarray/xarray/core/variable.py in _broadcast_indexes(self, key) 625 dims.append(d) 626 if len(set(dims)) == len(dims): --> 627 return self._broadcast_indexes_outer(key) 628 629 return self._broadcast_indexes_vectorized(key) /project/mrgoodbar/dcherian/python/xarray/xarray/core/variable.py in _broadcast_indexes_outer(self, key) 680 k = k.data 681 if not isinstance(k, BASIC_INDEXING_TYPES): --> 682 k = np.asarray(k) 683 if k.size == 0: 684 # Slice by empty list; numpy could not infer the dtype ~/miniconda3/envs/dcpy_old_dask/lib/python3.7/site-packages/numpy/core/_asarray.py in asarray(a, dtype, order) 83 84 """ ---> 85 return array(a, dtype, copy=False, order=order) 86 87 ~/miniconda3/envs/dcpy_old_dask/lib/python3.7/site-packages/dask/array/core.py in __array__(self, dtype, **kwargs) 1374 1375 def __array__(self, dtype=None, **kwargs): -> 1376 x = self.compute() 1377 if dtype and x.dtype != dtype: 1378 x = x.astype(dtype) ~/miniconda3/envs/dcpy_old_dask/lib/python3.7/site-packages/dask/base.py in compute(self, **kwargs) 165 dask.base.compute 166 """ --> 167 (result,) = compute(self, traverse=False,** 
kwargs) 168 return result 169 ~/miniconda3/envs/dcpy_old_dask/lib/python3.7/site-packages/dask/base.py in compute(*args, **kwargs) 450 postcomputes.append(x.__dask_postcompute__()) 451 --> 452 results = schedule(dsk, keys, **kwargs) 453 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)]) 454 /project/mrgoodbar/dcherian/python/xarray/xarray/tests/__init__.py in __call__(self, dsk, keys, **kwargs) 112 raise RuntimeError( 113 "Too many computes. Total: %d > max: %d." --> 114 % (self.total_computes, self.max_computes) 115 ) 116 return dask.get(dsk, keys, **kwargs) RuntimeError: Too many computes. Total: 2 > max: 1. ``` So here it looks like we don't support indexing by dask arrays, so as we loop through the dataset the `indexer` array gets computed each time. As you say, this should be avoided.	{ "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0 }		759709924