home / github / issue_comments

Menu
  • Search all tables
  • GraphQL API

issue_comments: 740941441

This data as json

html_url issue_url id node_id user created_at updated_at author_association body reactions performed_via_github_app issue
https://github.com/pydata/xarray/issues/4663#issuecomment-740941441 https://api.github.com/repos/pydata/xarray/issues/4663 740941441 MDEyOklzc3VlQ29tbWVudDc0MDk0MTQ0MQ== 2448579 2020-12-08T19:58:51Z 2020-12-08T20:02:55Z MEMBER

Thanks for the great example!

This looks like a duplicate of https://github.com/pydata/xarray/issues/2801. If you agree, can we move the conversation there?

I like using our `raise_if_dask_computes` context manager, since it points out where the compute is happening:

```python
import dask.array as da
import numpy as np
import xarray as xr

from xarray.tests import raise_if_dask_computes

# Use a custom array type to know when data is being evaluated

class Array():
    """Minimal array-like wrapper that announces when its data is evaluated.

    Exposes ``shape``, ``ndim`` and ``dtype`` attributes plus ``__getitem__``.
    The boolean mask ``x > .5`` is computed on every indexing call rather
    than once up front.
    """

    def __init__(self, x):
        """Keep a reference to ``x`` and advertise a boolean dtype."""
        self.shape = (x.shape[0],)
        self.ndim = x.ndim
        self.dtype = 'bool'
        self.x = x

    def __getitem__(self, idx):
        """Return ``(x > .5)[idx]``, printing a marker when data is read.

        ``idx`` must be a tuple whose first element is a slice, because
        ``idx[0].stop`` is inspected.
        """
        # A stop of 0 is not reported -- presumably to ignore zero-length
        # probes made while wrapping the array (TODO confirm).
        if idx[0].stop > 0:
            print('Evaluating')
        return (self.x > .5).__getitem__(idx)

with raise_if_dask_computes(max_computes=1):
    ds = xr.Dataset(dict(
        a=('x', da.from_array(Array(np.random.rand(100)))),
        b=(('x', 'y'), da.random.random((100, 10))),
        c=(('x', 'y'), da.random.random((100, 10))),
        d=(('x', 'y'), da.random.random((100, 10))),
    ))
    ds.sel(x=ds.a)
```

```python

RuntimeError Traceback (most recent call last) <ipython-input-76-8efd3a1c3fe5> in <module> 26 d=(('x', 'y'), da.random.random((100, 10))), 27 )) ---> 28 ds.sel(x=ds.a)

/project/mrgoodbar/dcherian/python/xarray/xarray/core/dataset.py in sel(self, indexers, method, tolerance, drop, **indexers_kwargs) 2211 self, indexers=indexers, method=method, tolerance=tolerance 2212 ) -> 2213 result = self.isel(indexers=pos_indexers, drop=drop) 2214 return result._overwrite_indexes(new_indexes) 2215

/project/mrgoodbar/dcherian/python/xarray/xarray/core/dataset.py in isel(self, indexers, drop, missing_dims, **indexers_kwargs) 2058 indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel") 2059 if any(is_fancy_indexer(idx) for idx in indexers.values()): -> 2060 return self._isel_fancy(indexers, drop=drop, missing_dims=missing_dims) 2061 2062 # Much faster algorithm for when all indexers are ints, slices, one-dimensional

/project/mrgoodbar/dcherian/python/xarray/xarray/core/dataset.py in _isel_fancy(self, indexers, drop, missing_dims) 2122 indexes[name] = new_index 2123 elif var_indexers: -> 2124 new_var = var.isel(indexers=var_indexers) 2125 else: 2126 new_var = var.copy(deep=False)

/project/mrgoodbar/dcherian/python/xarray/xarray/core/variable.py in isel(self, indexers, missing_dims, **indexers_kwargs) 1118 1119 key = tuple(indexers.get(dim, slice(None)) for dim in self.dims) -> 1120 return self[key] 1121 1122 def squeeze(self, dim=None):

/project/mrgoodbar/dcherian/python/xarray/xarray/core/variable.py in __getitem__(self, key) 766 array x.values directly. 767 """ --> 768 dims, indexer, new_order = self._broadcast_indexes(key) 769 data = as_indexable(self._data)[indexer] 770 if new_order:

/project/mrgoodbar/dcherian/python/xarray/xarray/core/variable.py in _broadcast_indexes(self, key) 625 dims.append(d) 626 if len(set(dims)) == len(dims): --> 627 return self._broadcast_indexes_outer(key) 628 629 return self._broadcast_indexes_vectorized(key)

/project/mrgoodbar/dcherian/python/xarray/xarray/core/variable.py in _broadcast_indexes_outer(self, key) 680 k = k.data 681 if not isinstance(k, BASIC_INDEXING_TYPES): --> 682 k = np.asarray(k) 683 if k.size == 0: 684 # Slice by empty list; numpy could not infer the dtype

~/miniconda3/envs/dcpy_old_dask/lib/python3.7/site-packages/numpy/core/_asarray.py in asarray(a, dtype, order) 83 84 """ ---> 85 return array(a, dtype, copy=False, order=order) 86 87

~/miniconda3/envs/dcpy_old_dask/lib/python3.7/site-packages/dask/array/core.py in __array__(self, dtype, **kwargs) 1374 1375 def __array__(self, dtype=None, **kwargs): -> 1376 x = self.compute() 1377 if dtype and x.dtype != dtype: 1378 x = x.astype(dtype)

~/miniconda3/envs/dcpy_old_dask/lib/python3.7/site-packages/dask/base.py in compute(self, **kwargs) 165 dask.base.compute 166 """ --> 167 (result,) = compute(self, traverse=False, **kwargs) 168 return result 169

~/miniconda3/envs/dcpy_old_dask/lib/python3.7/site-packages/dask/base.py in compute(*args, **kwargs) 450 postcomputes.append(x.__dask_postcompute__()) 451 --> 452 results = schedule(dsk, keys, **kwargs) 453 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)]) 454

/project/mrgoodbar/dcherian/python/xarray/xarray/tests/__init__.py in __call__(self, dsk, keys, **kwargs) 112 raise RuntimeError( 113 "Too many computes. Total: %d > max: %d." --> 114 % (self.total_computes, self.max_computes) 115 ) 116 return dask.get(dsk, keys, **kwargs)

RuntimeError: Too many computes. Total: 2 > max: 1.
```

So here it looks like we don't support indexing by dask arrays, so as we loop through the dataset the indexer array gets computed each time. As you say, this should be avoided.

{
    "total_count": 0,
    "+1": 0,
    "-1": 0,
    "laugh": 0,
    "hooray": 0,
    "confused": 0,
    "heart": 0,
    "rocket": 0,
    "eyes": 0
}
  759709924
Powered by Datasette · Queries took 0.919ms · About: xarray-datasette