id,node_id,number,title,user,state,locked,assignee,milestone,comments,created_at,updated_at,closed_at,author_association,active_lock_reason,draft,pull_request,body,reactions,performed_via_github_app,state_reason,repo,type
210651795,MDU6SXNzdWUyMTA2NTE3OTU=,1287,Groupby method on larger files fails unless explicitly load data in 0.9.1,7926249,closed,0,,,3,2017-02-28T00:37:53Z,2020-04-11T16:19:53Z,2020-04-11T16:19:52Z,NONE,,,,"Below, x is from a 4.2GB file opened with engine='h5netcdf'.  The following groupby method worked without an explicit load() in v0.8.2, but now fails unless I explicitly load either the entire object or the variable of interest.

```Python-traceback
In [52]: x.EBeam_ebeamPhotonEnergy.groupby('step').mean()
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-52-e0f4fcacc6a2> in <module>()
----> 1 x.EBeam_ebeamPhotonEnergy.groupby('step').mean()

/reg/g/psdm/sw/conda/inst/miniconda2-prod-rhel7/envs/ana-1.2.1/lib/python2.7/site-packages/xarray/core/common.pyc in wrapped_func(self, dim, axis, skipna, keep_attrs, **kwargs)
     17                              keep_attrs=False, **kwargs):
     18                 return self.reduce(func, dim, axis, keep_attrs=keep_attrs,
---> 19                                    skipna=skipna, allow_lazy=True, **kwargs)
     20         else:
     21             def wrapped_func(self, dim=None, axis=None, keep_attrs=False,

/reg/g/psdm/sw/conda/inst/miniconda2-prod-rhel7/envs/ana-1.2.1/lib/python2.7/site-packages/xarray/core/groupby.pyc in reduce(self, func, dim, axis, keep_attrs, shortcut, **kwargs)
    577         def reduce_array(ar):
    578             return ar.reduce(func, dim, axis, keep_attrs=keep_attrs, **kwargs)
--> 579         return self.apply(reduce_array, shortcut=shortcut)
    580 
    581 ops.inject_reduce_methods(DataArrayGroupBy)

/reg/g/psdm/sw/conda/inst/miniconda2-prod-rhel7/envs/ana-1.2.1/lib/python2.7/site-packages/xarray/core/groupby.pyc in apply(self, func, shortcut, **kwargs)
    521         applied = (maybe_wrap_array(arr, func(arr, **kwargs))
    522                    for arr in grouped)
--> 523         return self._combine(applied, shortcut=shortcut)
    524 
    525     def _combine(self, applied, shortcut=False):

/reg/g/psdm/sw/conda/inst/miniconda2-prod-rhel7/envs/ana-1.2.1/lib/python2.7/site-packages/xarray/core/groupby.pyc in _combine(self, applied, shortcut)
    525     def _combine(self, applied, shortcut=False):
    526         """"""Recombine the applied objects like the original.""""""
--> 527         applied_example, applied = peek_at(applied)
    528         coord, dim, positions = self._infer_concat_args(applied_example)
    529         if shortcut:

/reg/g/psdm/sw/conda/inst/miniconda2-prod-rhel7/envs/ana-1.2.1/lib/python2.7/site-packages/xarray/core/utils.pyc in peek_at(iterable)
    112     """"""
    113     gen = iter(iterable)
--> 114     peek = next(gen)
    115     return peek, itertools.chain([peek], gen)
    116 

/reg/g/psdm/sw/conda/inst/miniconda2-prod-rhel7/envs/ana-1.2.1/lib/python2.7/site-packages/xarray/core/groupby.pyc in <genexpr>((arr,))
    520             grouped = self._iter_grouped()
    521         applied = (maybe_wrap_array(arr, func(arr, **kwargs))
--> 522                    for arr in grouped)
    523         return self._combine(applied, shortcut=shortcut)
    524 

/reg/g/psdm/sw/conda/inst/miniconda2-prod-rhel7/envs/ana-1.2.1/lib/python2.7/site-packages/xarray/core/groupby.pyc in reduce_array(ar)
    576         """"""
    577         def reduce_array(ar):
--> 578             return ar.reduce(func, dim, axis, keep_attrs=keep_attrs, **kwargs)
    579         return self.apply(reduce_array, shortcut=shortcut)
    580 

/reg/g/psdm/sw/conda/inst/miniconda2-prod-rhel7/envs/ana-1.2.1/lib/python2.7/site-packages/xarray/core/variable.pyc in reduce(self, func, dim, axis, keep_attrs, allow_lazy, **kwargs)
    902         if dim is not None:
    903             axis = self.get_axis_num(dim)
--> 904         data = func(self.data if allow_lazy else self.values,
    905                     axis=axis, **kwargs)
    906 

/reg/g/psdm/sw/conda/inst/miniconda2-prod-rhel7/envs/ana-1.2.1/lib/python2.7/site-packages/xarray/core/variable.pyc in data(self)
    263             return self._data
    264         else:
--> 265             return self.values
    266 
    267     @data.setter

/reg/g/psdm/sw/conda/inst/miniconda2-prod-rhel7/envs/ana-1.2.1/lib/python2.7/site-packages/xarray/core/variable.pyc in values(self)
    304     def values(self):
    305         """"""The variable's data as a numpy.ndarray""""""
--> 306         return _as_array_or_item(self._data)
    307 
    308     @values.setter

/reg/g/psdm/sw/conda/inst/miniconda2-prod-rhel7/envs/ana-1.2.1/lib/python2.7/site-packages/xarray/core/variable.pyc in _as_array_or_item(data)
    180     TODO: remove this (replace with np.asarray) once these issues are fixed
    181     """"""
--> 182     data = np.asarray(data)
    183     if data.ndim == 0:
    184         if data.dtype.kind == 'M':

/reg/g/psdm/sw/conda/inst/miniconda2-prod-rhel7/envs/ana-1.2.1/lib/python2.7/site-packages/numpy/core/numeric.pyc in asarray(a, dtype, order)
    480 
    481     """"""
--> 482     return array(a, dtype, copy=False, order=order)
    483 
    484 def asanyarray(a, dtype=None, order=None):

/reg/g/psdm/sw/conda/inst/miniconda2-prod-rhel7/envs/ana-1.2.1/lib/python2.7/site-packages/xarray/core/indexing.pyc in __array__(self, dtype)
    415 
    416     def __array__(self, dtype=None):
--> 417         self._ensure_cached()
    418         return np.asarray(self.array, dtype=dtype)
    419 

/reg/g/psdm/sw/conda/inst/miniconda2-prod-rhel7/envs/ana-1.2.1/lib/python2.7/site-packages/xarray/core/indexing.pyc in _ensure_cached(self)
    412     def _ensure_cached(self):
    413         if not isinstance(self.array, np.ndarray):
--> 414             self.array = np.asarray(self.array)
    415 
    416     def __array__(self, dtype=None):

/reg/g/psdm/sw/conda/inst/miniconda2-prod-rhel7/envs/ana-1.2.1/lib/python2.7/site-packages/numpy/core/numeric.pyc in asarray(a, dtype, order)
    480 
    481     """"""
--> 482     return array(a, dtype, copy=False, order=order)
    483 
    484 def asanyarray(a, dtype=None, order=None):

/reg/g/psdm/sw/conda/inst/miniconda2-prod-rhel7/envs/ana-1.2.1/lib/python2.7/site-packages/xarray/core/indexing.pyc in __array__(self, dtype)
    396 
    397     def __array__(self, dtype=None):
--> 398         return np.asarray(self.array, dtype=dtype)
    399 
    400     def __getitem__(self, key):

/reg/g/psdm/sw/conda/inst/miniconda2-prod-rhel7/envs/ana-1.2.1/lib/python2.7/site-packages/numpy/core/numeric.pyc in asarray(a, dtype, order)
    480 
    481     """"""
--> 482     return array(a, dtype, copy=False, order=order)
    483 
    484 def asanyarray(a, dtype=None, order=None):

/reg/g/psdm/sw/conda/inst/miniconda2-prod-rhel7/envs/ana-1.2.1/lib/python2.7/site-packages/xarray/core/indexing.pyc in __array__(self, dtype)
    371     def __array__(self, dtype=None):
    372         array = orthogonally_indexable(self.array)
--> 373         return np.asarray(array[self.key], dtype=None)
    374 
    375     def __getitem__(self, key):

/reg/g/psdm/sw/conda/inst/miniconda2-prod-rhel7/envs/ana-1.2.1/lib/python2.7/site-packages/numpy/core/numeric.pyc in asarray(a, dtype, order)
    480 
    481     """"""
--> 482     return array(a, dtype, copy=False, order=order)
    483 
    484 def asanyarray(a, dtype=None, order=None):

/reg/g/psdm/sw/conda/inst/miniconda2-prod-rhel7/envs/ana-1.2.1/lib/python2.7/site-packages/xarray/core/indexing.pyc in __array__(self, dtype)
    371     def __array__(self, dtype=None):
    372         array = orthogonally_indexable(self.array)
--> 373         return np.asarray(array[self.key], dtype=None)
    374 
    375     def __getitem__(self, key):

/reg/g/psdm/sw/conda/inst/miniconda2-prod-rhel7/envs/ana-1.2.1/lib/python2.7/site-packages/xarray/core/utils.pyc in __getitem__(self, key)
    418 
    419     def __getitem__(self, key):
--> 420         return self.array[key]
    421 
    422     def __repr__(self):

/reg/g/psdm/sw/conda/inst/miniconda2-prod-rhel7/envs/ana-1.2.1/lib/python2.7/site-packages/h5netcdf/core.pyc in __getitem__(self, key)
     96 
     97     def __getitem__(self, key):
---> 98         return self._h5ds[key]
     99 
    100     def __setitem__(self, key, value):

h5py/_objects.pyx in h5py._objects.with_phil.wrapper (/reg/g/psdm/sw/conda/inst/miniconda2-dev-rhel7/conda-bld/h5py_1484088818009/work/h5py-2.7.0rc2/h5py/_objects.c:3185)()

h5py/_objects.pyx in h5py._objects.with_phil.wrapper (/reg/g/psdm/sw/conda/inst/miniconda2-dev-rhel7/conda-bld/h5py_1484088818009/work/h5py-2.7.0rc2/h5py/_objects.c:3143)()

/reg/g/psdm/sw/conda/inst/miniconda2-prod-rhel7/envs/ana-1.2.1/lib/python2.7/site-packages/h5py/_hl/dataset.pyc in __getitem__(self, args)
    472 
    473         # Perform the dataspace selection.
--> 474         selection = sel.select(self.shape, args, dsid=self.id)
    475 
    476         if selection.nselect == 0:

/reg/g/psdm/sw/conda/inst/miniconda2-prod-rhel7/envs/ana-1.2.1/lib/python2.7/site-packages/h5py/_hl/selections.pyc in select(shape, args, dsid)
     70         elif isinstance(arg, np.ndarray):
     71             sel = PointSelection(shape)
---> 72             sel[arg]
     73             return sel
     74 

/reg/g/psdm/sw/conda/inst/miniconda2-prod-rhel7/envs/ana-1.2.1/lib/python2.7/site-packages/h5py/_hl/selections.pyc in __getitem__(self, arg)
    210         """""" Perform point-wise selection from a NumPy boolean array """"""
    211         if not (isinstance(arg, np.ndarray) and arg.dtype.kind == 'b'):
--> 212             raise TypeError(""PointSelection __getitem__ only works with bool arrays"")
    213         if not arg.shape == self.shape:
    214             raise TypeError(""Boolean indexing array has incompatible shape"")

TypeError: PointSelection __getitem__ only works with bool arrays

In [55]: x.EBeam_ebeamPhotonEnergy.load().groupby('step').mean()
Out[55]: 
<xarray.DataArray 'EBeam_ebeamPhotonEnergy' (step: 65)>
array([ 9045.704225,  9045.543454,  9044.706383,  9048.093055,  9045.000397,
        9043.704296,  9047.318191,  9047.263165,  9044.936176,  9045.865424,
        9041.683054,  9043.722705,  9044.589087,  9044.032681,  9045.478713,
        9043.710082,  9042.061361,  9043.566752,  9045.868629,  9042.725552,
        9043.797367,  9042.54982 ,  9042.713473,  9042.841264,  9042.689603,
        9043.722533,  9045.750076,  9046.675932,  9045.879081,  9047.069358,
        9044.710387,  9044.921831,  9044.884213,  9045.407495,  9043.409401,
        9044.276054,  9050.751243,  9042.902832,  9045.744036,  9042.485437,
        9042.760976,  9044.195124,  9042.054604,  9042.274026,  9042.602458,
        9045.287399,  9044.173708,  9043.708038,  9043.264793,  9047.872945,
        9044.193639,  9044.424931,  9044.065687,  9043.453713,  9045.404021,
        9042.907949,  9045.248006,  9045.170088,  9045.345617,  9046.065199,
        9045.846606,  9041.401112,  9042.031974,  9043.363107,  9041.911583])
Coordinates:
  * step     (step) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 ...
```","{""url"": ""https://api.github.com/repos/pydata/xarray/issues/1287/reactions"", ""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",,completed,13221727,issue