issue_comments: 433510805

  • html_url: https://github.com/pydata/xarray/issues/1192#issuecomment-433510805
  • issue_url: https://api.github.com/repos/pydata/xarray/issues/1192
  • id: 433510805
  • node_id: MDEyOklzc3VlQ29tbWVudDQzMzUxMDgwNQ==
  • user: 14314623
  • created_at: 2018-10-26T18:59:07Z
  • updated_at: 2018-10-26T18:59:07Z
  • author_association: CONTRIBUTOR
  • issue: 198742089

I should add that I would be happy to work on an implementation, but I would probably need a good number of pointers.

Here is the implementation that I have been using (it only works with dask arrays at this point).

I should have posted this earlier to avoid @rabernat's zingers over here.

```python
import warnings

import numpy as np
import xarray as xr
from dask.array import Array, coarsen


def aggregate(da, blocks, func=np.nanmean, debug=False):
    """
    Performs efficient block averaging in one or multiple dimensions.
    Only works on regular grid dimensions.

    Parameters
    ----------
    da : xarray DataArray (must be a dask array!)
    blocks : list
        List of tuples containing the dimension and interval to
        aggregate over.
    func : function
        Aggregation function. Defaults to numpy.nanmean.

    Returns
    -------
    da_agg : xarray DataArray
        Aggregated array.

    Examples
    --------
    >>> from xarrayutils import aggregate
    >>> import numpy as np
    >>> import xarray as xr
    >>> import dask.array as da
    >>> x = np.arange(-10, 10)
    >>> y = np.arange(-10, 10)
    >>> xx, yy = np.meshgrid(x, y)
    >>> z = xx**2 - yy**2
    >>> a = xr.DataArray(da.from_array(z, chunks=(20, 20)),
    ...                  coords={'x': x, 'y': y}, dims=['y', 'x'])
    >>> print(a)
    <xarray.DataArray 'array-7e422c91624f207a5f7ebac426c01769' (y: 20, x: 20)>
    dask.array<array-7..., shape=(20, 20), dtype=int64, chunksize=(20, 20)>
    Coordinates:
      * y        (y) int64 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9
      * x        (x) int64 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9
    >>> blocks = [('x', 2), ('y', 10)]
    >>> a_coarse = aggregate(a, blocks, func=np.mean)
    >>> print(a_coarse)
    <xarray.DataArray 'array-7e422c91624f207a5f7ebac426c01769' (y: 2, x: 10)>
    dask.array<coarsen..., shape=(2, 10), dtype=float64, chunksize=(2, 10)>
    Coordinates:
      * y        (y) int64 -10 0
      * x        (x) int64 -10 -8 -6 -4 -2 0 2 4 6 8
    Attributes:
        Coarsened with: <function mean at 0x111754230>
        Coarsenblocks: [('x', 2), ('y', 10)]
    """
    # Check if the input is a dask array (I might want to convert this
    # automatically in the future).
    if not isinstance(da.data, Array):
        raise RuntimeError('data array data must be a dask array')

    # Check data type of blocks
    # TODO write test
    if (not all(isinstance(n[0], str) for n in blocks) or
            not all(isinstance(n[1], int) for n in blocks)):
        print('blocks input', str(blocks))
        raise RuntimeError("block dimension must be dtype(str), "
                           "e.g. ('lon', 4)")

    # Check if the given array has the dimensions specified in blocks
    try:
        block_dict = dict((da.get_axis_num(x), y) for x, y in blocks)
    except ValueError:
        raise RuntimeError("'blocks' contains non matching dimension")

    # Check the size of the excess in each aggregated axis
    blocks = [(a[0], a[1], da.shape[da.get_axis_num(a[0])] % a[1])
              for a in blocks]

    # For now, default to trimming the excess
    da_coarse = coarsen(func, da.data, block_dict, trim_excess=True)

    # For now, only the dimension coordinates are carried over
    new_coords = dict([])
    warnings.warn("WARNING: only dimensions are carried over as coordinates")
    for cc in list(da.dims):
        new_coords[cc] = da.coords[cc]
        for dd in blocks:
            if dd[0] in list(da.coords[cc].dims):
                new_coords[cc] = new_coords[cc].isel(
                    **{dd[0]: slice(0, -(1 + dd[2]), dd[1])})

    attrs = {'Coarsened with': str(func), 'Coarsenblocks': str(blocks)}
    da_coarse = xr.DataArray(da_coarse, dims=da.dims, coords=new_coords,
                             name=da.name, attrs=attrs)
    return da_coarse
```
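If it helps for trying this out, here is a minimal usage sketch; the toy array, block sizes, and variable names are my own illustration, assuming the imports and the `aggregate` definition above are in scope:

```python
import numpy as np
import xarray as xr
import dask.array as dsa

# Toy 4x6 field backed by a single-chunk dask array.
data = dsa.from_array(np.arange(24.0).reshape(4, 6), chunks=(4, 6))
field = xr.DataArray(data,
                     coords={'y': np.arange(4), 'x': np.arange(6)},
                     dims=['y', 'x'])

# Block-average over 2x3 windows; the result stays lazy until computed.
coarse = aggregate(field, [('y', 2), ('x', 3)], func=np.mean)
print(coarse.compute())  # shape (2, 2)
```

With `trim_excess=True`, any leftover rows or columns that do not fill a complete block are dropped, which is why the dimension coordinates are subsampled with the same stride (and the same trimmed excess) as the data.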
