issue_comments: 433510805

  • html_url: https://github.com/pydata/xarray/issues/1192#issuecomment-433510805
  • issue_url: https://api.github.com/repos/pydata/xarray/issues/1192
  • id: 433510805
  • node_id: MDEyOklzc3VlQ29tbWVudDQzMzUxMDgwNQ==
  • user: 14314623
  • created_at: 2018-10-26T18:59:07Z
  • updated_at: 2018-10-26T18:59:07Z
  • author_association: CONTRIBUTOR
  • issue: 198742089

I should add that I would be happy to work on an implementation, but I would probably need a good number of pointers.

Here is the implementation that I have been using (it only works with dask arrays at this point).

I should have posted this earlier to avoid @rabernat's zingers over here.

```python
import warnings

import numpy as np
import xarray as xr
from dask.array import Array, coarsen


def aggregate(da, blocks, func=np.nanmean, debug=False):
    """
    Performs efficient block averaging in one or multiple dimensions.
    Only works on regular grid dimensions.

    Parameters
    ----------
    da : xarray DataArray (must be a dask array!)
    blocks : list
        List of tuples containing the dimension and interval to
        aggregate over.
    func : function
        Aggregation function. Defaults to numpy.nanmean.

    Returns
    -------
    da_agg : xarray DataArray
        Aggregated array.

    Examples
    --------
    >>> from xarrayutils import aggregate
    >>> import numpy as np
    >>> import xarray as xr
    >>> import dask.array as da
    >>> x = np.arange(-10, 10)
    >>> y = np.arange(-10, 10)
    >>> xx, yy = np.meshgrid(x, y)
    >>> z = xx**2 - yy**2
    >>> a = xr.DataArray(da.from_array(z, chunks=(20, 20)),
    ...                  coords={'x': x, 'y': y}, dims=['y', 'x'])
    >>> print(a)
    <xarray.DataArray 'array-7e422c91624f207a5f7ebac426c01769' (y: 20, x: 20)>
    dask.array<array-7..., shape=(20, 20), dtype=int64, chunksize=(20, 20)>
    Coordinates:
      * y        (y) int64 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9
      * x        (x) int64 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9
    >>> blocks = [('x', 2), ('y', 10)]
    >>> a_coarse = aggregate(a, blocks, func=np.mean)
    >>> print(a_coarse)
    <xarray.DataArray 'array-7e422c91624f207a5f7ebac426c01769' (y: 2, x: 10)>
    dask.array<coarsen..., shape=(2, 10), dtype=float64, chunksize=(2, 10)>
    Coordinates:
      * y        (y) int64 -10 0
      * x        (x) int64 -10 -8 -6 -4 -2 0 2 4 6 8
    Attributes:
        Coarsened with: <function mean at 0x111754230>
        Coarsenblocks: [('x', 2), ('y', 10)]
    """
    # Check if the input is a dask array (I might want to convert this
    # automatically in the future).
    if not isinstance(da.data, Array):
        raise RuntimeError('data array data must be a dask array')

    # Check data type of blocks
    # TODO write test
    if (not all(isinstance(n[0], str) for n in blocks) or
            not all(isinstance(n[1], int) for n in blocks)):
        print('blocks input', str(blocks))
        raise RuntimeError("block dimension must be dtype(str), "
                           "e.g. ('lon', 4)")

    # Check if the given array has the dimensions specified in blocks
    try:
        block_dict = dict((da.get_axis_num(x), y) for x, y in blocks)
    except ValueError:
        raise RuntimeError("'blocks' contains non matching dimension")

    # Check the size of the excess in each aggregated axis
    blocks = [(a[0], a[1], da.shape[da.get_axis_num(a[0])] % a[1])
              for a in blocks]

    # For now, default to trimming the excess
    da_coarse = coarsen(func, da.data, block_dict, trim_excess=True)

    # For now, only the dimension coordinates are carried over
    new_coords = dict([])
    warnings.warn("WARNING: only dimensions are carried over as coordinates")
    for cc in list(da.dims):
        new_coords[cc] = da.coords[cc]
        for dd in blocks:
            if dd[0] in list(da.coords[cc].dims):
                new_coords[cc] = new_coords[cc].isel(
                    **{dd[0]: slice(0, -(1 + dd[2]), dd[1])})

    attrs = {'Coarsened with': str(func), 'Coarsenblocks': str(blocks)}
    da_coarse = xr.DataArray(da_coarse, dims=da.dims, coords=new_coords,
                             name=da.name, attrs=attrs)
    return da_coarse
```
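If it helps for trying this out, here is a minimal usage sketch; the toy array, block sizes, and variable names are my own illustration, assuming the imports and the `aggregate` definition above are in scope:

```python
import numpy as np
import xarray as xr
import dask.array as dsa

# Toy 4x6 field backed by a single-chunk dask array.
data = dsa.from_array(np.arange(24.0).reshape(4, 6), chunks=(4, 6))
field = xr.DataArray(data,
                     coords={'y': np.arange(4), 'x': np.arange(6)},
                     dims=['y', 'x'])

# Block-average over 2x3 windows; the result stays lazy until computed.
coarse = aggregate(field, [('y', 2), ('x', 3)], func=np.mean)
print(coarse.compute())  # shape (2, 2)
```

With `trim_excess=True`, any leftover rows or columns that do not fill a complete block are dropped, which is why the dimension coordinates are subsampled with the same stride (and the same trimmed excess) as the data.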
