html_url,issue_url,id,node_id,user,created_at,updated_at,author_association,body,reactions,performed_via_github_app,issue https://github.com/pydata/xarray/issues/2699#issuecomment-456999707,https://api.github.com/repos/pydata/xarray/issues/2699,456999707,MDEyOklzc3VlQ29tbWVudDQ1Njk5OTcwNw==,1217238,2019-01-23T22:58:50Z,2019-01-23T23:13:59Z,MEMBER,"I think this will work (though it needs more tests): ```python import bottleneck import dask.array as da import numpy as np def _last_element(array, axis): slices = [slice(None)] * array.ndim slices[axis] = slice(-1, None) return array[tuple(slices)] def _concat_push_slice(last_elements, array, axis): concatenated = np.concatenate([last_elements, array], axis=axis) pushed = bottleneck.push(concatenated, axis=axis) slices = [slice(None)] * array.ndim slices[axis] = slice(1, None) sliced = pushed[tuple(slices)] return sliced def push(array, axis): if axis < 0: axis += array.ndim pushed = array.map_blocks(bottleneck.push, dtype=array.dtype, axis=axis) new_chunks = list(array.chunks) new_chunks[axis] = tuple(1 for _ in array.chunks[axis]) last_elements = pushed.map_blocks( _last_element, dtype=array.dtype, chunks=tuple(new_chunks), axis=axis) pushed_last_elements = ( last_elements.rechunk({axis: -1}) .map_blocks(bottleneck.push, dtype=array.dtype, axis=axis) .rechunk({axis: 1}) ) nan_shape = tuple(1 if axis == a else s for a, s in enumerate(array.shape)) nan_chunks = tuple((1,) if axis == a else c for a, c in enumerate(array.chunks)) shifted_pushed_last_elements = da.concatenate( [da.full(np.nan, shape=nan_shape, chunks=nan_chunks), pushed_last_elements[(slice(None),) * axis + (slice(None, -1),)]], axis=axis) return da.map_blocks( _concat_push_slice, shifted_pushed_last_elements, pushed, dtype=array.dtype, chunks=array.chunks, axis=axis, ) # tests array = np.array([np.nan, np.nan, np.nan, 1, 2, 3, np.nan, np.nan, 4, 5, np.nan, 6]) expected = bottleneck.push(array, axis=0) for c in range(1, 11): actual = push(da.from_array(array, chunks=c), axis=0).compute() np.testing.assert_equal(actual, expected) ```","{""total_count"": 3, ""+1"": 3, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",,402413097 https://github.com/pydata/xarray/issues/2699#issuecomment-456988170,https://api.github.com/repos/pydata/xarray/issues/2699,456988170,MDEyOklzc3VlQ29tbWVudDQ1Njk4ODE3MA==,1217238,2019-01-23T22:17:21Z,2019-01-23T22:17:38Z,MEMBER,"Thanks for the clear report. Indeed, this looks like a bug. `bfill()` and `ffill()` are implemented on dask arrays via `apply_ufunc`, but they're applied independently on each chunk -- there's no filling between chunks: https://github.com/pydata/xarray/blob/ddacf405fb256714ce01e1c4c464f829e1cc5058/xarray/core/missing.py#L262-L289 Instead, I think we need a multi-step process for parallelizing `bottleneck.push`, e.g., 1. Forward fill each chunk independently. 2. Slice out the *last element* of each chunk and forward fill these. 3. Prepend filled last elements to the start of each chunk, and forward fill them again.","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",,402413097