html_url,issue_url,id,node_id,user,created_at,updated_at,author_association,body,reactions,performed_via_github_app,issue
https://github.com/pydata/xarray/issues/2699#issuecomment-456999707,https://api.github.com/repos/pydata/xarray/issues/2699,456999707,MDEyOklzc3VlQ29tbWVudDQ1Njk5OTcwNw==,1217238,2019-01-23T22:58:50Z,2019-01-23T23:13:59Z,MEMBER,"I think this will work (though it needs more tests):
```python
import bottleneck
import dask.array as da
import numpy as np
def _last_element(array, axis):
slices = [slice(None)] * array.ndim
slices[axis] = slice(-1, None)
return array[tuple(slices)]
def _concat_push_slice(last_elements, array, axis):
    # Forward fill one block after seeding it with a carried-in element.
    #
    # `last_elements` is placed in front of `array` along `axis`, so that
    # bottleneck.push can propagate its value into any leading NaNs of
    # `array`.
    concatenated = np.concatenate([last_elements, array], axis=axis)
    pushed = bottleneck.push(concatenated, axis=axis)
    # Drop the first position along `axis` again. When the seed has length 1
    # along `axis` (as it does when called from push), this restores the
    # shape of `array`.
    slices = [slice(None)] * array.ndim
    slices[axis] = slice(1, None)
    return pushed[tuple(slices)]
def push(array, axis):
    # Forward fill NaNs along `axis` of a dask array, carrying values across
    # chunk boundaries.
    #
    # `array` must provide .map_blocks, .chunks, .ndim, .shape and .dtype
    # (i.e. a dask array). The result is built with chunks=array.chunks.
    if axis < 0:
        # Normalize a negative axis so it can be compared with enumerate()
        # indices and used as a rechunk key below.
        axis += array.ndim
    # Step 1: forward fill every chunk on its own.
    pushed = array.map_blocks(bottleneck.push, dtype=array.dtype, axis=axis)
    # Step 2: take the last element of each filled chunk (one element per
    # chunk along `axis`) ...
    new_chunks = list(array.chunks)
    new_chunks[axis] = tuple(1 for _ in array.chunks[axis])
    last_elements = pushed.map_blocks(
        _last_element, dtype=array.dtype, chunks=tuple(new_chunks), axis=axis)
    # ... and forward fill across those elements. They are gathered into a
    # single chunk along `axis` for the fill, then split back to one element
    # per chunk.
    pushed_last_elements = (
        last_elements.rechunk({axis: -1})
        .map_blocks(bottleneck.push, dtype=array.dtype, axis=axis)
        .rechunk({axis: 1})
    )
    # Step 3: shift the filled last elements by one position so that chunk i
    # is paired with the value carried out of chunk i - 1. The first chunk
    # has no predecessor and is paired with NaN.
    nan_shape = tuple(1 if axis == a else s for a, s in enumerate(array.shape))
    nan_chunks = tuple((1,) if axis == a else c for a, c in enumerate(array.chunks))
    shifted_pushed_last_elements = da.concatenate(
        # dask.array.full takes (shape, fill_value, ...); passing the fill
        # value first together with shape= as a keyword fails on current dask.
        [da.full(nan_shape, np.nan, chunks=nan_chunks),
         pushed_last_elements[(slice(None),) * axis + (slice(None, -1),)]],
        axis=axis)
    # Prepend each carried-in element to its chunk, fill again, and drop it.
    return da.map_blocks(
        _concat_push_slice,
        shifted_pushed_last_elements,
        pushed,
        dtype=array.dtype,
        chunks=array.chunks,
        axis=axis,
    )
# tests
# Compare the chunked implementation against bottleneck.push on the whole
# in-memory array for chunk sizes 1 through 10. The data has leading NaNs and
# NaN runs in the middle, so some chunks start with (or consist only of) NaNs.
array = np.array([np.nan, np.nan, np.nan, 1, 2, 3,
                  np.nan, np.nan, 4, 5, np.nan, 6])
expected = bottleneck.push(array, axis=0)
for c in range(1, 11):
    actual = push(da.from_array(array, chunks=c), axis=0).compute()
    np.testing.assert_equal(actual, expected)
```","{""total_count"": 3, ""+1"": 3, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",,402413097
https://github.com/pydata/xarray/issues/2699#issuecomment-456988170,https://api.github.com/repos/pydata/xarray/issues/2699,456988170,MDEyOklzc3VlQ29tbWVudDQ1Njk4ODE3MA==,1217238,2019-01-23T22:17:21Z,2019-01-23T22:17:38Z,MEMBER,"Thanks for the clear report. Indeed, this looks like a bug.
`bfill()` and `ffill()` are implemented on dask arrays via `apply_ufunc`, but they're applied independently on each chunk -- there's no filling between chunks:
https://github.com/pydata/xarray/blob/ddacf405fb256714ce01e1c4c464f829e1cc5058/xarray/core/missing.py#L262-L289
Instead, I think we need a multi-step process for parallelizing `bottleneck.push`, e.g.,
1. Forward fill each chunk independently.
2. Slice out the *last element* of each chunk and forward fill these.
3. Prepend the filled last element of the preceding chunk to the start of each chunk, forward fill again, and then drop the prepended element.","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",,402413097