issue_comments: 1001676665

html_url: https://github.com/pydata/xarray/issues/6112#issuecomment-1001676665
issue_url: https://api.github.com/repos/pydata/xarray/issues/6112
id: 1001676665
node_id: IC_kwDOAMm_X847tF95
user: 25071375
created_at: 2021-12-27T17:53:07Z
updated_at: 2021-12-27T17:59:57Z
author_association: CONTRIBUTOR
body:

Yes, of course. By the way, would it be possible to add something like the following code for the case where a limit is given? I know this code generates roughly 4x more tasks, but at least it does the job, so a warning could be sufficient. (If it is not good enough to be added, that's no problem; building the graph manually is probably a better option than using this algorithm for forward fill with a limit.)

```py
import dask.array as da
import numpy as np
import xarray as xr
from bottleneck import push


def ffill(x: xr.DataArray, dim: str, limit=None):

    def _fill_with_last_one(a, b):
        # cumreduction applies the push func over all the blocks first, so
        # the only missing part is filling the remaining missing values
        # using the last data of the preceding block
        if isinstance(a, np.ma.masked_array) or isinstance(b, np.ma.masked_array):
            a = np.ma.getdata(a)
            b = np.ma.getdata(b)
            values = np.where(~np.isnan(b), b, a)
            return np.ma.masked_array(values, mask=np.ma.getmaskarray(b))

        return np.where(~np.isnan(b), b, a)

    def _ffill(arr):
        return xr.DataArray(
            da.reductions.cumreduction(
                func=push,
                binop=_fill_with_last_one,
                ident=np.nan,
                x=arr.data,
                axis=arr.dims.index(dim),
                dtype=arr.dtype,
                method="sequential",
            ),
            dims=x.dims,
            coords=x.coords,
        )

    if limit is not None:
        axis = x.dims.index(dim)
        # position index along dim, broadcast to the shape and chunks of x
        arange = xr.DataArray(
            da.broadcast_to(
                da.arange(
                    x.shape[axis],
                    chunks=x.chunks[axis],
                    dtype=x.dtype,
                ).reshape(
                    tuple(size if i == axis else 1 for i, size in enumerate(x.shape))
                ),
                x.shape,
                x.chunks,
            ),
            coords=x.coords,
            dims=x.dims,
        )
        # distance to the last valid value; mask out fills beyond the limit
        valid_limits = (arange - _ffill(arange.where(x.notnull(), np.nan))) <= limit
        return _ffill(x).where(valid_limits, np.nan)

    return _ffill(x)
```
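For reference, a minimal usage sketch (the array values, chunk size, and `time` dimension are made up for illustration); it assumes the `ffill` above is in scope and that dask and bottleneck are installed:

```py
import numpy as np
import xarray as xr

# hypothetical chunked series: 1.0 followed by three NaNs, then 5.0, NaN, 7.0
data = xr.DataArray(
    np.array([1.0, np.nan, np.nan, np.nan, 5.0, np.nan, 7.0]),
    dims="time",
).chunk({"time": 3})

# forward fill at most two consecutive NaNs along "time"
filled = ffill(data, dim="time", limit=2).compute()
print(filled.values)
# expected: [ 1.  1.  1. nan  5.  5.  7.] -- the third consecutive NaN stays
# NaN because it is more than `limit` steps away from the last valid value
```

The limit works because subtracting the forward-filled positions of the valid entries from the running index gives, at every element, the distance to the last non-NaN value, and anything beyond `limit` is masked back to NaN.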

reactions: {"total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0}
issue: 1088893989