html_url,issue_url,id,node_id,user,created_at,updated_at,author_association,body,reactions,performed_via_github_app,issue
https://github.com/pydata/xarray/issues/5629#issuecomment-960133879,https://api.github.com/repos/pydata/xarray/issues/5629,960133879,IC_kwDOAMm_X845Onr3,2448579,2021-11-03T21:39:08Z,2023-01-24T20:19:19Z,MEMBER,"In reading through #5933, I realized there's an alternate solution that preserves the desirable feature of fitting along chunked dimensions (at least for skipna=False)

Since we are only ever fitting along one dimension, we can apply the reshape-other-dimensions operation blockwise. As long as we reshape back in a consistent manner (again blockwise) this is identical to our current stacking approach, but substantially more efficient. In particular, it avoids merging chunks.

```python
def reshape_block(arr, output_chunks):
    """"""
    Blockwise reshape array to match output_chunks

    This method applies a reshape operation blockwise, so it only makes sense if
    you do not care about the order of elements (along reshaped axes) after reshaping.
    The order of elements will depend on the chunking of ``arr``.
    It is substantially more efficient than a usual reshape, which makes sure array elements
    appear in a numpy-like order regardless of chunking.

    Based on https://github.com/dask/dask/issues/4855#issuecomment-497062002
    """"""

    from itertools import product

    import numpy as np
    from dask.array import Array
    from dask.base import tokenize
    from dask.highlevelgraph import HighLevelGraph

    name = ""reshape-block-"" + tokenize(arr)
    ichunks = tuple(range(len(chunks_v)) for chunks_v in arr.chunks)
    ochunks = tuple(range(len(chunks_v)) for chunks_v in output_chunks)

    dsk = {
        (name, *ochunk): (np.reshape, (arr.name, *ichunk), oshape)
        for ichunk, ochunk, oshape in zip(
            product(*ichunks), product(*ochunks), product(*output_chunks)
        )
    }

    graph = HighLevelGraph.from_collections(name, dsk, dependencies=[arr])
    res = Array(graph, name, output_chunks, arr.dtype, meta=arr._meta)

    return res
         
        
def polyfit(da, dim, skipna=False):
    import itertools

    import numpy as np

    da = da.transpose(..., dim)
    arr = da.data

    do_blockwise_reshape = arr.ndim > 1 and any(
        len(chunks) > 1 for chunks in arr.chunks
    )
    if do_blockwise_reshape:
        output_chunks = (tuple(map(np.product, product(*arr.chunks[:-1]))),) + (
            arr.chunks[-1],
        )
        other_dims = tuple(dim_ for dim_ in da.dims if dim_ != dim)
        reshaped = xr.DataArray(
            reshape_block(arr, output_chunks), dims=(""__reshaped__"", dim)
        )
    else:
        reshaped = da

    fit = reshaped.polyfit(dim, 1, skipna=skipna)

    if do_blockwise_reshape:
        result = xr.Dataset()
        for var in fit:
            result[var] = (
                (""degree"",) + other_dims,
                reshape_block(fit[var].data, ((2,),) + arr.chunks[:-1]),
            )

        for dim_ in other_dims:
            result[dim_] = da[dim_]
        result[""degree""] = fit[""degree""]
        return result
    else:
        return fit
```

Here's the graph 1 chunk along time (like @jbusecke's example, and skipna=True)
![image](https://user-images.githubusercontent.com/2448579/140196626-c6316d0a-468c-4540-822a-c53156c7040b.png)

And when the time dimension is chunked (and skipna=False)

![image](https://user-images.githubusercontent.com/2448579/140196389-0e06c04f-1f70-408f-89c6-3d7d8064c9c7.png)

#### older version

<details>

``` python
from itertools import product

import numpy as np
from dask.array import Array
from dask.array.utils import assert_eq
from dask.base import tokenize
from dask.highlevelgraph import HighLevelGraph


def reshape_block(arr, output_chunks):
    """"""
    Blockwise reshape array to match output_chunks

    This method applies a reshape operation blockwise, so it only makes sense if
    you do not care about the order of elements (along reshaped axes) after reshaping.
    The order of elements will depend on the chunking of ``arr``.
    It is substantially more efficient than a usual reshape, which makes sure array elements
    appear in a numpy-like order regardless of chunking.
    
    Based on https://github.com/dask/dask/issues/4855#issuecomment-497062002
    """"""

    name = ""reshape-block-"" + tokenize(arr)
    ichunks = tuple(range(len(chunks_v)) for chunks_v in arr.chunks)
    ochunks = tuple(range(len(chunks_v)) for chunks_v in output_chunks)

    dsk = {
        (name, *ochunk): (np.reshape, (arr.name, *ichunk), oshape)
        for ichunk, ochunk, oshape in zip(
            product(*ichunks), product(*ochunks), product(*output_chunks)
        )
    }

    graph = HighLevelGraph.from_collections(name, dsk, dependencies=[arr])
    res = Array(graph, name, output_chunks, arr.dtype, meta=arr._meta)

    return res

# assumes we're collapsing axes (0,1) and preserving axis 2
output_chunks = (tuple(map(np.product, product(*arr.chunks[:2]))),) + (arr.chunks[-1],)
reshaped = reshape_block(arr, output_chunks)

# make sure that we roundtrip
actual = reshape_block(reshaped, arr.chunks)
assert_eq(actual, arr) # True
```
``` python
def new_polyfit(da):
    arr = da.data
    output_chunks = (tuple(map(np.product, product(*arr.chunks[:2]))),) + (
        arr.chunks[-1],
    )
    reshaped = xr.DataArray(reshape_block(arr, output_chunks), dims=(""xy"", ""time""))
    fit = reshaped.polyfit(""time"", 1, skipna=True)
    result = xr.Dataset()
    for var in fit:
        result[var] = (
            (""degree"", ""x"", ""y""),
            reshape_block(fit[var].data, ((2,),) + arr.chunks[:-1]),
        )

    result[""x""] = da[""x""]
    result[""y""] = da[""y""]
    result[""degree""] = fit[""degree""]
    return result


poly = da.polyfit(""time"", 1, skipna=False)

xr.testing.assert_allclose(poly, new_polyfit(da))  # True!
```
</details>


","{""total_count"": 1, ""+1"": 1, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",,950882492
https://github.com/pydata/xarray/issues/5629#issuecomment-961370482,https://api.github.com/repos/pydata/xarray/issues/5629,961370482,IC_kwDOAMm_X845TVly,2448579,2021-11-04T19:56:35Z,2021-11-04T19:57:13Z,MEMBER,">  a clever trick to get around the apply_ufunc constraints

To clarify, the constraint here is that `lstsq` expects 2D arrays, that's why the reshaping was needed in the first place.

`apply_ufunc(..., dask=""parallelized"")` automatically applies your dask-unaware function blockwise which only makes sense if there is a single chunk along the core dimension, hence that constraint.

`apply_ufunc(..., dask=""allowed"")` has no constraints, your function has to know how to deal with chunked data though.

> uses blockwise to apply a function that reshapes the data in that block to be 1D.

Yes, same idea. Though when you do it that way, you have to implement the function you're applying (that xhistogram PR reimplements `dask.array.bincount`). Reimplementing `lstsq` is a lot more complicated, hence this approach.","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",,950882492
https://github.com/pydata/xarray/issues/5629#issuecomment-961349927,https://api.github.com/repos/pydata/xarray/issues/5629,961349927,IC_kwDOAMm_X845TQkn,35968931,2021-11-04T19:25:41Z,2021-11-04T19:26:51Z,MEMBER,"> I was thinking the general idea of reshape_block seems like a clever trick to get around the apply_ufunc constraints and could be used for some other functions as well. But maybe there aren't that many functions that could make use of it.

The idea seems similar to [what we used in xhistogram](https://github.com/xgcm/xhistogram/pull/49), which uses blockwise to apply a function that reshapes the data in that block to be 1D.

EDIT: But in that case our algorithm was one that could be applied blockwise, `apply_ufunc` was not used.","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",,950882492
https://github.com/pydata/xarray/issues/5629#issuecomment-961347645,https://api.github.com/repos/pydata/xarray/issues/5629,961347645,IC_kwDOAMm_X845TQA9,14371165,2021-11-04T19:22:19Z,2021-11-04T19:22:41Z,MEMBER,I was thinking the general idea of reshape_block seems like a clever trick to get around the apply_ufunc constraints and could be used for some other functions as well. But maybe there aren't that many functions that could make use of it.,"{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",,950882492
https://github.com/pydata/xarray/issues/5629#issuecomment-961230139,https://api.github.com/repos/pydata/xarray/issues/5629,961230139,IC_kwDOAMm_X845SzU7,2448579,2021-11-04T16:51:43Z,2021-11-04T16:54:02Z,MEMBER,">  Is that something that could be implemented in apply_ufunc?

I interpreted this as asking whether this general idea could be implemented in apply-ufunc, to which the answer is ""no"" (I think) because it's pretty specific. At best we put this logic in a wrapper function and call that with `apply_ufunc(..., dask=""allowed"")`.

This should handle all current use-cases of `polyfit`, we just change the call to `stack`.
","{""total_count"": 1, ""+1"": 1, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",,950882492
https://github.com/pydata/xarray/issues/5629#issuecomment-961187488,https://api.github.com/repos/pydata/xarray/issues/5629,961187488,IC_kwDOAMm_X845So6g,35968931,2021-11-04T16:04:17Z,2021-11-04T16:04:17Z,MEMBER,"> The problem is that there is no general solution here. You have to write a chunk-aware function and then use apply_ufunc(..., dask=""allowed""). This suggestion only works because dask's lstsq can work with chunked dimensions.

Sorry, what case does your suggestion not handle? It would be really nice to support chunked core dims if we are going to the effort of rewriting polyfit.","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",,950882492
https://github.com/pydata/xarray/issues/5629#issuecomment-961154611,https://api.github.com/repos/pydata/xarray/issues/5629,961154611,IC_kwDOAMm_X845Sg4z,2448579,2021-11-04T15:27:49Z,2021-11-04T15:27:49Z,MEMBER,"The problem is that there is no general solution here. You have to write a chunk-aware function and then use `apply_ufunc(..., dask=""allowed"")`. This suggestion only works because dask's `lstsq` can work with chunked dimensions.","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",,950882492
https://github.com/pydata/xarray/issues/5629#issuecomment-960414601,https://api.github.com/repos/pydata/xarray/issues/5629,960414601,IC_kwDOAMm_X845PsOJ,14371165,2021-11-04T02:58:33Z,2021-11-04T02:58:33Z,MEMBER,"@dcherian Is that something that could be implemented in apply_ufunc? Not being able to apply along a chunked dim I think happens quite often, right?","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",,950882492
https://github.com/pydata/xarray/issues/5629#issuecomment-887207964,https://api.github.com/repos/pydata/xarray/issues/5629,887207964,IC_kwDOAMm_X8404bgc,5821660,2021-07-27T04:54:54Z,2021-07-27T04:54:54Z,MEMBER,"`apply_ufunc` was refactored to use `dask.array.apply_gufunc` (which moves the vectorization to dask) after implementation of `polyfit`.
So those concerns about slowness of `apply_ufunc` + vectorization where based on the formerly used `_apply_blockwise` approach and might not be an issue any more with the current implementation of `apply_ufunc `. ","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",,950882492
https://github.com/pydata/xarray/issues/5629#issuecomment-886907556,https://api.github.com/repos/pydata/xarray/issues/5629,886907556,IC_kwDOAMm_X8403SKk,2448579,2021-07-26T17:55:51Z,2021-07-26T17:55:51Z,MEMBER,You could write a wrapper that does `reshape` + `apply_along_axis` and call that wrapper in `apply_ufunc` without `vectorize`,"{""total_count"": 2, ""+1"": 2, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",,950882492
https://github.com/pydata/xarray/issues/5629#issuecomment-886905635,https://api.github.com/repos/pydata/xarray/issues/5629,886905635,IC_kwDOAMm_X8403Rsj,20629530,2021-07-26T17:52:42Z,2021-07-26T17:52:42Z,CONTRIBUTOR,">  @aulemahal what do you think about how easy it might be to change polyfit to use apply_ufunc?

Should be easy! The code was written without apply_ufunc for a few reasons, one being that using `apply_ufunc+vectorize` was slower than the `apply_along_axis` trick it currently uses. But, with the issue raised here, that might not be true...","{""total_count"": 1, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 1, ""rocket"": 0, ""eyes"": 0}",,950882492
https://github.com/pydata/xarray/issues/5629#issuecomment-885216203,https://api.github.com/repos/pydata/xarray/issues/5629,885216203,IC_kwDOAMm_X840w1PL,39069044,2021-07-22T20:37:04Z,2021-07-22T20:37:04Z,CONTRIBUTOR,"Potentially a useful comparison: here is the task graph for the same problem posed to `da.curvefit`, which uses `xr.apply_ufunc(scipy.optimize.curvefit, ..., vectorize=True)`. Looks a lot cleaner, and I think `xr.polyfit` could be re-implemented this way without much trouble.
```python
import xarray as xr
import dask.array as dsa

def linear(x, m, b):
    return m * x + b

da = xr.DataArray(dsa.random.random((4,6, 100), chunks=(1,2,100)), dims=['x','y', 'time'])
fit = da.curvefit('time', linear).curvefit_coefficients
```
![image](https://user-images.githubusercontent.com/39069044/126705616-4fa914a7-14f0-4727-bf3a-e916627de4e7.png)
","{""total_count"": 3, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 3, ""rocket"": 0, ""eyes"": 0}",,950882492
https://github.com/pydata/xarray/issues/5629#issuecomment-885087800,https://api.github.com/repos/pydata/xarray/issues/5629,885087800,IC_kwDOAMm_X840wV44,35968931,2021-07-22T17:31:02Z,2021-07-22T17:31:58Z,MEMBER,"Thanks for the clear example @jbusecke . I think your example is helpful enough that we should keep this open (even if the same PR will hopefully close both this and #4554)

Was about to say the same @dcherian! @aulemahal what do you think about how easy it might be to change `polyfit` to use `apply_ufunc`?","{""total_count"": 1, ""+1"": 1, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",,950882492
https://github.com/pydata/xarray/issues/5629#issuecomment-885086453,https://api.github.com/repos/pydata/xarray/issues/5629,885086453,IC_kwDOAMm_X840wVj1,14314623,2021-07-22T17:28:52Z,2021-07-22T17:28:52Z,CONTRIBUTOR,Should I close this one then?,"{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",,950882492
https://github.com/pydata/xarray/issues/5629#issuecomment-885085284,https://api.github.com/repos/pydata/xarray/issues/5629,885085284,IC_kwDOAMm_X840wVRk,2448579,2021-07-22T17:27:00Z,2021-07-22T17:27:00Z,MEMBER,"It's the same thing: `stack` is a reshape which can be very inefficient with dask. I think we want to replace the stack+apply_along_axis approach with `xr.apply_ufunc(np.polyfit, ..., vectorize=True)`. ","{""total_count"": 2, ""+1"": 2, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",,950882492