issue_comments: 777602757
| html_url | issue_url | id | node_id | user | created_at | updated_at | author_association | reactions | performed_via_github_app | issue |
|---|---|---|---|---|---|---|---|---|---|---|
| https://github.com/pydata/xarray/pull/4849#issuecomment-777602757 | https://api.github.com/repos/pydata/xarray/issues/4849 | 777602757 | MDEyOklzc3VlQ29tbWVudDc3NzYwMjc1Nw== | 39069044 | 2021-02-11T16:02:54Z | 2021-02-11T16:02:54Z | CONTRIBUTOR | {"total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0} | | 797302408 |

body:

I've been playing around with this some more, and found the performance to be much better using a process-based dask scheduler. For example:

```python
import time

import dask
import dask.distributed  # needed for dask.distributed.Client below
import numpy as np
import xarray as xr


def exponential(x, a, xc):
    return np.exp((x - xc) / a)


# Synthetic data: var2 is an exponential function of var1, plus noise.
x = np.arange(-5, 5, 0.001)
t = np.arange(-5, 5, 0.01)
X, T = np.meshgrid(x, t)
Z1 = np.random.uniform(low=-5, high=5, size=X.shape)
Z2 = exponential(Z1, 3, X) + np.random.normal(scale=0.1, size=Z1.shape)

ds = xr.Dataset(
    data_vars=dict(var1=(["t", "x"], Z1), var2=(["t", "x"], Z2)),
    coords={"t": t, "x": x},
)
ds = ds.chunk({'x': 10})


def test_fit():
    # Fit the exponential along t for each x-chunk and time the computation.
    start = time.time()
    fit = ds.var2.curvefit(
        coords=ds.var1,
        func=exponential,
        reduce_dim="t",
    ).compute()
    print(f'Fitting time: {time.time() - start:.2f}s')


with dask.config.set(scheduler='threads'):
    test_fit()

with dask.config.set(scheduler='processes'):
    test_fit()

with dask.distributed.Client() as client:
    test_fit()

with dask.distributed.Client(n_workers=8, threads_per_worker=1) as client:
    test_fit()
```
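The gap between the threaded and process-based runs is presumably GIL contention: each chunk's fit repeatedly calls the Python-level model function from inside SciPy's optimizer, so threads serialize on the GIL while separate processes do not. The scheduler can also be chosen per computation rather than globally; a minimal sketch reusing the dataset and `exponential` from above (xarray forwards `.compute()` keyword arguments on to `dask.compute`):

```python
# Per-call scheduler override: run just this computation on the
# process-based scheduler, without dask.config.set.
fit = ds.var2.curvefit(
    coords=ds.var1,
    func=exponential,
    reduce_dim="t",  # keyword as spelled in this PR; merged xarray spells it reduce_dims
).compute(scheduler="processes")
```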