html_url,issue_url,id,node_id,user,created_at,updated_at,author_association,body,reactions,performed_via_github_app,issue
https://github.com/pydata/xarray/issues/3945#issuecomment-1115469330,https://api.github.com/repos/pydata/xarray/issues/3945,1115469330,IC_kwDOAMm_X85CfLYS,26384082,2022-05-02T23:37:47Z,2022-05-02T23:37:47Z,NONE,"In order to maintain a list of currently relevant issues, we mark issues as stale after a period of inactivity.
If this issue remains relevant, please comment here or remove the `stale` label; otherwise it will be marked as closed automatically.
","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",,595784008
https://github.com/pydata/xarray/issues/3945#issuecomment-610426980,https://api.github.com/repos/pydata/xarray/issues/3945,610426980,MDEyOklzc3VlQ29tbWVudDYxMDQyNjk4MA==,2448579,2020-04-07T14:41:08Z,2020-04-07T14:41:08Z,MEMBER,xhistogram probably does what you want: https://github.com/xgcm/xhistogram,"{""total_count"": 1, ""+1"": 1, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",,595784008
https://github.com/pydata/xarray/issues/3945#issuecomment-610407293,https://api.github.com/repos/pydata/xarray/issues/3945,610407293,MDEyOklzc3VlQ29tbWVudDYxMDQwNzI5Mw==,1200058,2020-04-07T14:06:03Z,2020-04-07T14:17:12Z,NONE,"First prototype:
```python
import dask
import dask.array as da
import numpy as np
import xarray as xr


def value_counts(v, global_unique_values, newdim: str):
    unique_values, counts = dask.compute(*np.unique(v, return_counts=True))
    # find out where in `global_unique_values` the unique values of `v` are located
    _, idx1, idx2 = np.intersect1d(unique_values, global_unique_values, return_indices=True)
    # assign the counts to the positions of the corresponding `global_unique_values`;
    # values that do not occur in `v` keep a count of 0
    retval = np.zeros_like(global_unique_values)
    retval[idx2] = counts[idx1]
    # ## alternative:
    # counts = xr.DataArray(
    #     counts,
    #     dims=[newdim, ],
    #     coords={newdim: unique_values},
    # )
    # counts, = xr.align(counts, indexes={newdim: global_unique_values}, fill_value=0)
    return retval


def xr_value_counts(obj, unique_values=None, **kwargs):
    # the single keyword argument names the new dimension and the dimension(s)
    # to count over, e.g. xr_value_counts(da, value_counts=""dim_1"")
    (newdim, apply_dims), = kwargs.items()
    if isinstance(apply_dims, str):
        # convert scalars to list
        apply_dims = [apply_dims]
    if not isinstance(apply_dims, list):
        # cast iterables to list
        apply_dims = [*apply_dims]

    if unique_values is None:
        # map(np.unique) and reduce(np.unique)
        unique_values = np.unique(da.map_blocks(np.unique, obj.data.flatten()).compute())
    else:
        unique_values = np.sort(unique_values)

    retval = xr.apply_ufunc(
        lambda v: value_counts(v, global_unique_values=unique_values, newdim=newdim),
        obj,
        input_core_dims=[apply_dims],
        output_core_dims=[[newdim]],
        dask=""allowed"",
        vectorize=True,
    )
    retval.coords[newdim] = unique_values
    return retval


test_da = xr.DataArray(
    [
        [0, 1, 1, 1, 3, 4],
        [0, 6, 1, 1, 3, 4],
    ],
    dims=[""dim_0"", ""dim_1""],
    coords={""dim_1"": [2, 5, 7, 4, 3, 6]},
)
test_values = xr_value_counts(test_da, value_counts=""dim_1"")
assert np.all(
    test_values.values == np.array([
        [1, 3, 1, 1, 0],
        [1, 2, 1, 1, 1],
    ])
)
assert np.all(
    test_values.value_counts == np.array([0, 1, 3, 4, 6])
)
```
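The commented-out alternative inside `value_counts` could also be written with `reindex` instead of the manual `intersect1d` bookkeeping. A rough sketch of that variant (just to illustrate the idea; `value_counts_reindex` is an illustrative name, and I haven't benchmarked it):
```python
import numpy as np
import xarray as xr


def value_counts_reindex(v, global_unique_values, newdim: str):
    # count the values that actually occur in this slice ...
    unique_values, counts = np.unique(v, return_counts=True)
    counts = xr.DataArray(counts, dims=[newdim], coords={newdim: unique_values})
    # ... then reindex onto the global set of values, filling absent values with 0
    return counts.reindex({newdim: global_unique_values}, fill_value=0).data
```
It should work as a drop-in replacement for `value_counts` inside `xr_value_counts` above.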
Example:
```python
test_da = xr.DataArray(
    [
        [0, 1, 1, 1, 3, 4],
        [0, 6, 1, 1, 3, 4],
    ],
    dims=[""dim_0"", ""dim_1""],
    coords={""dim_1"": [2, 5, 7, 4, 3, 6]},
)
print(test_da)
# <xarray.DataArray (dim_0: 2, dim_1: 6)>
# array([[0, 1, 1, 1, 3, 4],
#        [0, 6, 1, 1, 3, 4]])
# Coordinates:
#   * dim_1    (dim_1) int64 2 5 7 4 3 6
# Dimensions without coordinates: dim_0
print(xr_value_counts(test_da, value_counts=""dim_1""))
# <xarray.DataArray (dim_0: 2, value_counts: 5)>
# array([[1, 3, 1, 1, 0],
#        [1, 2, 1, 1, 1]])
# Coordinates:
#   * value_counts  (value_counts) int64 0 1 3 4 6
# Dimensions without coordinates: dim_0
```
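As a sanity check, the expected counts can be reproduced with plain numpy/pandas (this is only a cross-check of the numbers above, not part of the proposal):
```python
import numpy as np
import pandas as pd

data = np.array([
    [0, 1, 1, 1, 3, 4],
    [0, 6, 1, 1, 3, 4],
])
unique_values = np.unique(data)  # array([0, 1, 3, 4, 6])
expected = np.stack([
    pd.Series(row).value_counts().reindex(unique_values, fill_value=0).to_numpy()
    for row in data
])
print(expected)
# [[1 3 1 1 0]
#  [1 2 1 1 1]]
```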
This is probably not the fastest solution, and it executes eagerly, but it works.
What do you think?","{""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",,595784008