Comments on https://github.com/pydata/xarray/issues/3945 (issue id 595784008)

---

https://github.com/pydata/xarray/issues/3945#issuecomment-1115469330
user 26384082, NONE, 2022-05-02T23:37:47Z, no reactions

In order to maintain a list of currently relevant issues, we mark issues as stale after a period of inactivity.
If this issue remains relevant, please comment here or remove the `stale` label; otherwise it will be marked as closed automatically.

---

https://github.com/pydata/xarray/issues/3945#issuecomment-610426980
user 2448579, MEMBER, 2020-04-07T14:41:08Z, reactions: +1 × 1

xhistogram probably does what you want: https://github.com/xgcm/xhistogram

---

https://github.com/pydata/xarray/issues/3945#issuecomment-610407293
user 1200058, NONE, 2020-04-07T14:06:03Z (edited 2020-04-07T14:17:12Z), no reactions

First prototype:

```python
import dask
import dask.array as da
import numpy as np
import xarray as xr


def value_counts(v, global_unique_values, newdim: str):
    unique_values, counts = dask.compute(*np.unique(v, return_counts=True))

    # find out where in `global_unique_values` the unique values of `v` are located
    _, idx1, idx2 = np.intersect1d(unique_values, global_unique_values, return_indices=True)

    # place the counts at the positions of the corresponding `global_unique_values`
    retval = np.zeros_like(global_unique_values)
    retval[idx2] = counts[idx1]

    # ## alternative:
    # counts = xr.DataArray(
    #     counts,
    #     dims=[newdim],
    #     coords={newdim: unique_values},
    # )
    # counts, = xr.align(counts, indexes={newdim: global_unique_values}, fill_value=0)

    return retval


def xr_value_counts(obj, unique_values=None, **kwargs):
    # the single keyword argument maps the new dimension name to the dim(s) to count over
    (newdim, apply_dims), = kwargs.items()
    if isinstance(apply_dims, str):
        # convert scalars to list
        apply_dims = [apply_dims]
    if not isinstance(apply_dims, list):
        # cast iterables to list
        apply_dims = [*apply_dims]

    if unique_values is None:
        # map(np.unique) and reduce(np.unique)
        unique_values = np.unique(da.map_blocks(np.unique, obj.data.flatten()).compute())
    else:
        unique_values = np.sort(unique_values)

    retval = xr.apply_ufunc(
        lambda v: value_counts(v, global_unique_values=unique_values, newdim=newdim),
        obj,
        input_core_dims=[apply_dims],
        output_core_dims=[[newdim]],
        dask="allowed",
        vectorize=True,
    )
    retval.coords[newdim] = unique_values

    return retval


test_da = xr.DataArray(
    [
        [0, 1, 1, 1, 3, 4],
        [0, 6, 1, 1, 3, 4],
    ],
    dims=["dim_0", "dim_1"],
    coords={"dim_1": [2, 5, 7, 4, 3, 6]},
)
test_values = xr_value_counts(test_da, value_counts="dim_1")
assert np.all(
    test_values.values == np.array([
        [1, 3, 1, 1, 0],
        [1, 2, 1, 1, 1],
    ])
)
assert np.all(test_values.value_counts == np.array([0, 1, 3, 4, 6]))
```

Example:

```python
test_da = xr.DataArray(
    [
        [0, 1, 1, 1, 3, 4],
        [0, 6, 1, 1, 3, 4],
    ],
    dims=["dim_0", "dim_1"],
    coords={"dim_1": [2, 5, 7, 4, 3, 6]},
)
print(test_da)
# <xarray.DataArray (dim_0: 2, dim_1: 6)>
# array([[0, 1, 1, 1, 3, 4],
#        [0, 6, 1, 1, 3, 4]])
# Coordinates:
#   * dim_1    (dim_1) int64 2 5 7 4 3 6
# Dimensions without coordinates: dim_0

print(xr_value_counts(test_da, value_counts="dim_1"))
# <xarray.DataArray (dim_0: 2, value_counts: 5)>
# array([[1, 3, 1, 1, 0],
#        [1, 2, 1, 1, 1]])
# Coordinates:
#   * value_counts  (value_counts) int64 0 1 3 4 6
# Dimensions without coordinates: dim_0
```

Probably not the fastest solution, and it executes eagerly, but it works. What do you think?
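
For comparison, here is a rough sketch (not from the thread) of how the xhistogram approach suggested above might be used to count values along one dimension. It assumes integer-valued data and uses one unit-wide bin per integer, so the histogram counts reduce to value counts; the array name and bin construction are illustrative choices, not part of the original discussion:

```python
import numpy as np
import xarray as xr
from xhistogram.xarray import histogram  # https://github.com/xgcm/xhistogram

da = xr.DataArray(
    [
        [0, 1, 1, 1, 3, 4],
        [0, 6, 1, 1, 3, 4],
    ],
    dims=["dim_0", "dim_1"],
    name="values",  # xhistogram names the new bin dimension after the array name
)

# Assumption: integer data, so one unit-wide bin centred on each integer value
# makes each bin count equal to the number of occurrences of that value.
edges = np.arange(int(da.min()), int(da.max()) + 2) - 0.5

# Reduce over dim_1 only; dim_0 is preserved, and a new "values_bin" dimension
# holds the bin centres (0, 1, ..., 6).
counts = histogram(da, bins=[edges], dim=["dim_1"])
print(counts)
```

Unlike the eager prototype above, this should stay lazy for dask-backed arrays, at the cost of having to know the bin edges (i.e. the range of possible values) up front.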