id,node_id,number,title,user,state,locked,assignee,milestone,comments,created_at,updated_at,closed_at,author_association,active_lock_reason,draft,pull_request,body,reactions,performed_via_github_app,state_reason,repo,type
466994138,MDU6SXNzdWU0NjY5OTQxMzg=,3096,Support parallel writes to zarr store,18643609,closed,0,,,9,2019-07-11T16:31:25Z,2022-04-08T04:43:15Z,2022-04-08T04:43:14Z,NONE,,,,"#### MCVE Code Sample

```python
import multiprocessing
from time import sleep

import numpy as np
import xarray as xr
from s3fs import S3FileSystem, S3Map

from main import set_aws_credentials  # local helper that sets AWS credentials

set_aws_credentials()


def download_file(lead_time):
    return 'path_to_your_file'


def make_xarray_dataset(file_path, lead_time):
    var1 = np.random.rand(1, 721, 1440, 22)
    var2 = np.random.rand(1, 721, 1440, 22)
    lat = np.linspace(-90, 90, 721)
    lon = np.linspace(0, 360, 1440)
    height = range(22)
    ds = xr.Dataset({'var1': (['lead_time', 'lat', 'lon', 'height'], var1),
                     'var2': (['lead_time', 'lat', 'lon', 'height'], var2)},
                    coords={'lat': lat,
                            'lon': lon,
                            'height': height,
                            'lead_time': [lead_time]})
    return ds


def upload_to_s3(dataset, append):
    s3 = S3FileSystem()
    s3map = S3Map('S3_path_to_your_zarr', s3=s3)
    if append:
        # Append to an already existing dataset
        dataset.to_zarr(store=s3map, mode='a', append_dim='lead_time')
    else:
        dataset.to_zarr(store=s3map, mode='w')


def lead_time_worker(lead_time, append=True):
    file_path = download_file(lead_time)
    dataset = make_xarray_dataset(file_path, lead_time)
    upload_to_s3(dataset, append=append)
    return 0


if __name__ == '__main__':
    lead_times = range(10)
    first_lead_time = True
    processes = []
    for lead_time in lead_times:
        if first_lead_time:
            # The initial write must finish before any append starts
            process = multiprocessing.Process(target=lead_time_worker,
                                              args=(lead_time, False))
            process.start()
            process.join()
            first_lead_time = False
        else:
            process = multiprocessing.Process(target=lead_time_worker,
                                              args=(lead_time,))
            process.start()
            processes.append(process)
            sleep(5)  # Stagger the processes so they don't begin at the same time
    for p in processes:
        p.join()
```

will raise

> ValueError: conflicting sizes for dimension 'lead_time': length X on 'var1' and length Y on 'var2'
<details>

```
Traceback (most recent call last):
  File ""main.py"", line 200, in lead_time_worker
    upload_to_s3(dataset, cloud_zarr_path, append=True)
  File ""main.py"", line 167, in upload_to_gcloud
    ds.to_zarr(store=s3map, mode='a', append_dim='lead_time')
  File ""/home/ubuntu/anaconda3/envs/GFS-data-retrieval/lib/python3.7/site-packages/xarray/core/dataset.py"", line 1414, in to_zarr
    consolidated=consolidated, append_dim=append_dim)
  File ""/home/ubuntu/anaconda3/envs/GFS-data-retrieval/lib/python3.7/site-packages/xarray/backends/api.py"", line 1101, in to_zarr
    dump_to_store(dataset, zstore, writer, encoding=encoding)
  File ""/home/ubuntu/anaconda3/envs/GFS-data-retrieval/lib/python3.7/site-packages/xarray/backends/api.py"", line 929, in dump_to_store
    unlimited_dims=unlimited_dims)
  File ""/home/ubuntu/anaconda3/envs/GFS-data-retrieval/lib/python3.7/site-packages/xarray/backends/zarr.py"", line 354, in store
    ds = open_zarr(self.ds.store, chunks=None)
  File ""/home/ubuntu/anaconda3/envs/GFS-data-retrieval/lib/python3.7/site-packages/xarray/backends/zarr.py"", line 557, in open_zarr
    ds = maybe_decode_store(zarr_store)
  File ""/home/ubuntu/anaconda3/envs/GFS-data-retrieval/lib/python3.7/site-packages/xarray/backends/zarr.py"", line 545, in maybe_decode_store
    drop_variables=drop_variables)
  File ""/home/ubuntu/anaconda3/envs/GFS-data-retrieval/lib/python3.7/site-packages/xarray/conventions.py"", line 527, in decode_cf
    ds = Dataset(vars, attrs=attrs)
  File ""/home/ubuntu/anaconda3/envs/GFS-data-retrieval/lib/python3.7/site-packages/xarray/core/dataset.py"", line 423, in __init__
    self._set_init_vars_and_dims(data_vars, coords, compat)
  File ""/home/ubuntu/anaconda3/envs/GFS-data-retrieval/lib/python3.7/site-packages/xarray/core/dataset.py"", line 445, in _set_init_vars_and_dims
    data_vars, coords, compat=compat)
  File ""/home/ubuntu/anaconda3/envs/GFS-data-retrieval/lib/python3.7/site-packages/xarray/core/merge.py"", line 379, in merge_data_and_coords
    indexes=indexes)
  File ""/home/ubuntu/anaconda3/envs/GFS-data-retrieval/lib/python3.7/site-packages/xarray/core/merge.py"", line 460, in merge_core
    dims = calculate_dimensions(variables)
  File ""/home/ubuntu/anaconda3/envs/GFS-data-retrieval/lib/python3.7/site-packages/xarray/core/dataset.py"", line 125, in calculate_dimensions
    (dim, size, k, dims[dim], last_used[dim]))
ValueError: conflicting sizes for dimension 'lead_time': length X on 'var1' and length Y on 'var2'
```
</details>
#### Problem Description

First of all, thanks a lot to the community for PR #2706; I was really looking forward to it. I have already tried the new append parameter, and ran into problems when using it in parallel.

I want to upload a very big zarr (a global numerical weather prediction, the output of the GFS model, which you can check out [here](https://ftp.ncep.noaa.gov/data/nccf/com/gfs/prod/)) to an S3 bucket. Each source file contains the data for one lead time (the length of time between the issuance of a forecast and the occurrence of the phenomena that were predicted), and I want to concatenate them all. To speed this up, I tried to run one process per lead time, with all of them appending to the same data store using [Dataset.to_zarr()](https://github.com/pydata/xarray/blob/8f0d9e5c9909c93a90306ed7cb5a80c1c2e1c97d/xarray/core/dataset.py#L1400) with `append=True`. However, when doing that, I get the error described above. Because the processes append simultaneously, the store is not necessarily in a consistent state when a new process tries to append: some variables already have the values of one lead time and some do not, since the other process has not finished, which leads [calculate_dimensions()](https://github.com/pydata/xarray/blob/8f0d9e5c9909c93a90306ed7cb5a80c1c2e1c97d/xarray/core/dataset.py#L113) to raise this error.

I wonder if there is a way I haven't found to work around this simply with a synchronizer? If not, do you think it would be possible (and reasonable) to implement a parameter allowing this check on the append dimension to be bypassed, in an 'eventually consistent' approach?
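For later readers: xarray 0.16.2 and newer grew a `region` argument on `to_zarr()` that sidesteps concurrent appends entirely; each process writes only its own pre-allocated slab, so no process ever reads or extends shared dimension metadata. Below is a minimal sketch of that pattern under stated assumptions: the local path 'example.zarr' stands in for the S3 mapper above, and the grid is shrunk from the MCVE so it runs quickly.

```python
import multiprocessing

import numpy as np
import xarray as xr

N_LEAD_TIMES = 10
DIMS = ['lead_time', 'lat', 'lon', 'height']
SHAPE = (1, 73, 144, 22)  # one lead time; grid shrunk from the MCVE


def region_worker(i):
    # Each worker fills in only its own slab along lead_time, so there
    # is no race on the dimension metadata that concurrent appends hit.
    ds = xr.Dataset({'var1': (DIMS, np.random.rand(*SHAPE)),
                     'var2': (DIMS, np.random.rand(*SHAPE))},
                    coords={'lead_time': [i]})
    ds.to_zarr('example.zarr', region={'lead_time': slice(i, i + 1)})


if __name__ == '__main__':
    # Lay out the full-size store once up front. One zarr chunk per
    # lead time keeps the parallel writers from sharing any chunk.
    # (With dask-backed arrays, compute=False here would write only
    # the metadata instead of placeholder zeros.)
    full_shape = (N_LEAD_TIMES,) + SHAPE[1:]
    template = xr.Dataset({'var1': (DIMS, np.zeros(full_shape)),
                           'var2': (DIMS, np.zeros(full_shape))},
                          coords={'lead_time': range(N_LEAD_TIMES),
                                  'lat': np.linspace(-90, 90, 73),
                                  'lon': np.linspace(0, 360, 144),
                                  'height': range(22)})
    chunking = {'chunks': SHAPE}
    template.to_zarr('example.zarr', mode='w',
                     encoding={'var1': chunking, 'var2': chunking})
    processes = [multiprocessing.Process(target=region_worker, args=(i,))
                 for i in range(N_LEAD_TIMES)]
    for p in processes:
        p.start()
    for p in processes:
        p.join()
```

A chunk-level zarr synchronizer alone is unlikely to help with the original error, since the conflict is between whole multi-variable append operations rather than individual chunk writes; short of region writes, serializing the appends (e.g. with a `multiprocessing.Lock` around `to_zarr`) avoids the race at the cost of the parallelism.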
#### Output of ``xr.show_versions()``

<details>

INSTALLED VERSIONS
------------------
commit: None
python: 3.7.3 (default, Mar 27 2019, 22:11:17) [GCC 7.3.0]
python-bits: 64
OS: Linux
OS-release: 4.15.0-1032-aws
machine: x86_64
processor: x86_64
byteorder: little
LC_ALL: None
LANG: C.UTF-8
LOCALE: en_US.UTF-8
libhdf5: None
libnetcdf: None

xarray: 0.12.2
pandas: 0.24.2
numpy: 1.16.4
scipy: None
netCDF4: None
pydap: None
h5netcdf: None
h5py: None
Nio: 1.5.5
zarr: 2.3.2
cftime: None
nc_time_axis: None
PseudonetCDF: None
rasterio: None
cfgrib: None
iris: None
bottleneck: None
dask: 2.0.0
distributed: 2.0.1
matplotlib: None
cartopy: None
seaborn: None
numbagg: None
setuptools: 41.0.1
pip: 19.1.1
conda: None
pytest: None
IPython: None
sphinx: None
","{""url"": ""https://api.github.com/repos/pydata/xarray/issues/3096/reactions"", ""total_count"": 1, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 1, ""rocket"": 0, ""eyes"": 0}",,completed,13221727,issue 474582412,MDU6SXNzdWU0NzQ1ODI0MTI=,3170,Dataset.to_zarr() with mode='a' does not work with groups,18643609,closed,0,,,0,2019-07-30T13:22:58Z,2020-03-02T12:19:16Z,2020-03-02T12:19:16Z,NONE,,,,"#### MCVE Code Sample ```python import xarray as xr import numpy as np from s3fs import S3FileSystem, S3Map s3 = S3FileSystem() bucket_name = 'your-bucket-name' s3_path = bucket_name + 'some_path.zarr' store = S3Map(s3_path, s3=s3) for i in range(6): if i%2 == 0: group = 'Group1' else: group = 'Group2' lead_time = i//2 var1 = np.random.rand(1) ds = xr.Dataset({'var1': (['lead_time'], var1)}, coords={'lead_time': [lead_time]}) ds.to_zarr(store=store, mode='a', append_dim='lead_time', group=group) ``` #### Output This code returns the following error:
```
Traceback (most recent call last):
  File ""/home/vincent/anaconda3/envs/hanover_backend/lib/python3.7/site-packages/xarray/core/dataset.py"", line 1019, in _construct_dataarray
    variable = self._variables[name]
KeyError: 'var1'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File ""/home/vincent/Documents/Greenlytics/SiteForecast/debugging-script.py"", line 201, in <module>
    ds.to_zarr(store=store, mode='a', append_dim='lead_time', group=group)
  File ""/home/vincent/anaconda3/envs/hanover_backend/lib/python3.7/site-packages/xarray/core/dataset.py"", line 1433, in to_zarr
    consolidated=consolidated, append_dim=append_dim)
  File ""/home/vincent/anaconda3/envs/hanover_backend/lib/python3.7/site-packages/xarray/backends/api.py"", line 1101, in to_zarr
    dump_to_store(dataset, zstore, writer, encoding=encoding)
  File ""/home/vincent/anaconda3/envs/hanover_backend/lib/python3.7/site-packages/xarray/backends/api.py"", line 929, in dump_to_store
    unlimited_dims=unlimited_dims)
  File ""/home/vincent/anaconda3/envs/hanover_backend/lib/python3.7/site-packages/xarray/backends/zarr.py"", line 358, in store
    variables_with_encoding[vn].encoding = ds[vn].encoding
  File ""/home/vincent/anaconda3/envs/hanover_backend/lib/python3.7/site-packages/xarray/core/dataset.py"", line 1103, in __getitem__
    return self._construct_dataarray(key)
  File ""/home/vincent/anaconda3/envs/hanover_backend/lib/python3.7/site-packages/xarray/core/dataset.py"", line 1022, in _construct_dataarray
    self._variables, name, self._level_coords, self.dims)
  File ""/home/vincent/anaconda3/envs/hanover_backend/lib/python3.7/site-packages/xarray/core/dataset.py"", line 91, in _get_virtual_variable
    ref_var = variables[ref_name]
KeyError: 'var1'
```
</details>
The KeyError can happen on a variable name as well as on a dimension name, it depends on the runs. #### Problem Description I am trying to use the append mode introduced in the PR #2706 on zarr groups. This raises a KeyError as you can see in the trace above. Is it a bug or a feature that is not supported (yet)? #### Output of ``xr.show_versions()``
#### Output of ``xr.show_versions()``

<details>

INSTALLED VERSIONS
------------------
commit: None
python: 3.7.1 (default, Dec 14 2018, 19:28:38) [GCC 7.3.0]
python-bits: 64
OS: Linux
OS-release: 4.18.0-20-generic
machine: x86_64
processor: x86_64
byteorder: little
LC_ALL: None
LANG: en_US.UTF-8
LOCALE: en_US.UTF-8
libhdf5: 1.10.2
libnetcdf: 4.6.3

xarray: 0.12.3
pandas: 0.24.2
numpy: 1.16.2
scipy: 1.2.1
netCDF4: 1.5.1.2
pydap: None
h5netcdf: None
h5py: None
Nio: None
zarr: 2.3.1
cftime: 1.0.3.4
nc_time_axis: None
PseudoNetCDF: None
rasterio: None
cfgrib: 0.9.6.1.post1
iris: None
bottleneck: None
dask: 1.1.5
distributed: None
matplotlib: 3.0.3
cartopy: None
seaborn: None
numbagg: None
setuptools: 40.8.0
pip: 19.0.3
conda: None
pytest: None
IPython: 7.5.0
sphinx: None
","{""url"": ""https://api.github.com/repos/pydata/xarray/issues/3170/reactions"", ""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",,completed,13221727,issue