**Comment** on [pydata/xarray#2501](https://github.com/pydata/xarray/issues/2501#issuecomment-512663861) by user 7799184 (CONTRIBUTOR), 2019-07-18:

Hi guys, I'm hitting an issue that looks similar to @rsignell-usgs's when trying to open 413 netCDF files with `open_mfdataset` and `parallel=True`. The dataset (which opens successfully with `parallel=False`) is ~300 GB on disk and looks like:

```ipython
In [1]: import xarray as xr

In [2]: dset = xr.open_mfdataset("./bom-ww3/bom-ww3_*.nc", chunks={'time': 744, 'latitude': 100, 'longitude': 100}, parallel=False)

In [3]: dset
Out[3]:
<xarray.Dataset>
Dimensions:    (latitude: 190, longitude: 289, time: 302092)
Coordinates:
  * longitude  (longitude) float32 70.0 70.4 70.8 71.2 ... 184.4 184.8 185.2
  * latitude   (latitude) float32 -55.6 -55.2 -54.8 -54.4 ... 19.2 19.6 20.0
  * time       (time) datetime64[ns] 1979-01-01 ... 2013-05-31T23:00:00.000013440
Data variables:
    hs         (time, latitude, longitude) float32 dask.array
    fp         (time, latitude, longitude) float32 dask.array
    dp         (time, latitude, longitude) float32 dask.array
    wl         (time, latitude, longitude) float32 dask.array
    U10        (time, latitude, longitude) float32 dask.array
    V10        (time, latitude, longitude) float32 dask.array
    hs1        (time, latitude, longitude) float32 dask.array
    hs2        (time, latitude, longitude) float32 dask.array
    tp1        (time, latitude, longitude) float32 dask.array
    tp2        (time, latitude, longitude) float32 dask.array
    lp0        (time, latitude, longitude) float32 dask.array
    lp1        (time, latitude, longitude) float32 dask.array
    lp2        (time, latitude, longitude) float32 dask.array
    th0        (time, latitude, longitude) float32 dask.array
    th1        (time, latitude, longitude) float32 dask.array
    th2        (time, latitude, longitude) float32 dask.array
    hs0        (time, latitude, longitude) float32 dask.array
    tp0        (time, latitude, longitude) float32 dask.array
```

Reading it in a standard Python session with `parallel=True` gives a core dump:

```ipython
In [1]: import xarray as xr

In [2]: dset = xr.open_mfdataset("./bom-ww3/bom-ww3_*.nc", chunks={'time': 744, 'latitude': 100, 'longitude': 100}, parallel=True)
Bus error (core dumped)
```

Trying to read it on a dask cluster I get:

```ipython
In [1]: from dask.distributed import Client

In [2]: import xarray as xr

In [3]: client = Client()

In [4]: dset = xr.open_mfdataset("./bom-ww3/bom-ww3_*.nc", chunks={'time': 744, 'latitude': 100, 'longitud
   ...: e': 100}, parallel=True)
free(): double free detected in tcache 2
free(): double free detected in tcache 2
free(): double free detected in tcache 2
distributed.nanny - WARNING - Worker process 18744 was killed by signal 11
distributed.nanny - WARNING - Restarting worker
distributed.nanny - WARNING - Worker process 18740 was killed by signal 6
distributed.nanny - WARNING - Restarting worker
distributed.nanny - WARNING - Worker process 18742 was killed by signal 7
distributed.nanny - WARNING - Worker process 18738 was killed by signal 6
distributed.nanny - WARNING - Restarting worker
distributed.nanny - WARNING - Restarting worker
free(): double free detected in tcache 2
munmap_chunk(): invalid pointer
free(): double free detected in tcache 2
free(): double free detected in tcache 2
distributed.nanny - WARNING - Worker process 19082 was killed by signal 6
distributed.nanny - WARNING - Restarting worker
distributed.nanny - WARNING - Worker process 19073 was killed by signal 6
distributed.nanny - WARNING - Restarting worker
```
The call eventually fails with a `KilledWorker` error:

```ipython
---------------------------------------------------------------------------
KilledWorker                              Traceback (most recent call last)
<ipython-input-4-...> in <module>()
----> 1 dset = xr.open_mfdataset("./bom-ww3/bom-ww3_*.nc", chunks={'time': 744, 'latitude': 100, 'longitude': 100}, parallel=True)

/usr/local/lib/python3.7/dist-packages/xarray/backends/api.py in open_mfdataset(paths, chunks, concat_dim, compat, preprocess, engine, lock, data_vars, coords, combine, autoclose, parallel, **kwargs)
    772         # calling compute here will return the datasets/file_objs lists,
    773         # the underlying datasets will still be stored as dask arrays
--> 774         datasets, file_objs = dask.compute(datasets, file_objs)
    775
    776     # Combine all datasets, closing them in case of a ValueError

/usr/local/lib/python3.7/dist-packages/dask/base.py in compute(*args, **kwargs)
    444     keys = [x.__dask_keys__() for x in collections]
    445     postcomputes = [x.__dask_postcompute__() for x in collections]
--> 446     results = schedule(dsk, keys, **kwargs)
    447     return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
    448

/home/oceanum/.local/lib/python3.7/site-packages/distributed/client.py in get(self, dsk, keys, restrictions, loose_restrictions, resources, sync, asynchronous, direct, retries, priority, fifo_timeout, actors, **kwargs)
   2525             should_rejoin = False
   2526         try:
-> 2527             results = self.gather(packed, asynchronous=asynchronous, direct=direct)
   2528         finally:
   2529             for f in futures.values():

/home/oceanum/.local/lib/python3.7/site-packages/distributed/client.py in gather(self, futures, errors, direct, asynchronous)
   1821             direct=direct,
   1822             local_worker=local_worker,
-> 1823             asynchronous=asynchronous,
   1824         )
   1825

/home/oceanum/.local/lib/python3.7/site-packages/distributed/client.py in sync(self, func, asynchronous, callback_timeout, *args, **kwargs)
    761         else:
    762             return sync(
--> 763                 self.loop, func, *args, callback_timeout=callback_timeout, **kwargs
    764             )
    765

/home/oceanum/.local/lib/python3.7/site-packages/distributed/utils.py in sync(loop, func, callback_timeout, *args, **kwargs)
    330             e.wait(10)
    331     if error[0]:
--> 332         six.reraise(*error[0])
    333     else:
    334         return result[0]

/usr/lib/python3/dist-packages/six.py in reraise(tp, value, tb)
    691             if value.__traceback__ is not tb:
    692                 raise value.with_traceback(tb)
--> 693             raise value
    694         finally:
    695             value = None

/home/oceanum/.local/lib/python3.7/site-packages/distributed/utils.py in f()
    315             if callback_timeout is not None:
    316                 future = gen.with_timeout(timedelta(seconds=callback_timeout), future)
--> 317             result[0] = yield future
    318         except Exception as exc:
    319             error[0] = sys.exc_info()

/home/oceanum/.local/lib/python3.7/site-packages/tornado/gen.py in run(self)
    733
    734                     try:
--> 735                         value = future.result()
    736                     except Exception:
    737                         exc_info = sys.exc_info()

/home/oceanum/.local/lib/python3.7/site-packages/tornado/gen.py in run(self)
    740                     if exc_info is not None:
    741                         try:
--> 742                             yielded = self.gen.throw(*exc_info)  # type: ignore
    743                         finally:
    744                             # Break up a reference to itself

/home/oceanum/.local/lib/python3.7/site-packages/distributed/client.py in _gather(self, futures, errors, direct, local_worker)
   1678                             exc = CancelledError(key)
   1679                         else:
-> 1680                             six.reraise(type(exception), exception, traceback)
   1681                             raise exc
   1682                     if errors == "skip":

/usr/lib/python3/dist-packages/six.py in reraise(tp, value, tb)
    691             if value.__traceback__ is not tb:
    692                 raise value.with_traceback(tb)
--> 693             raise value
    694         finally:
    695             value = None

KilledWorker: ('open_dataset-e7916acb-6d9f-4532-ab76-5b9c1b1a39c2', )
```
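A check that may narrow this down is to pin the file-opening step to dask's single-threaded scheduler (a minimal sketch, assuming the crash comes from the delayed `open_dataset` calls running concurrently, which is where the traceback points via `dask.compute`; `dask.config.set` is dask's standard configuration mechanism):

```python
import dask
import xarray as xr

# Pin dask to its single-threaded scheduler while the files are opened.
# If this completes cleanly, the segfaults above most likely come from
# concurrent access to the underlying netCDF4/HDF5 C libraries, which
# are not thread-safe in many builds, rather than from xarray itself.
with dask.config.set(scheduler="single-threaded"):
    dset = xr.open_mfdataset(
        "./bom-ww3/bom-ww3_*.nc",
        chunks={"time": 744, "latitude": 100, "longitude": 100},
        parallel=True,
    )
```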
Is there anything obviously wrong with what I'm trying here, please?
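If it does turn out to be a thread-safety problem, one possible mitigation is to give each worker process a single thread, so that parallelism comes from processes only (a sketch; `n_workers` and `threads_per_worker` are standard `dask.distributed.Client`/`LocalCluster` arguments, but whether this avoids the crash for this dataset is an assumption):

```python
from dask.distributed import Client
import xarray as xr

# One thread per worker process: no two threads within a process touch
# the netCDF4/HDF5 libraries at the same time, while the four worker
# processes still open files in parallel.
client = Client(n_workers=4, threads_per_worker=1)

dset = xr.open_mfdataset(
    "./bom-ww3/bom-ww3_*.nc",
    chunks={"time": 744, "latitude": 100, "longitude": 100},
    parallel=True,
)
```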