id,node_id,number,title,user,state,locked,assignee,milestone,comments,created_at,updated_at,closed_at,author_association,active_lock_reason,draft,pull_request,body,reactions,performed_via_github_app,state_reason,repo,type 1387341095,I_kwDOAMm_X85SsSUn,7092,"Save an nc file and open it again, the content of the data inside has changed",48684315,closed,0,,,2,2022-09-27T08:27:25Z,2023-09-16T10:35:43Z,2023-09-16T10:35:43Z,NONE,,,,"### What is your issue? After I processed the source nc file, I got an intermediate result and saved it. When I want to use the intermediate result again, I find that it does not match the saved result. ``` import math import numpy as np import pandas as pd import time import os import dateutil.parser import xarray as xr from tqdm import tqdm import time def hour2day(Dir, file): """""" 将小时分辨率的nc文件转为日分辨率的nc文件 估算O3新增的三个变量:eDownward UV radiation at the surface(uvb), surface net solar radiation(ssr), surface net thermal radiation(str), total column ozon(tco3) :param file: 待处理的小时分辨率的nc文件 """""" print('start', time.strftime('%H:%M:%S',time.localtime())) # time.strftime('%H:%M:%S',time.localtime()) xs = [xr.open_dataset(os.path.join(Dir, f)) for f in file] # nc = [nc.Dataset(os.path.join(Dir, f)) for f in file] start = xr.concat([xs[0], xs[1].sel(time = xs[1]['time'][:16])], dim = 'time').coarsen(time = 24).mean() end = xs[1].sel(time = xs[1]['time'][16:-8]).coarsen(time = 24).mean() year_xs = xr.concat([start, end], dim = 'time') print('end', time.strftime('%H:%M:%S', time.localtime())) return year_xs Dir = '/data/lcx/3_Atmos/ERA5/single/2019/' file = os.listdir(Dir) file = [f for f in file if f.endswith('.nc')] # 这里根据类型对nc文件进行选择 file.sort(key=lambda x: int(x[:4])) # 2014_end.nc, 2015.nc print(file) day_ds = hour2day(Dir, file) # day_ds = xr.open_dataset('E:/3_Atmos/O3_Mapping/temp_file/day_single_e11_2019.nc') day_ds.to_netcdf('/code/lcx/3_Atmos/data/test.nc') # 这里的形状是 365*147*256,包含11个变量, 应该无缺失值 ds = xr.open_dataset('/code/lcx/3_Atmos/data/test.nc') for var in ds.keys(): print(var, np.isnan(ds[var]).sum(),np.isnan(day_ds[var]).sum()) ``` the result is : ``` u10 array(1) array(0) v10 array(22) array(0) t2m array(444) array(0) ``` I tried to find the differences between them ``` in: new_ds['u10'][np.where(np.isnan(new_ds['u10']))], day_ds['u10'][np.where(np.isnan(new_ds['u10']))] out: ( array([[[nan]]], dtype=float32) Coordinates: * longitude (longitude) float32 135.8 * latitude (latitude) float32 31.25 * time (time) datetime64[ns] 2019-08-14T03:30:00, array([[[-14.674651]]], dtype=float32) Coordinates: * longitude (longitude) float32 135.8 * latitude (latitude) float32 31.25 * time (time) datetime64[ns] 2019-08-14T03:30:00) ``` ","{""url"": ""https://api.github.com/repos/pydata/xarray/issues/7092/reactions"", ""total_count"": 0, ""+1"": 0, ""-1"": 0, ""laugh"": 0, ""hooray"": 0, ""confused"": 0, ""heart"": 0, ""rocket"": 0, ""eyes"": 0}",,completed,13221727,issue