home / github / issue_comments

Menu
  • GraphQL API
  • Search all tables

issue_comments: 478560102

This data as json

html_url issue_url id node_id user created_at updated_at author_association body reactions performed_via_github_app issue
https://github.com/pydata/xarray/issues/2861#issuecomment-478560102 https://api.github.com/repos/pydata/xarray/issues/2861 478560102 MDEyOklzc3VlQ29tbWVudDQ3ODU2MDEwMg== 30219501 2019-04-01T12:32:31Z 2019-04-01T12:32:31Z NONE

The coordinate-aware philosophy of xarray is nice because it prevents you from computing something meaningless. I have learned that the data types of the coordinates also have to be identical, i.e. one should not try to compare a dataset with float32 coordinates against one with float64 coordinates. So xarray has already taught me that lesson.

Providing a code example means that I have to extract all the steps done in the "BigScript" and the related files, which will clutter this thread. However, since you asked, I will try.

```

open and squeezing (for consistency between datasets)

self.DSref = xarray.open_dataset(DSfile_ref) self.DSproof = xarray.open_dataset(DSfile_proof) self.DSref = self.DSref.squeeze(); self.DSproof = self.DSproof.squeeze()

harmonize grids (the coordinates belonging together were copied from DSref to DSproof)

self.DSproof = self.MetricCalcProg.HarmonizeHoriGrid(dsetref=self.DSref, \ dsetmod=self.DSproof,posdimnames=self.cfggeneral.PossibleDimNames, \ varnsref=self.varns_ref,varnsmod=self.varns_proof) self.DSproof, self.DSref = self.MetricCalcProg.HarmonizeVertGrid(dsetref=self.DSref, \ dsetmod=self.DSproof,posdimnames=self.cfggeneral.PossibleDimNames, \ varnsref=self.varns_ref,varnsmod=self.varns_proof) self.DSproof, self.DSref = self.MetricCalcProg.HarmonizeTempGrid(dsetref=self.DSref, \ dsetmod=self.DSproof,posdimnames=self.cfggeneral.PossibleDimNames,varnsref=self.varns_ref, \ varnsmod=self.varns_proof,unifreqme=self.cfgdatamining["target_EvalFrequency"]["method"])

to compute linear correlation, dataset A and B have to have equal sample sizes

self.DSproof = self.DSproof[varnsproof].where(self.DSref[varnsref].notnull().data).to_dataset( \ name=varnsproof) self.DSref = self.DSref[varnsref].where(self.DSproof[varnsproof].notnull().data).to_dataset( \ name=varnsref) ```

The methods for harmonizing the grids are defined as follows. Do not get me wrong, but I have to deal with different datasets using different data types and variable names. I have to make the height coordinate of dataset A consistent with the height coordinate of dataset B (including its name). I would really like to have some tolerance options when computing DataA-DataB.

```
def HarmonizeHoriGrid(self,dsetref=None,dsetmod=None,posdimnames=None,varnsref=None,varnsmod=None): """ Copy all the hor. coordinates from a reference-DS to the model dataset (needed due to inconsistencies in dtype, ..., i.e. small deviations) return model dataset but with harmonized horizontal grid; prone to errors because the check of coordinates has to be done for each variable; (e.g. the model contains WSS(lon1,lat1) and the obs has WSS(lon,lat)) --> however that should be harmonized by the cdo's interpolation) """

self.logger.debug("         Harmonization of horizontal grids prior evaluation.")
CoordInfref = self.FindCoordinatesOfVariables(datafile=None,dataset=dsetref,varnamelist=varnsref);
CoordInfmod = self.FindCoordinatesOfVariables(datafile=None,dataset=dsetmod,varnamelist=varnsmod);
dim_xyref = GenUti.SplitMetaDim(CoordInfref,mode='spatial',PossibleDimDict=posdimnames)
dim_xymod = GenUti.SplitMetaDim(CoordInfmod,mode='spatial',PossibleDimDict=posdimnames)
#
for varmod,varref in zip(varnsmod,varnsref):
    if varref in dim_xyref.keys() and varmod in dim_xymod.keys():
        for dimes in dim_xyref[varref]:
            if dimes in dim_xymod[varmod]:
                self.logger.debug("           Found for the variable "+varref+" the spat. dimension "+dimes+ \
                " in reference dataset and an equivalent in dataset to evaluate: "+varmod+","+dimes+ \
                ". -> Make datatype consistent now")
                dsetmod[dimes].data = dsetref[dimes].data
            else:
                self.logger.debug("           Found for the variable "+varref+" the spat. dimension "+dimes+ \
                " in reference dataset but not no equivalent in dataset to evaluate. ")
if ("rotated_pole" in dsetmod.data_vars) and ("rotated_pole" in dsetref.data_vars):
    dsetmod["rotated_pole"]=dsetref["rotated_pole"] # harmonize the type of rotated_pole
# return the model dataset with harmonized dimensions
return dsetmod

def HarmonizeTempGrid(self, dsetref=None, dsetmod=None, posdimnames=None,
                      varnsref=None, varnsmod=None, unifreqme=None):
    """
    Copy the time-coordinate values of a reference dataset onto the model dataset.

    Needed because the two time axes may differ in dtype or by small offsets.
    The input datasets are already opened netCDF files (xarray datasets).

    For every (model, reference) variable pair, each temporal dimension of
    the reference variable that also exists for the model variable is
    treated according to ``unifreqme``:

    * ``"reselect"``: the reference coordinate values overwrite the model's.
    * ``"resample"``: only when both coordinates differ in size; both
      datasets are first reduced (nearest selection) to the timestamps they
      have in common, then the reference values overwrite the model's.
    * anything else: nothing is changed.

    In both active modes a warning is logged when the maximum of
    (model - reference), truncated to whole seconds, exceeds one second in
    magnitude.

    Parameters
    ----------
    dsetref, dsetmod : reference and model dataset.
    posdimnames : forwarded to ``GenUti.SplitMetaDim`` as ``PossibleDimDict``.
    varnsref, varnsmod : variable names, paired positionally.
    unifreqme : harmonization mode, see above.

    Returns
    -------
    (dsetmod, dsetref) with a harmonized temporal grid.
    """
    self.logger.debug(" Harmonization of temporal grids prior evaluation.")
    DimInfref = self.FindDimensionsOfVariables(datafile=None, dataset=dsetref, varnamelist=varnsref)
    DimInfmod = self.FindDimensionsOfVariables(datafile=None, dataset=dsetmod, varnamelist=varnsmod)
    dim_tref = GenUti.SplitMetaDim(DimInfref, mode='temporal', PossibleDimDict=posdimnames)
    dim_tmod = GenUti.SplitMetaDim(DimInfmod, mode='temporal', PossibleDimDict=posdimnames)

    for varmod, varref in zip(varnsmod, varnsref):
        if varref not in dim_tref or varmod not in dim_tmod:
            continue
        for dimes in dim_tref[varref]:
            if dimes not in dim_tmod[varmod]:
                continue
            # str() keeps the message buildable when unifreqme is left at
            # its default of None.
            helpstr = (" Found for the variable " + varref + " the temp. dimension "
                       + dimes + " in reference dataset and an equaivalent in dataset to evaluate: "
                       + varmod + "," + dimes
                       + ". -> Make datatype consistent depending on unifyfreqmethod"
                       + str(unifreqme))
            if unifreqme == "reselect":
                self.logger.debug(helpstr)
                timediff = np.max(dsetmod[dimes].data - dsetref[dimes].data)
                timediff = timediff / np.timedelta64(1, 's')
                if np.abs(int(timediff)) > 1:
                    self.logger.warning(" The two datasets do not share the same "
                                        "time-axis. Maximum difference is %s seconds",
                                        timediff)
                # NOTE(review): the copy is assumed to be unconditional (the
                # warning only reports the offset) -- confirm against the
                # original indentation.
                dsetmod[dimes].data = dsetref[dimes].data
            elif unifreqme == "resample" and np.size(dsetref[dimes]) != np.size(dsetmod[dimes]):
                self.logger.debug(helpstr)
                inters = pandas.to_datetime(dsetref[dimes].data)
                inters = inters.intersection(pandas.to_datetime(dsetmod[dimes].data))
                # Select along the dimension actually found instead of a
                # hard-coded "time", so differently named time axes work too.
                dsetmod = dsetmod.sel({dimes: inters}, method='nearest')
                dsetref = dsetref.sel({dimes: inters}, method='nearest')
                timediff = np.max(dsetmod[dimes].data - dsetref[dimes].data)
                timediff = timediff / np.timedelta64(1, 's')
                if np.abs(int(timediff)) > 1:
                    self.logger.warning(" The two harmonized datasets still do not "
                                        " share time axis. Max diff is %s seconds",
                                        timediff)
                dsetmod[dimes].data = dsetref[dimes].data
            else:
                # NOTE(review): assumed to close the mode chain above (unknown
                # mode, or "resample" with equally sized axes) -- confirm.
                self.logger.debug(" No harmonization needed here.")
    # return the datasets with harmonized dimensions
    return dsetmod, dsetref

def HarmonizeVertGrid(self, dsetref=None, dsetmod=None, posdimnames=None,
                      varnsref=None, varnsmod=None):
    """
    Give the vertical dimension of both datasets a common name.

    Needed because reference and model may use different dimension names.
    The input datasets are already opened netCDF files (xarray datasets).

    For every (reference, model) variable pair that has exactly one vertical
    dimension on each side, that dimension is renamed to
    ``"height_" + <reference variable name>`` in both datasets. Pairs for
    which either side has no vertical dimension are left untouched.

    Parameters
    ----------
    dsetref, dsetmod : reference and model dataset.
    posdimnames : forwarded to ``GenUti.SplitMetaDim`` as ``PossibleDimDict``.
    varnsref, varnsmod : variable names, paired positionally.

    Returns
    -------
    (dsetmod, dsetref) with a harmonized vertical grid.

    Raises
    ------
    SystemExit
        If a variable has more than one vertical dimension.
    """
    import sys  # local import: the file's import block is not visible here

    self.logger.debug(" Harmonization of vertical grids prior evaluation.")
    DimInfref = self.FindDimensionsOfVariables(datafile=None, dataset=dsetref, varnamelist=varnsref)
    DimInfmod = self.FindDimensionsOfVariables(datafile=None, dataset=dsetmod, varnamelist=varnsmod)
    dim_zref = GenUti.SplitMetaDim(DimInfref, mode='vertical', PossibleDimDict=posdimnames)
    dim_zmod = GenUti.SplitMetaDim(DimInfmod, mode='vertical', PossibleDimDict=posdimnames)

    for varref, varmod in zip(varnsref, varnsmod):
        # .get(): a variable without an entry is treated like one without
        # vertical dimensions instead of raising KeyError.
        zdims_ref = dim_zref.get(varref)
        zdims_mod = dim_zmod.get(varmod)
        if not (zdims_ref and zdims_mod):
            self.logger.debug(" No vertical dimensions found for the variable " + varref + " or " + varmod)
            continue

        self.logger.debug(" here we have to modify vert. coord. of " + varref + " " + varmod)
        if len(zdims_ref) != 1 or len(zdims_mod) != 1:
            self.logger.error(" Many vertical dimensions found for the variable " + varref + " or " + varmod)
            # Log each variable from the dataset it belongs to.
            self.logger.error(dsetref[varref])
            self.logger.error(dsetmod[varmod])
            # Still SystemExit, but with a failure status and without
            # depending on the interactive-only exit() builtin.
            sys.exit(1)

        newname = "height_" + varref
        dsetmod = dsetmod.rename({zdims_mod[0]: newname})
        dsetref = dsetref.rename({zdims_ref[0]: newname})
    return dsetmod, dsetref

{
    "total_count": 0,
    "+1": 0,
    "-1": 0,
    "laugh": 0,
    "hooray": 0,
    "confused": 0,
    "heart": 0,
    "rocket": 0,
    "eyes": 0
}
  427644858
Powered by Datasette · Queries took 0.88ms · About: xarray-datasette