# Source code for cdms2.dataset

# Automatically adapted for numpy.oldnumeric Aug 01, 2007 by foo
# Further modified to be pure new numpy June 24th 2008

""" CDMS dataset and file objects"""
from __future__ import print_function
from .error import CDMSError
import sys
from . import Cdunif
import numpy
from . import cdmsNode
import os
import string
try:
    from urllib.parse import urlparse, urlunparse
    from urllib.request import urlopen
except ImportError:
    from urlparse import urlparse, urlunparse
    from urllib import urlopen
from . import cdmsobj
import re
from .CDMLParser import CDMLParser
from .cdmsobj import CdmsObj
from .axis import Axis, FileAxis, FileVirtualAxis, isOverlapVector
from .coord import FileAxis2D, DatasetAxis2D
from .auxcoord import FileAuxAxis1D, DatasetAuxAxis1D
from .grid import RectGrid, FileRectGrid
from .hgrid import FileCurveGrid, DatasetCurveGrid
from .gengrid import FileGenericGrid, DatasetGenericGrid
from .variable import DatasetVariable
from .fvariable import FileVariable
from .tvariable import asVariable
from .cdmsNode import CdDatatypes
from . import convention
import warnings
from collections import OrderedDict
from six import string_types
from .util import getenv_bool

# Default is serial mode until setNetcdfUseParallelFlag(1) is called
rk = 0
sz = 1
Cdunif.CdunifSetNCFLAGS("use_parallel", 0)
# Module-wide switch: set to True by setNetcdfUseParallelFlag() and read by
# getMpiRank(), getMpiSize() and openDataset().
CdMpi = False

# When the CDMS_NO_MPI setting is truthy the mpi4py import below is skipped
# and setNetcdfUseParallelFlag() refuses to run.
mpi_disabled = getenv_bool("CDMS_NO_MPI", "False")

try:
    # skip trying to load mpi4py module
    if mpi_disabled:
        raise Exception()

    from mpi4py import rc
    # Must be set before `from mpi4py import MPI`; per the mpi4py docs this
    # stops MPI from being initialised automatically at import time.
    # setNetcdfUseParallelFlag() calls MPI.Init() itself when needed.
    rc.initialize = False
    from mpi4py import MPI
except BaseException:
    # NOTE(review): `MPI` is left undefined on this path (import failed or
    # MPI disabled); code that touches MPI is guarded by CdMpi/mpi_disabled.
    rk = 0

try:
    from . import gsHost
    from pycf import libCFConfig as libcf
except BaseException:
    # openDataset() checks `libcf is not None` before using libcf/gsHost.
    libcf = None


# Message prefix; the offending axis name is presumably appended by callers.
DuplicateAxis = "Axis already defined: "


class DuplicateAxisError(CDMSError):
    """CDMSError subclass that defines no behaviour of its own."""
DuplicateGrid = "Grid already defined: " DuplicateVariable = "Variable already defined: " FileNotFound = "File not found: " FileWasClosed = "File was closed: " InvalidDomain = "Domain elements must be axes or grids" ModeNotSupported = "Mode not supported: " SchemeNotSupported = "Scheme not supported: " # Regular expressions for parsing the file map. _Name = re.compile(r'[a-zA-Z_:][-a-zA-Z0-9._:]*') _ListStartPat = r'\[\s*' _ListStart = re.compile(_ListStartPat) _ListEndPat = r'\s*\]' _ListEnd = re.compile(_ListEndPat) _ListSepPat = r'\s*,\s*' _ListSep = re.compile(_ListSepPat) _IndexPat = r'(\d+|-)' _FilePath = r"([^\s\]\',]+)" # Two file map patterns, _IndexList4 is the original one, _IndexList5 supports # forecast data too... _IndexList4 = re.compile( _ListStartPat + _IndexPat + _ListSepPat + _IndexPat + _ListSepPat + _IndexPat + _ListSepPat + _IndexPat + _ListSepPat + _FilePath + _ListEndPat) _IndexList5 = re.compile( _ListStartPat + _IndexPat + _ListSepPat + _IndexPat + _ListSepPat + _IndexPat + _ListSepPat + _IndexPat + _ListSepPat + _IndexPat + _ListSepPat + _FilePath + _ListEndPat) _NPRINT = 20 _showCompressWarnings = True
def setCompressionWarnings(value=None):
    """Turn on/off the warnings for compression.

    Parameters
    ----------
    value : int, bool, str or None
        * 0/1, False/True, or one of 'y'/'n'/'yes'/'no' (any letter case)
        * None inverts the current setting

    Returns
    -------
    bool
        The value the flag was set to.

    Raises
    ------
    CDMSError
        If ``value`` is none of the accepted forms.
    """
    global _showCompressWarnings
    message = ("setCompressionWarnings flags must be yes/no or 1/0, "
               "or None to invert it")

    if value is None:
        value = not _showCompressWarnings
    if isinstance(value, string_types):
        # The original called the non-existent ``str.slower`` here, so every
        # string argument failed with AttributeError.
        answer = value.lower()
        if answer not in ('y', 'n', 'yes', 'no'):
            raise CDMSError(message)
        value = 1 if answer[0] == 'y' else 0
    if not isinstance(value, (int, bool)):
        raise CDMSError(message)

    if value in [1, True]:
        _showCompressWarnings = True
    elif value in [0, False]:
        _showCompressWarnings = False
    else:
        # Integers other than 0/1 end up here.
        raise CDMSError(message)
    return _showCompressWarnings
def setNetcdfUseNCSwitchModeFlag(value):
    """Set the Cdunif "use_define_mode" flag.

    Controls whether cdms2 switches constantly between the netCDF
    define and write modes.

    Parameters
    ----------
    value : int or bool
        0/False stores 0 in the flag, 1/True stores 1.

    Returns
    -------
    None

    Raises
    ------
    CDMSError
        If ``value`` is not one of True, False, 0, 1.
    """
    if value not in [True, False, 0, 1]:
        raise CDMSError(
            "Error UseNCSwitchMode flag must be 1(can use)/0(do not use) or true/False")
    setting = 0 if value in [0, False] else 1
    Cdunif.CdunifSetNCFLAGS("use_define_mode", setting)
def setNetcdfUseParallelFlag(value):
    """Enable/Disable NetCDF MPI I/O (parallelism).

    Parameters
    ----------
    value : int or bool
        0/False clears the Cdunif "use_parallel" flag.  1/True sets it,
        marks the module as running under MPI (``CdMpi = True``) and
        initialises MPI if that has not happened yet.

    Returns
    -------
    None

    Raises
    ------
    CDMSError
        If MPI support was disabled (``mpi_disabled``) or ``value`` is not
        one of True, False, 0, 1.
    """
    global CdMpi
    if mpi_disabled:
        # The original raised the misspelled name ``CDSMError``, which
        # surfaced as a NameError instead of the intended CDMSError.
        raise CDMSError("MPI support is disabled.")
    if value not in [True, False, 0, 1]:
        raise CDMSError(
            "Error UseParallel flag must be 1(can use)/0(do not use) or true/False")
    if value in [0, False]:
        Cdunif.CdunifSetNCFLAGS("use_parallel", 0)
    else:
        Cdunif.CdunifSetNCFLAGS("use_parallel", 1)
        CdMpi = True
        # mpi4py auto-initialisation is switched off at import time, so MPI
        # has to be started explicitly on first use.
        if not MPI.Is_initialized():
            MPI.Init()
def getMpiRank():
    """Return the rank of this process in ``MPI.COMM_WORLD``.

    Returns
    -------
    int
        The rank, or 0 when MPI has not been enabled (``CdMpi`` is false).
    """
    if not CdMpi:
        return 0
    return MPI.COMM_WORLD.Get_rank()
def getMpiSize():
    """Return the size of ``MPI.COMM_WORLD``.

    Returns
    -------
    int
        The communicator size, or 1 when MPI has not been enabled
        (``CdMpi`` is false).
    """
    if not CdMpi:
        return 1
    return MPI.COMM_WORLD.Get_size()
def setNetcdf4Flag(value):
    """Set the Cdunif "netcdf4" flag (netCDF4/HDF5 mode in libnetcdf).

    Parameters
    ----------
    value : int or bool
        0/False stores 0 in the flag, 1/True stores 1.

    Returns
    -------
    None

    Raises
    ------
    CDMSError
        If ``value`` is not one of True, False, 0, 1.
    """
    if value not in [True, False, 0, 1]:
        raise CDMSError("Error NetCDF4 flag must be 1/0 or true/False")
    setting = 0 if value in [0, False] else 1
    Cdunif.CdunifSetNCFLAGS("netcdf4", setting)
def setNetcdfClassicFlag(value):
    """Set the Cdunif "classic" flag (netCDF3 classic mode in libnetcdf).

    Parameters
    ----------
    value : int or bool
        0/False stores 0 in the flag, 1/True stores 1.

    Returns
    -------
    None

    Raises
    ------
    CDMSError
        If ``value`` is not one of True, False, 0, 1.
    """
    if value not in [True, False, 0, 1]:
        raise CDMSError("Error NetCDF Classic flag must be 1/0 or true/False")
    setting = 0 if value in [0, False] else 1
    Cdunif.CdunifSetNCFLAGS("classic", setting)
def setNetcdfShuffleFlag(value):
    """Set the Cdunif "shuffle" flag (NetCDF shuffle on/off).

    Parameters
    ----------
    value : int or bool
        0/False stores 0 in the flag, 1/True stores 1.

    Returns
    -------
    None

    Raises
    ------
    CDMSError
        If ``value`` is not one of True, False, 0, 1.
    """
    if value not in [True, False, 0, 1]:
        raise CDMSError("Error NetCDF Shuffle flag must be 1/0 or true/False")
    setting = 0 if value in [0, False] else 1
    Cdunif.CdunifSetNCFLAGS("shuffle", setting)
def setNetcdfDeflateFlag(value):
    """Set the Cdunif "deflate" flag (NetCDF deflation on/off).

    Parameters
    ----------
    value : int or bool
        0/False stores 0 in the flag, 1/True stores 1.

    Returns
    -------
    None

    Raises
    ------
    CDMSError
        If ``value`` is not one of True, False, 0, 1.
    """
    if value not in [True, False, 0, 1]:
        raise CDMSError("Error NetCDF deflate flag must be 1/0 or true/False")
    setting = 0 if value in [0, False] else 1
    Cdunif.CdunifSetNCFLAGS("deflate", setting)
def setNetcdfDeflateLevelFlag(value):
    """Set the Cdunif "deflate_level" flag.

    Parameters
    ----------
    value : int
        Deflation level; any of 0 through 9 is accepted and stored as given.

    Returns
    -------
    None

    Raises
    ------
    CDMSError
        If ``value`` is not one of 0..9.
    """
    allowed_levels = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    if value in allowed_levels:
        Cdunif.CdunifSetNCFLAGS("deflate_level", value)
        return
    raise CDMSError(
        "Error NetCDF deflate_level flag must be an integer < 10")
def getNetcdfUseNCSwitchModeFlag():
    """Return the current value of the Cdunif "use_define_mode" flag.

    Returns
    -------
    The flag value as reported by ``Cdunif.CdunifGetNCFLAGS``.
    """
    flag = Cdunif.CdunifGetNCFLAGS("use_define_mode")
    return flag
def getNetcdfUseParallelFlag():
    """Return the current value of the Cdunif "use_parallel" flag.

    Returns
    -------
    The flag value as reported by ``Cdunif.CdunifGetNCFLAGS``.
    """
    flag = Cdunif.CdunifGetNCFLAGS("use_parallel")
    return flag
def getNetcdf4Flag():
    """Return the current value of the Cdunif "netcdf4" flag.

    Returns
    -------
    The flag value as reported by ``Cdunif.CdunifGetNCFLAGS``.
    """
    flag = Cdunif.CdunifGetNCFLAGS("netcdf4")
    return flag
def getNetcdfClassicFlag():
    """Return the current value of the Cdunif "classic" flag.

    Returns
    -------
    The flag value as reported by ``Cdunif.CdunifGetNCFLAGS``.
    """
    flag = Cdunif.CdunifGetNCFLAGS("classic")
    return flag
def getNetcdfShuffleFlag():
    """Return the current value of the Cdunif "shuffle" flag.

    Returns
    -------
    The flag value as reported by ``Cdunif.CdunifGetNCFLAGS``.
    """
    flag = Cdunif.CdunifGetNCFLAGS("shuffle")
    return flag
def getNetcdfDeflateFlag():
    """Return the current value of the Cdunif "deflate" flag.

    Returns
    -------
    The flag value as reported by ``Cdunif.CdunifGetNCFLAGS``.
    """
    flag = Cdunif.CdunifGetNCFLAGS("deflate")
    return flag
def getNetcdfDeflateLevelFlag():
    """Return the current value of the Cdunif "deflate_level" flag.

    Returns
    -------
    The flag value as reported by ``Cdunif.CdunifGetNCFLAGS``.
    """
    flag = Cdunif.CdunifGetNCFLAGS("deflate_level")
    return flag
def useNetcdf3():
    """Set the shuffle, deflate, deflate-level and netcdf4 flags to 0.

    The flags are cleared in that order through their individual setters.
    The intent, per the original documentation, is that output files
    written afterwards are NetCDF3 classic.

    Returns
    -------
    None
    """
    setters = (
        setNetcdfShuffleFlag,
        setNetcdfDeflateFlag,
        setNetcdfDeflateLevelFlag,
        setNetcdf4Flag,
    )
    for turn_off in setters:
        turn_off(0)
# Create a tree from a file path. # Returns the parse tree root node.
def load(path):
    """Parse the CDML file at ``path`` and return the parse tree root node.

    Parameters
    ----------
    path : str
        Path of the file to read.

    Returns
    -------
    The root node returned by ``CDMLParser.getRoot()``.
    """
    # ``with`` guarantees the handle is closed even if read() raises.
    with open(path) as fd:
        text = fd.read()
    p = CDMLParser()
    p.feed(text)
    p.close()
    return p.getRoot()
# Create a tree from a URI # URI is of the form scheme://netloc/path;parameters?query#fragment # where fragment may be an XPointer. # Returns the parse tree root node.
def loadURI(uri):
    """Fetch a CDML document from a URI and return the parse tree root node.

    Parameters
    ----------
    uri : str
        URI of the form scheme://netloc/path;parameters?query#fragment.
        Only scheme, netloc and path are used to fetch the document;
        parameters, query and fragment are dropped.

    Returns
    -------
    The root node returned by ``CDMLParser.getRoot()``.
    """
    (scheme, netloc, path, parameters, query, fragment) = urlparse(uri)
    uripath = urlunparse((scheme, netloc, path, '', '', ''))
    fd = urlopen(uripath)
    # try/finally rather than ``with``: the Python 2 ``urllib.urlopen``
    # fallback imported by this module is not a context manager.
    try:
        text = fd.read()
    finally:
        fd.close()
    p = CDMLParser()
    p.feed(text)
    p.close()
    return p.getRoot()
# Create a dataset # 'path' is the XML file name, or netCDF filename for simple file create # 'template' is a string template for the datafile(s), for dataset creation
def createDataset(path, template=None):
    """Open ``path`` for writing via :func:`openDataset`.

    Parameters
    ----------
    path : str
        The XML file name, or netCDF filename for simple file creation.
    template : str, optional
        String template for the datafile(s), for dataset creation;
        forwarded unchanged to ``openDataset``.

    Returns
    -------
    Whatever ``openDataset(path, 'w', template)`` returns.
    """
    write_mode = 'w'
    handle = openDataset(path, write_mode, template)
    return handle
# Open an existing dataset # 'uri' is a Uniform Resource Identifier, referring to a cdunif file, XML file, # or LDAP URL of a catalog dataset entry. # 'mode' is 'r', 'r+', 'a', or 'w'
def openDataset(uri, mode='r', template=None,
                dods=1, dpath=None, hostObj=None):
    """Open a dataset or data file.

    Parameters
    ----------
    uri : str
        File name or URL to open.  Schemes '' and 'file' are treated as
        local paths; 'http', 'https' and 'gridftp' as remote.
    mode : str
        Mode to open file with.
            'r': Read (default)
            'w': Write
            'a': Append
    template : str
        Template for the datafile(s) used for dataset creation.
        (Accepted for API compatibility; not used by this function.)
    dods : int
        For remote URLs: when true the URL is opened directly as a data
        file, otherwise it is fetched and parsed as CDML.
    dpath : str
        Directory holding the data files of an XML/CDML dataset.  When
        None it is derived from the dataset's 'directory' attribute.
    hostObj : object
        Passed to ``CdmsFile`` when libcf is available.

    Returns
    -------
    CdmsFile
        For plain data files and DODS URLs.
    Dataset
        For local '.xml'/'.cdml' files.
    parse tree root node
        For remote URLs opened with ``dods`` false.

    Raises
    ------
    CDMSError
        Unsupported mode or scheme, or failure to open a remote URL.
    FileNotFoundError
        Local data file opened with mode 'r' does not exist.
    """
    uri = uri.strip()
    (scheme, netloc, path, parameters, query, fragment) = urlparse(uri)
    if scheme in ('', 'file'):
        if netloc:
            # In case of relative path...
            path = netloc + path
        path = os.path.expanduser(path)
        path = os.path.normpath(os.path.join(os.getcwd(), path))

        root, ext = os.path.splitext(path)
        if ext in ['.xml', '.cdml']:
            if mode != 'r':
                # ModeNotSupported is a message prefix (a str); the original
                # tried to call it, which raised TypeError.
                raise CDMSError(ModeNotSupported + mode)
            datanode = load(path)
        else:
            # If the file doesn't exist allow it to be created.
            # MPI has issues with the check below; it needs to be tested
            # with only 1 rank.
            if mode == "r" and not os.path.exists(path):
                raise FileNotFoundError(path)
            elif mode == "w":
                # Best-effort removal of a stale file before re-creating it.
                try:
                    os.remove(path)
                except OSError:
                    pass
                return CdmsFile(path, mode, mpiBarrier=CdMpi)

            if libcf is not None:
                cdmsfile = CdmsFile(path, mode, hostObj)
                # A gridspec host file is re-opened through gsHost.
                if hasattr(cdmsfile, libcf.CF_FILETYPE):
                    if getattr(cdmsfile, libcf.CF_FILETYPE) == \
                            libcf.CF_GLATT_FILETYPE_HOST:
                        cdmsfile.close()
                        cdmsfile = gsHost.open(path, mode)
                return cdmsfile
            else:
                return CdmsFile(path, mode)
    elif scheme in ['http', 'gridftp', 'https']:
        if (dods):
            if mode != 'r':
                raise CDMSError(ModeNotSupported + mode)
            # DODS file?
            try:
                return CdmsFile(uri, mode)
            except Exception:
                msg = "Error in DODS open of: " + uri
                if os.path.exists(os.path.join(
                        os.path.expanduser("~"), ".dodsrc")):
                    msg += "\nYou have a .dodsrc in your HOME directory, try to remove it"
                raise CDMSError(msg)
        else:
            # The original repeated loadURI() inside the handler and then
            # raised regardless of the outcome; report the failure directly.
            try:
                return loadURI(uri)
            except Exception:
                raise CDMSError("Error in loadURI of: " + uri)
    else:
        raise CDMSError(SchemeNotSupported + scheme)

    # Determine dpath, the absolute path to data files:
    # dpath =
    # (1) head + node.directory, if .directory is relative
    # (2) node.directory, if absolute
    # (3) head, if no directory entry found (assume XML file is
    #     at top level of data directory)
    #
    # Note: In general, dset.datapath is relative to the URL of the
    #   enclosing database, but here the database is null, so the
    #   datapath should be absolute.
    if dpath is None:
        direc = datanode.getExternalAttr('directory')
        head = os.path.dirname(path)
        if direc and (os.path.isabs(direc) or urlparse(direc).scheme != ''):
            dpath = direc
        elif direc:
            dpath = os.path.join(head, direc)
        else:
            dpath = head

    dataset = Dataset(uri, mode, datanode, None, dpath)
    return dataset
# Functions for parsing the file map.
def parselist(text, f):
    """Parse a string of the form [A, A, ...].

    Parameters
    ----------
    text : str
        Input string; the list must start at the beginning of it.
    f : callable
        Parses one element A from the start of a string and returns
        ``(A, nconsumed)``.

    Returns
    -------
    (list, int)
        The parsed elements and the number of characters of ``text``
        consumed, closing bracket included.

    Raises
    ------
    CDMSError
        If the opening or closing bracket is not found where expected.
    """
    opening = _ListStart.match(text)
    if opening is None:
        raise CDMSError("Parsing cdms_filemap near " + text[0:_NPRINT])
    pos = opening.end()

    items = []
    while True:
        # One element, then either a separator (loop again) or the end.
        item, used = f(text[pos:])
        items.append(item)
        pos += used
        separator = _ListSep.match(text[pos:])
        if separator is None:
            break
        pos += separator.end()

    closing = _ListEnd.match(text[pos:])
    if closing is None:
        raise CDMSError("Parsing cdms_filemap near " + text[pos:pos + _NPRINT])
    return items, pos + closing.end()
def parseIndexList(text):
    """Parse a string of the form [i,j,k,l,...,path].

    Parameters
    ----------
    text : str
        i,j,k,l,... are indices or '-', and path is a filename.  Four
        indices are tried first, then five.

    Returns
    -------
    (list, int)
        ``[i, j, k, l, ..., path]`` with numeric indices converted to int
        and '-' replaced by None, plus the number of characters matched.

    Raises
    ------
    CDMSError
        If ``text`` matches neither the 4-index nor the 5-index form.
    """
    for pattern, nindices in ((_IndexList4, 4), (_IndexList5, 5)):
        m = pattern.match(text)
        if m is not None:
            break
    else:
        raise CDMSError("Parsing cdms_filemap near " + text[0:_NPRINT])

    fields = m.groups()
    result = [None if s == '-' else int(s) for s in fields[:nindices]]
    result.append(fields[nindices])
    return result, m.end()
def parseName(text):
    """Parse a name at the start of ``text``.

    Returns
    -------
    (str, int)
        The matched name and the number of characters consumed.

    Raises
    ------
    CDMSError
        If ``text`` does not start with a name.
    """
    found = _Name.match(text)
    if found is not None:
        return found.group(), found.end()
    raise CDMSError("Parsing cdms_filemap near " + text[0:_NPRINT])
def parseVarMap(text):
    """Parse a string of the form [ namelist, slicelist ].

    Returns
    -------
    (list, int)
        ``[namelist, slicelist]`` and the number of characters consumed.

    Raises
    ------
    CDMSError
        If a bracket or the separator is not found where expected.
    """
    def fail(at):
        # Same error as every other file-map parser: quote some context.
        raise CDMSError("Parsing cdms_filemap near " + text[at:at + _NPRINT])

    start = _ListStart.match(text)
    if start is None:
        fail(0)
    pos = start.end()

    names, used = parselist(text[pos:], parseName)
    pos += used

    sep = _ListSep.match(text[pos:])
    if sep is None:
        fail(pos)
    pos += sep.end()

    slices, used = parselist(text[pos:], parseIndexList)
    pos += used

    end = _ListEnd.match(text[pos:])
    if end is None:
        fail(pos)
    pos += end.end()
    return [names, slices], pos
def parseFileMap(text):
    """Parse a CDMS filemap.

    Grammar::

        filemap   : [ varmap, varmap, ...]
        varmap    : [ namelist, slicelist ]
        namelist  : [name, name, ...]
        slicelist : [indexlist, indexlist, ...]
        indexlist : [i,j,k,l,path]

    Parameters
    ----------
    text : str
        The complete filemap string.

    Returns
    -------
    list
        One ``[namelist, slicelist]`` entry per varmap.

    Raises
    ------
    CDMSError
        If parsing fails or text is left over after the list.
    """
    varmaps, consumed = parselist(text, parseVarMap)
    if consumed >= len(text):
        return varmaps
    raise CDMSError("Parsing cdms_filemap near " + text[consumed:consumed + _NPRINT])
# A CDMS dataset consists of a CDML/XML file and one or more data files try: from .cudsinterface import cuDataset except BaseException: pass
class Dataset(CdmsObj, cuDataset):
    """A dataset described by a CDML/XML node tree plus its data files."""

    def __init__(self, uri, mode, datasetNode=None,
                 parent=None, datapath=None):
        """Build the dataset's variables, axes and grids from ``datasetNode``.

        Parameters
        ----------
        uri : str
            URI the dataset was opened from; its path part is kept in
            ``self._xmlpath_``.
        mode : str
            Open mode, stored as ``self.mode``.
        datasetNode : node, optional
            Root of the parsed tree; its tag must be 'dataset'.
        parent : object, optional
            Stored as ``self.parent``.
        datapath : str, optional
            Stored as ``self.datapath``.

        Raises
        ------
        CDMSError
            If ``datasetNode`` is given but is not a 'dataset' node.
        """
        if datasetNode is not None and datasetNode.tag != 'dataset':
            raise CDMSError('Node is not a dataset node')
        CdmsObj.__init__(self, datasetNode)
        # Add these names to the object's internal-attribute list.
        # NOTE(review): the assignment target has three leading underscores
        # while the name read has two -- presumably deliberate; confirm
        # against CdmsObj.
        for v in ['datapath',
                  'variables',
                  'axes',
                  'grids',
                  'xlinks',
                  'dictdict',
                  'default_variable_name',
                  'parent',
                  'uri',
                  'mode']:
            if v not in self.__cdms_internals__:
                val = self.__cdms_internals__ + [v, ]
                self.___cdms_internals__ = val

        cuDataset.__init__(self)
        self.parent = parent
        self.uri = uri
        self.mode = mode
        # Path of data files relative to parent db.
        # Note: .directory is the location of data relative to the location of
        # the XML file
        self.datapath = datapath
        self.variables = {}
        self.axes = {}
        self.grids = {}
        self.xlinks = {}
        # Gridmap:(latname,lonname,order,maskname,gridclass) => grid
        self._gridmap_ = {}
        (scheme, netloc, xmlpath, parameters, query, fragment) = urlparse(uri)
        self._xmlpath_ = xmlpath
        # Dictionary of dictionaries, keyed on node tags
        # (the three grid tags all share the single self.grids dictionary)
        self.dictdict = {'variable': self.variables,
                         'axis': self.axes,
                         'rectGrid': self.grids,
                         'curveGrid': self.grids,
                         'genericGrid': self.grids,
                         'xlink': self.xlinks
                         }
        # Dataset IDs are external, so may not have been defined yet.
        if not hasattr(self, 'id'):
            self.id = '<None>'
        self._status_ = 'open'
        self._convention_ = convention.getDatasetConvention(self)

        # Collect named children (having attribute 'id') into dictionaries
        if datasetNode is not None:
            coordsaux = self._convention_.getDsetnodeAuxAxisIds(datasetNode)

            for node in list(datasetNode.getIdDict().values()):
                if node.tag == 'variable':
                    # Auxiliary coordinates are stored with the variables:
                    # 1-D when the domain has one child, 2-D otherwise.
                    if node.id in coordsaux:
                        if node.getDomain().getChildCount() == 1:
                            obj = DatasetAuxAxis1D(self, node.id, node)
                        else:
                            obj = DatasetAxis2D(self, node.id, node)
                    else:
                        obj = DatasetVariable(self, node.id, node)
                    self.variables[node.id] = obj
                elif node.tag == 'axis':
                    obj = Axis(self, node)
                    self.axes[node.id] = obj
                elif node.tag == 'rectGrid':
                    obj = RectGrid(self, node)
                    self.grids[node.id] = obj
                # elif node.tag == 'xlink':
                #     obj = Xlink(node)
                #     self.xlinks[node.id] = obj
                else:
                    # Any other tag: keep the raw node, creating the
                    # per-tag dictionary on first use.
                    dict = self.dictdict.get(node.tag)
                    if dict is not None:
                        dict[node.id] = node
                    else:
                        self.dictdict[node.tag] = {node.id: node}

            # Initialize grid domains
            for grid in list(self.grids.values()):
                grid.initDomain(self.axes, self.variables)
                latname = grid.getLatitude().id
                lonname = grid.getLongitude().id
                mask = grid.getMaskVar()
                if mask is None:
                    maskname = ""
                else:
                    maskname = mask.id
                # NOTE(review): this key has 4 elements, whereas the gridkey
                # used for lookups below is indexed up to [4] -- check that
                # declared grids can actually be found by gridkey.
                self._gridmap_[
                    (latname, lonname, grid.getOrder(), maskname)] = grid

            # Initialize variable domains.
            for var in list(self.variables.values()):
                var.initDomain(self.axes, self.grids)

            for var in list(self.variables.values()):

                # Get grid information for the variable. gridkey has the form
                # (latname,lonname,order,maskname,abstract_class).
                gridkey, lat, lon = var.generateGridkey(
                    self._convention_, self.variables)

                # If the variable is gridded, lookup the grid. If no such grid exists,
                # create a unique gridname, create the grid, and add to the
                # gridmap.
                if gridkey is None:
                    grid = None
                else:
                    grid = self._gridmap_.get(gridkey)
                    if grid is None:
                        if hasattr(var, 'grid_type'):
                            gridtype = var.grid_type
                        else:
                            gridtype = "generic"

                        # Base name is 'grid_<n>' for generic grids and
                        # 'grid_<a>x<b>' for everything else.
                        candidateBasename = None
                        if gridkey[4] == 'rectGrid':
                            gridshape = (len(lat), len(lon))
                        elif gridkey[4] == 'curveGrid':
                            gridshape = lat.shape
                        elif gridkey[4] == 'genericGrid':
                            gridshape = lat.shape
                            candidateBasename = 'grid_%d' % gridshape
                        else:
                            gridshape = (len(lat), len(lon))

                        if candidateBasename is None:
                            candidateBasename = 'grid_%dx%d' % gridshape
                        if candidateBasename not in self.grids:
                            gridname = candidateBasename
                        else:
                            # Name taken: try suffixes _a .. _z.
                            foundname = 0
                            for i in range(97, 123):  # Lower-case letters
                                candidateName = candidateBasename + \
                                    '_' + chr(i)
                                if candidateName not in self.grids:
                                    gridname = candidateName
                                    foundname = 1
                                    break

                            if not foundname:
                                # Skips var.setGrid() for this variable.
                                print(
                                    'Warning: cannot generate a grid for variable', var.id)
                                continue

                        # Create the grid
                        if gridkey[4] == 'rectGrid':
                            node = cdmsNode.RectGridNode(
                                gridname, lat.id, lon.id, gridtype, gridkey[2])
                            grid = RectGrid(self, node)
                            grid.initDomain(self.axes, self.variables)
                        elif gridkey[4] == 'curveGrid':
                            grid = DatasetCurveGrid(lat, lon, gridname, self)
                        else:
                            grid = DatasetGenericGrid(lat, lon, gridname, self)
                        self.grids[grid.id] = grid
                        self._gridmap_[gridkey] = grid

                # Set the variable grid
                var.setGrid(grid)

            # Attach boundary variables
            for name in coordsaux:
                var = self.variables[name]
                bounds = self._convention_.getVariableBounds(self, var)
                var.setBounds(bounds)

        # Create the internal filemap, if attribute 'cdms_filemap' is present.
        # _filemap_ is a dictionary, mapping
        # (varname, timestart, levstart, forecast) => path
        #
        # Also, for each partitioned variable, set attribute '_varpart_' to [timepart, levpart]
        # where timepart is the partition for time (or None if not time-dependent)
        # and levpart is the partition in the level dimension, or None if not applicable.
        #
        # For variables partitioned in both time and level dimension, it is assumed that
        # for a given variable the partitions are orthogonal. That is, for a given
        # variable, at any timeslice the level partition is the same.
        if hasattr(self, 'cdms_filemap'):
            self._filemap_ = {}
            filemap = parseFileMap(self.cdms_filemap)
            for varlist, varmap in filemap:
                for varname in varlist:
                    timemap = {}
                    levmap = {}
                    fcmap = {}
                    # The for loop was:
                    # for tstart, tend, levstart, levend, path in varmap:
                    # but now there _may_ be an additional item before path...
                    for varm1 in varmap:
                        tstart, tend, levstart, levend = varm1[0:4]
                        # Six entries means a forecast index precedes the path.
                        if (len(varm1) >= 6):
                            forecast = varm1[4]
                        else:
                            forecast = None
                        path = varm1[-1]
                        self._filemap_[
                            (varname, tstart, levstart, forecast)] = path
                        if tstart is not None:
                            # Collect unique (tstart, tend) tuples
                            timemap[(tstart, tend)] = 1
                        if levstart is not None:
                            levmap[(levstart, levend)] = 1
                        if forecast is not None:
                            fcmap[(forecast, forecast)] = 1
                    tkeys = list(timemap.keys())
                    if len(tkeys) > 0:
                        tkeys.sort()
                        tpart = [list(x) for x in tkeys]
                    else:
                        tpart = None
                    levkeys = list(levmap.keys())
                    if len(levkeys) > 0:
                        levkeys.sort()
                        levpart = [list(x) for x in levkeys]
                    else:
                        levpart = None
                    # The forecast keys are sorted but not used further here.
                    fckeys = list(fcmap.keys())
                    if len(fckeys) > 0:
                        fckeys.sort()
                    if varname in self.variables:
                        self.variables[varname]._varpart_ = [tpart, levpart]
[docs] def getConvention(self): """Get the metadata convention associated with this dataset or file.""" return self._convention_
# Get a dictionary of objects with the given tag
[docs] def getDictionary(self, tag): return self.dictdict[tag]
# Synchronize writes with data/metadata files
[docs] def sync(self): pass
# Close all files
[docs] def close(self): for dict in list(self.dictdict.values()): for obj in list(dict.values()): obj.parent = None del obj self.dictdict = {} self.variables = {} self.axes = {} self.grids = {} self.xlinks = {} self.parent = None self._status_ = 'closed'
# Note: Removed to allow garbage collection of reference cycles # def __del__(self): # if cdmsobj._debug==1: # print 'Deleting dataset',self.id # self.close() # Create an axis # 'name' is the string name of the Axis # 'ar' is the 1-D data array, or None for an unlimited axis # Return an axis object.
[docs] def createAxis(self, name, ar): pass
# Create an implicit rectilinear grid. lat, lon, and mask are objects. # order and type are strings
[docs] def createRectGrid(self, id, lat, lon, order, type="generic", mask=None): """Create rectilinear grid. Parameters ---------- id : str Identifier for the grid. lat : (cdms2.TransientAxis, cdms2.FileAxis) Latitude axis used to generate grid. lon : (cdms2.TransientAxis, cdms2.FileAxis) Longitude axis used to generate grid. order : str Order of axis e.g. "yx" mask : (numpy.ndarray) Array containing mask. """ node = cdmsNode.RectGridNode(id, lat.id, lon.id, type, order, mask.id) grid = RectGrid(self, node) grid.initDomain(self.axes, self.variables) self.grids[grid.id] = grid
# self._gridmap_[gridkey] = grid # Create a variable # 'name' is the string name of the Variable # 'datatype' is a CDMS datatype # 'axisnames' is a list of axes or grids # Return a variable object.
[docs] def createVariable(self, name, datatype, axisnames): pass
# Search for a pattern in a string-valued attribute. If attribute is None, # search all string attributes. If tag is 'dataset', just check the dataset, # else check all nodes in the dataset of class type matching the tag. If tag # is None, search the dataset and all objects contained in it.
[docs] def searchPattern(self, pattern, attribute, tag): resultlist = [] if tag is not None: tag = string.lower(tag) if tag in ('dataset', None): if self.searchone(pattern, attribute) == 1: resultlist = [self] else: resultlist = [] if tag is None: for dict in list(self.dictdict.values()): for obj in list(dict.values()): if obj.searchone(pattern, attribute): resultlist.append(obj) elif tag != 'dataset': dict = self.dictdict[tag] for obj in list(dict.values()): if obj.searchone(pattern, attribute): resultlist.append(obj) return resultlist
# Match a pattern in a string-valued attribute. If attribute is None, # search all string attributes. If tag is 'dataset', just check the dataset, # else check all nodes in the dataset of class type matching the tag. If tag # is None, search the dataset and all objects contained in it.
[docs] def matchPattern(self, pattern, attribute, tag): resultlist = [] if tag is not None: tag = string.lower(tag) if tag in ('dataset', None): if self.matchone(pattern, attribute) == 1: resultlist = [self] else: resultlist = [] if tag is None: for dict in list(self.dictdict.values()): for obj in list(dict.values()): if obj.matchone(pattern, attribute): resultlist.append(obj) elif tag != 'dataset': dict = self.dictdict[tag] for obj in list(dict.values()): if obj.matchone(pattern, attribute): resultlist.append(obj) return resultlist
# Apply a predicate, returning a list of all objects in the dataset # for which the predicate is true. The predicate is a function which # takes a dataset as an argument, and returns true or false. If the # tag is 'dataset', the predicate is applied to the dataset only. # If 'variable', 'axis', etc., it is applied only to that type of object # in the dataset. If None, it is applied to all objects, including # the dataset itself.
[docs] def searchPredicate(self, predicate, tag): resultlist = [] if tag is not None: tag = string.lower(tag) if tag in ('dataset', None): try: if predicate(*(self,)) == 1: resultlist.append(self) except AttributeError: pass if tag is None: for dict in list(self.dictdict.values()): for obj in list(dict.values()): try: if predicate(*(obj,)) == 1: resultlist.append(obj) except AttributeError: pass elif tag != "dataset": dict = self.dictdict[tag] for obj in list(dict.values()): try: if predicate(*(obj,)) == 1: resultlist.append(obj) except BaseException: pass return resultlist
# Return a sorted list of all data files associated with the dataset
[docs] def getPaths(self): pathdict = {} for var in list(self.variables.values()): for path, stuple in var.getPaths(): pathdict[path] = 1 result = sorted(list(pathdict.keys())) return result
# Open a data file associated with this dataset. # <filename> is relative to the self.datapath # <mode> is the open mode.
[docs] def openFile(self, filename, mode): # Opened via a local XML file? if self.parent is None: path = os.path.join(self.datapath, filename) if cdmsobj._debug == 1: sys.stdout.write(path + '\n') sys.stdout.flush() f = Cdunif.CdunifFile(path, mode) return f # Opened via a database else: dburls = self.parent.url if not isinstance(dburls, type([])): dburls = [dburls] # Try first to open as a local file for dburl in dburls: if os.path.isabs(self.directory): fileurl = os.path.join(self.directory, filename) else: try: fileurl = os.path.join(dburl, self.datapath, filename) except BaseException: print( 'Error joining', repr(dburl), self.datapath, filename) raise (scheme, netloc, path, parameters, query, fragment) = urlparse(fileurl) if scheme in ['file', ''] and os.path.isfile(path): if cdmsobj._debug == 1: sys.stdout.write(fileurl + '\n') sys.stdout.flush() f = Cdunif.CdunifFile(path, mode) return f # See if request manager is being used for file transfer db = self.parent if db.usingRequestManager(): cache = db.enableCache() lcbase = db.lcBaseDN lcpath = self.getLogicalCollectionDN(lcbase) # File location is logical collection path combined with # relative filename fileDN = (self.uri, filename) path = cache.getFile( filename, fileDN, lcpath=lcpath, userid=db.userid, useReplica=db.useReplica) try: f = Cdunif.CdunifFile(path, mode) except BaseException: # Try again, in case another process clobbered this file path = cache.getFile(fileurl, fileDN) f = Cdunif.CdunifFile(path, mode) return f # Try to read via FTP: for dburl in dburls: fileurl = os.path.join(dburl, self.datapath, filename) (scheme, netloc, path, parameters, query, fragment) = urlparse(fileurl) if scheme == 'ftp': cache = self.parent.enableCache() fileDN = (self.uri, filename) # Global file name path = cache.getFile(fileurl, fileDN) try: f = Cdunif.CdunifFile(path, mode) except BaseException: # Try again, in case another process clobbered this # file path = cache.getFile(fileurl, fileDN) f = 
Cdunif.CdunifFile(path, mode) return f # File not found raise FileNotFound(filename)
[docs] def getLogicalCollectionDN(self, base=None): """Return the logical collection distinguished name of this dataset. Notes ----- If <base> is defined, append it to the lc name. """ if hasattr(self, "lc"): dn = self.lc else: dn = "lc=%s" % self.id if base is not None: dn = "%s,%s" % (dn, base) return dn
[docs] def getVariable(self, id): """Get the variable object with the given id. Returns ------- None if not found.""" return self.variables.get(id)
[docs] def getVariables(self, spatial=0): """Get a list of variable objects. If spatial=1, only return those axes defined on latitude or longitude, excluding weights and bounds.""" retval = list(self.variables.values()) if spatial: retval = [x for x in retval if x.id[ 0:7] != "bounds_" and x.id[ 0:8] != "weights_" and ( (x.getLatitude() is not None) or ( x.getLongitude() is not None) or ( x.getLevel() is not None))] return retval
[docs] def getAxis(self, id): """Get the axis object with the given id. Returns ------- None if not found.""" return self.axes.get(id)
[docs] def getGrid(self, id): """Get the grid object with the given id. Returns ------- None if not found.""" return self.grids.get(id)
def __repr__(self):
    """Return a one-line summary showing id, URI, mode and open/closed status."""
    fields = (self.id, self.uri, self.mode, self._status_)
    return "<Dataset: '%s', URI: '%s', mode: '%s', status: %s>" % fields
# internattr.add_internal_attribute (Dataset, 'datapath', # 'variables', # 'axes', # 'grids', # 'xlinks', # 'dictdict', # 'default_variable_name', # 'parent', # 'uri', # 'mode')
class CdmsFile(CdmsObj, cuDataset):
    """A single data file opened through Cdunif, exposed as a CDMS object.

    On construction the file's variables, axes and grids are wrapped in
    ``FileVariable`` / ``FileAxis`` / ``File*Grid`` objects and stored in
    ``self.variables``, ``self.axes`` and ``self.grids``.
    """

    def __init__(self, path, mode, hostObj=None, mpiBarrier=False):
        """Open ``path`` with ``mode`` and build the variable/axis/grid tables.

        Parameters
        ----------
        path : str
            File path or URL; also used as the object id.
        mode : str
            Open mode passed to ``Cdunif.CdunifFile``. If it starts with
            "w" (any case) an existing file at ``path`` is removed first.
        hostObj :
            Optional host object whose ``gridVars`` mapping locates
            coordinate variables that live in other files.
        mpiBarrier : bool
            If true, wait on ``MPI.COMM_WORLD.Barrier()`` before opening.

        Notes
        -----
        If anything fails after the Cdunif file was opened, the file is
        closed again and the exception is re-raised.
        """
        if mpiBarrier:
            MPI.COMM_WORLD.Barrier()

        CdmsObj.__init__(self, None)
        cuDataset.__init__(self)
        value = self.__cdms_internals__ + ['datapath',
                                           'variables',
                                           'axes',
                                           'grids',
                                           'xlinks',
                                           'dictdict',
                                           'default_variable_name',
                                           'id',
                                           'uri',
                                           'parent',
                                           'mode']
        # NOTE(review): the triple underscore looks deliberate -- presumably
        # CdmsObj exposes __cdms_internals__ as a property backed by this
        # attribute; confirm against cdmsobj before renaming.
        self.___cdms_internals__ = value
        self.id = path
        if "://" in path:
            self.uri = path
        else:
            self.uri = "file://" + os.path.abspath(os.path.expanduser(path))
        self._mode_ = mode
        if mode[0].lower() == "w":
            # Best effort: start from a fresh file when writing.
            try:
                os.remove(path)
            except BaseException:
                pass
        self._file_ = Cdunif.CdunifFile(path, mode)

        self.variables = {}
        self.axes = {}
        self.grids = {}
        self.xlinks = {}
        self._gridmap_ = {}

        # self.attributes returns the Cdunif file dictionary.
        # self.replace_external_attributes(self._file_.__dict__)
        for att in self._file_.__dict__.keys():
            self.__dict__.__setitem__(att, self._file_.__dict__[att])
            self.attributes[att] = self._file_.__dict__[att]
        self._boundAxis_ = None         # Boundary axis for cell vertices
        if self._mode_ == 'w':
            self.Conventions = convention.CFConvention.current
        self._status_ = 'open'
        self._convention_ = convention.getDatasetConvention(self)

        try:
            # A mosaic variable with coordinates attached, but the coordinate
            # variables reside in a different file. Add the coordinate
            # variables to the mosaic variables list.
            if hostObj is not None:
                for name in list(self._file_.variables.keys()):
                    if 'coordinates' in dir(self._file_.variables[name]):
                        coords = self._file_.variables[name].coordinates.split()
                        for coord in coords:
                            if coord not in list(self._file_.variables.keys()):
                                cdunifvar = Cdunif.CdunifFile(
                                    hostObj.gridVars[coord][0], mode)
                                self._file_.variables[coord] = cdunifvar.variables[coord]

            # Get lists of 1D and auxiliary coordinate axes
            coords1d = self._convention_.getAxisIds(self._file_.variables)
            coordsaux = self._convention_.getAxisAuxIds(
                self._file_.variables, coords1d)

            # Build variable list
            for name in list(self._file_.variables.keys()):
                if name not in coords1d:
                    cdunifvar = self._file_.variables[name]
                    if name in coordsaux:
                        # Put auxiliary coordinate axes with variables, since
                        # there may be a dimension with the same name.
                        if len(cdunifvar.shape) == 2:
                            self.variables[name] = FileAxis2D(
                                self, name, cdunifvar)
                        else:
                            self.variables[name] = FileAuxAxis1D(
                                self, name, cdunifvar)
                    else:
                        self.variables[name] = FileVariable(
                            self, name, cdunifvar)

            # Build axis list
            for name in sorted(self._file_.dimensions.keys()):
                if name in coords1d:
                    cdunifvar = self._file_.variables[name]
                elif name in coordsaux:
                    cdunifvar = self._file_.variables[name]
                else:
                    cdunifvar = None
                self.axes[name] = FileAxis(self, name, cdunifvar)
            self.axes = OrderedDict(sorted(list(self.axes.items())))

            # Attach boundary variables
            for name in coordsaux:
                var = self.variables[name]
                bounds = self._convention_.getVariableBounds(self, var)
                var.setBounds(bounds)

            self.dictdict = {'variable': self.variables,
                             'axis': self.axes,
                             'rectGrid': self.grids,
                             'curveGrid': self.grids,
                             'genericGrid': self.grids}

            # Initialize variable domains
            for var in list(self.variables.values()):
                var.initDomain(self.axes)

            # Build grids
            for var in list(self.variables.values()):
                # Get grid information for the variable. gridkey has the form
                # (latname,lonname,order,maskname, abstract_class).
                gridkey, lat, lon = var.generateGridkey(
                    self._convention_, self.variables)

                # If the variable is gridded, lookup the grid. If no such grid
                # exists, create a unique gridname, create the grid, and add
                # to the gridmap.
                if gridkey is None:
                    grid = None
                else:
                    grid = self._gridmap_.get(gridkey)
                    if grid is None:
                        if hasattr(var, 'grid_type'):
                            gridtype = var.grid_type
                        else:
                            gridtype = "generic"

                        candidateBasename = None
                        if gridkey[4] == 'rectGrid':
                            gridshape = (len(lat), len(lon))
                        elif gridkey[4] == 'curveGrid':
                            gridshape = lat.shape
                        elif gridkey[4] == 'genericGrid':
                            gridshape = lat.shape
                            candidateBasename = 'grid_%d' % gridshape
                        else:
                            gridshape = (len(lat), len(lon))

                        if candidateBasename is None:
                            candidateBasename = 'grid_%dx%d' % gridshape
                        if candidateBasename not in self.grids:
                            gridname = candidateBasename
                        else:
                            foundname = 0
                            for i in range(97, 123):     # Lower-case letters
                                candidateName = candidateBasename + \
                                    '_' + chr(i)
                                if candidateName not in self.grids:
                                    gridname = candidateName
                                    foundname = 1
                                    break

                            if not foundname:
                                print(
                                    'Warning: cannot generate a grid for variable',
                                    var.id)
                                continue

                        # Create the grid
                        if gridkey[4] == 'rectGrid':
                            grid = FileRectGrid(
                                self, gridname, lat, lon, gridkey[2], gridtype)
                        else:
                            if gridkey[3] != '':
                                if gridkey[3] in self.variables:
                                    maskvar = self.variables[gridkey[3]]
                                else:
                                    print(
                                        'Warning: mask variable %s not found' %
                                        gridkey[3])
                                    maskvar = None
                            else:
                                maskvar = None
                            if gridkey[4] == 'curveGrid':
                                grid = FileCurveGrid(
                                    lat, lon, gridname, parent=self, maskvar=maskvar)
                            else:
                                try:
                                    grid = FileGenericGrid(
                                        lat, lon, gridname, parent=self, maskvar=maskvar)
                                except BaseException:
                                    # Fall back to a rectilinear grid when
                                    # both coordinates are one-dimensional.
                                    if(lat.rank() == 1 and lon.rank() == 1):
                                        grid = FileRectGrid(
                                            self, gridname, lat, lon, gridkey[2], gridtype)
                        self.grids[grid.id] = grid
                        self._gridmap_[gridkey] = grid
                    # Set the variable grid
                    var.setGrid(grid)
        except BaseException:
            self.close()
            raise

    def __enter__(self):
        """Return the file itself for use in a ``with`` statement."""
        return self

    def __exit__(self, type, value, traceback):
        """Close the file on leaving a ``with`` block.

        The file is closed whether or not the block raised, so an exception
        inside the block no longer leaves the file open. Returning False
        lets any exception propagate.
        """
        self.close()
        return False

    # setattr writes external global attributes to the file
    def __setattr__(self, name, value):
        """Set an attribute, mirroring external attributes into the file.

        Names listed in ``__cdms_internals__`` or starting with an
        underscore are stored on the object only.
        """
        self.__dict__[name] = value  # attributes kept in sync w/file
        if name not in self.__cdms_internals__ and name[0] != '_':
            setattr(self._file_, name, value)
            self.attributes[name] = value

    # getattr reads external global attributes from the file
    # def __getattr__ (self, name):
    # g = self.get_property_g(name)
    # if g is not None:
    # return g(self, name)
    # if name in self.__cdms_internals__:
    # try:
    # return self.__dict__[name]
    # except KeyError:
    # raise AttributeError("%s instance has no attribute %s." % \
    # (self.__class__.__name__, name))
    # else:
    # return getattr(self._file_,name)

    # delattr deletes external global attributes in the file
    def __delattr__(self, name):
        """Delete an attribute, also removing external ones from the file.

        Raises
        ------
        AttributeError
            If the instance has no such attribute.
        """
        try:
            del self.__dict__[name]
        except KeyError:
            raise AttributeError("%s instance has no attribute %s." %
                                 (self.__class__.__name__, name))
        if name not in self.__cdms_internals__:
            delattr(self._file_, name)
            if(name in list(self.attributes.keys())):
                del(self.attributes[name])
def sync(self):
    """Sync the file on disk.

    Raises
    ------
    CDMSError
        If the file has already been closed.
    """
    if self._status_ != "closed":
        self._file_.sync()
        return
    raise CDMSError(FileWasClosed + self.id)
def close(self):
    """Close the file; calling it again on a closed file does nothing.

    Every object held in ``self.dictdict`` is detached from this file
    (``parent`` set to None) before the underlying Cdunif file is closed.
    """
    if self._status_ == "closed":
        return
    if hasattr(self, 'dictdict'):
        for table in list(self.dictdict.values()):
            for member in list(table.values()):
                member.parent = None
        # One shared empty dict replaces all three tables.
        emptied = {}
        self.dictdict = emptied
        self.variables = emptied
        self.axes = emptied
    self._file_.close()
    self._status_ = 'closed'
# Note: Removed to allow garbage collection of reference cycles # def __del__(self): # if cdmsobj._debug==1: # print 'Deleting file',self.id # If the object has been deallocated due to open error, # it will not have an attribute .dictdict # if hasattr(self,"dictdict") and self.dictdict != {}: # self.close() # Create an axis # 'name' is the string name of the Axis # 'ar' is the 1-D data array, or None for an unlimited axis # Set unlimited to true to designate the axis as unlimited # Return an axis object.
def createAxis(self, name, ar, unlimited=0):
    """Create an axis in the file.

    Parameters
    ----------
    name : str
        The string name of the axis.
    ar : numpy.ndarray/None
        The 1-D data array, or None for an unlimited axis.
    unlimited : (int/True/False)
        1/True designates the axis as unlimited. This only takes effect
        when the netCDF parallel flag is off; otherwise the dimension is
        created with length ``len(ar)``.

    Returns
    -------
    an axis object (cdms2.axis.FileAxis).

    Raises
    ------
    CDMSError
        If the file has already been closed.
    """
    if self._status_ == "closed":
        raise CDMSError(FileWasClosed + self.id)
    cufile = self._file_
    if ar is None or (unlimited == 1 and getNetcdfUseParallelFlag() == 0):
        cufile.createDimension(str(name), None)
        if ar is None:
            # numpy.float was only an alias of the builtin float and has been
            # removed from NumPy; the builtin yields the same typecode.
            typecode = numpy.dtype(float).char
        else:
            typecode = ar.dtype.char
    else:
        cufile.createDimension(str(name), len(ar))
        typecode = ar.dtype.char

    # Compatibility: revert to old typecode for cdunif
    # typecode = typeconv.oldtypecodes[typecode]
    cuvar = cufile.createVariable(str(name), typecode, (str(name),))

    # Cdunif should really create this extra dimension info:
    # (units,typecode,filename,varname_local,dimension_type,ncid)
    cufile.dimensioninfo[str(name)] = ('', typecode, str(name), '', 'global', -1)

    # Note: like netCDF-3, cdunif does not support 64-bit integers.
    # If ar has dtype int64 on a 64-bit machine, cuvar will be a 32-bit int,
    # and ar must be downcast.
    if ar is not None:
        if ar.dtype.char != 'l':
            cuvar[0:len(ar)] = numpy.ma.filled(ar)
        else:
            cuvar[0:len(ar)] = numpy.ma.filled(ar).astype(cuvar.typecode())

    axis = FileAxis(self, name, cuvar)
    self.axes[name] = axis
    return axis
def createVirtualAxis(self, name, axislen):
    """Create an axis without any associated coordinate array.

    This axis is read-only. This is useful for the 'bound' axis.

    Parameters
    ----------
    name :
        The string name of the axis.
    axislen :
        The integer length of the axis.

    Returns
    -------
    axis :
        File axis whose id is name (cdms2.axis.FileVirtualAxis).

    Notes
    -----
    For netCDF output, this just creates a dimension without the
    associated coordinate array. On reads the axis will look like an axis
    of type float with values [0.0, 1.0, ..., float(axislen-1)]. On write
    attempts an exception is raised.
    """
    if self._status_ == "closed":
        raise CDMSError(FileWasClosed + self.id)
    axisid = str(name)
    cufile = self._file_
    cufile.createDimension(axisid, axislen)
    cufile.dimensioninfo[axisid] = ('', 'f', axisid, '', 'global', -1)
    virtual = FileVirtualAxis(self, axisid, axislen)
    self.axes[axisid] = virtual
    return virtual
# Copy axis description and data from another axis
def copyAxis(self, axis, newname=None, unlimited=0, index=None, extbounds=None):
    """Copy axis description and data from another axis.

    If an axis called ``newname`` already exists it is reused when it is
    compatible with ``axis`` (same length for virtual axes, same values
    within 1e-5 for fixed axes, overlapping values for an extended axis);
    otherwise ``DuplicateAxisError`` is raised.

    Parameters
    ----------
    axis :
        Axis to copy (cdms2.axis.FileAxis/cdms2.axis.FileVirtualAxis).
    newname : (None/str)
        New name for axis (default: ``axis.id``). Names longer than 127
        characters are truncated with a warning.
    unlimited : (int/True/False)
        Unlimited dimension (default 0).
    index : (int/None)
        Position in the existing extended axis to write to. When None it
        is determined with ``isOverlapVector``.
    extbounds : (numpy.ndarray)
        Bounds to use instead of ``axis.getBounds()`` when extending.

    Returns
    --------
    The file axis (cdms2.axis.FileAxis/cdms2.axis.FileVirtualAxis).

    Raises
    ------
    DuplicateAxisError
        If an incompatible axis of the same name already exists.
    """
    if newname is None:
        newname = axis.id

    if len(newname) > 127:
        msg = "axis name has more than 127 characters, name will be truncated"
        warnings.warn(msg, UserWarning)
        newname = newname[:127]

    # If the axis already exists and has the same values, return existing
    if newname in self.axes:
        newaxis = self.axes[newname]
        if newaxis.isVirtual():
            if len(axis) != len(newaxis):
                raise DuplicateAxisError(DuplicateAxis + newname)
        elif unlimited == 0 or (unlimited == 1 and getNetcdfUseParallelFlag() != 0):
            # The length test must come first: it short-circuits the
            # element-wise comparison, which needs equal shapes.
            # numpy.alltrue has been removed from NumPy; numpy.all is the
            # equivalent spelling.
            if len(axis) != len(newaxis) or not numpy.all(
                    numpy.less(numpy.absolute(newaxis[:] - axis[:]), 1.e-5)):
                raise DuplicateAxisError(DuplicateAxis + newname)
        else:
            if index is None:
                isoverlap, index = isOverlapVector(axis[:], newaxis[:])
            else:
                isoverlap = 1
            if isoverlap:
                self._file_.sync()
                newaxis[index:index + len(axis)] = axis[:]
                if extbounds is None:
                    axisBounds = axis.getBounds()
                else:
                    axisBounds = extbounds
                if axisBounds is not None:
                    newaxis.setBounds(axisBounds)
            else:
                raise DuplicateAxisError(DuplicateAxis + newname)

    elif axis.isVirtual():
        newaxis = self.createVirtualAxis(newname, len(axis))

    # Else create the new axis and copy its bounds and metadata
    else:
        newaxis = self.createAxis(newname, axis[:], unlimited)
        bounds = axis.getBounds()
        if bounds is not None:
            if hasattr(axis, 'bounds'):
                boundsid = axis.bounds
            else:
                boundsid = None
            newaxis.setBounds(bounds, persistent=1, boundsid=boundsid)
        for attname, attval in axis.attributes.items():
            if attname not in ["datatype", "id", "length",
                               "isvar", "name_in_file", "partition"]:
                setattr(newaxis, attname, attval)
    return newaxis
# Create an implicit rectilinear grid. lat, lon, and mask are objects. # order and type are strings
def createRectGrid(self, id, lat, lon, order, type="generic", mask=None):
    """Create an implicit rectilinear grid and register it with the file.

    Parameters
    ----------
    id : (str)
        Grid name.
    lat :
        Latitude axis object.
    lon :
        Longitude axis object.
    order : (str)
        Axis order.
    type : (str)
        Grid type (default `generic`).
    mask :
        Mask object (default None).

    Returns
    -------
    grid (cdms2.grid.FileRectGrid)
    """
    created = FileRectGrid(self, id, lat, lon, order, type, mask)
    self.grids[created.id] = created
    # Index the grid by its coordinate ids and order for later lookup.
    self._gridmap_[(lat.id, lon.id, order, None)] = created
    return created
# Copy grid
def copyGrid(self, grid, newname=None):
    """Copy a grid, together with its latitude and longitude axes, into the file.

    If a grid called ``newname`` already exists and uses the same axes,
    order and type, it is returned unchanged.

    Parameters
    ----------
    grid :
        Grid to copy
        (cdms2.grid.FileRectGrid/cdms2.hgrid.FileCurveGrid/cdms2.gengrid.FileGenericGrid).
    newname : (str/None)
        New name for the grid. Defaults to ``grid.id``, or 'Grid' when the
        grid has no id.

    Returns
    -------
    file grid (cdms2.grid.FileRectGrid)

    Raises
    ------
    CDMSError
        If a different grid with the same name already exists.
    """
    if newname is None:
        if hasattr(grid, 'id'):
            newname = grid.id
        else:
            newname = 'Grid'

    # Give anonymous coordinate axes a default id so they can be copied.
    oldlat = grid.getLatitude()
    if not hasattr(oldlat, 'id'):
        oldlat.id = 'latitude'
    oldlon = grid.getLongitude()
    if not hasattr(oldlon, 'id'):
        oldlon.id = 'longitude'
    lat = self.copyAxis(oldlat)
    lat.designateLatitude(persistent=1)
    lon = self.copyAxis(oldlon)
    lon.designateLongitude(persistent=1)

    # If the grid name already exists, and is the same, just return it
    if newname in self.grids:
        newgrid = self.grids[newname]
        newlat = newgrid.getLatitude()
        newlon = newgrid.getLongitude()
        if ((newlat is not lat) or
                (newlon is not lon) or
                (newgrid.getOrder() != grid.getOrder()) or
                (newgrid.getType() != grid.getType())):
            # DuplicateGrid is a message prefix (a str), not an exception
            # class, so it cannot be called or raised directly.
            raise CDMSError(DuplicateGrid + newname)

    # else create a new grid and copy metadata
    else:
        newmask = grid.getMask()    # Get the mask array
        newgrid = self.createRectGrid(
            newname, lat, lon, grid.getOrder(), grid.getType(), None)
        newgrid.setMask(newmask)    # Set the mask array, non-persistently
        for attname in list(grid.attributes.keys()):
            setattr(newgrid, attname, getattr(grid, attname))

    return newgrid
# Create a variable # 'name' is the string name of the Variable # 'datatype' is a CDMS datatype or numpy typecode # 'axesOrGrids' is a list of axes, grids. (Note: this should be # generalized to allow subintervals of axes and/or grids) # Return a variable object.
def createVariable(self, name, datatype, axesOrGrids, fill_value=None):
    """Create a variable in the file.

    Parameters
    ----------
    name :
        The string name of the variable.
    datatype :
        A CDMS datatype or numpy typecode.
    axesOrGrids :
        A list of file axes and/or rectilinear file grids. A grid
        contributes its first two axes.
    fill_value :
        Missing value; applied with ``setMissing`` when not None.

    Notes
    -----
    This should be generalized to allow subintervals of axes and/or grids.

    Returns
    -------
    A variable object (cdms2.fvariable.FileVariable).

    Raises
    ------
    CDMSError
        If the file is closed, if a domain element is neither a FileAxis
        nor a FileRectGrid, or if Cdunif fails to create the variable.
    """
    if self._status_ == "closed":
        raise CDMSError(FileWasClosed + self.id)
    cufile = self._file_
    if datatype in CdDatatypes:
        numericType = cdmsNode.CdToNumericType.get(datatype)
    else:
        numericType = datatype

    # Make a list of names of axes for _Cdunif
    dimensions = []
    for obj in axesOrGrids:
        if isinstance(obj, FileAxis):
            dimensions.append(str(obj.id))
        elif isinstance(obj, FileRectGrid):
            dimensions = dimensions + \
                [str(obj.getAxis(0).id), str(obj.getAxis(1).id)]
        else:
            # InvalidDomain is a message string; raising a bare string is a
            # TypeError, so wrap it in the module's exception class.
            raise CDMSError(InvalidDomain)

    try:
        # Compatibility: revert to old typecode for cdunif
        # numericType = typeconv.oldtypecodes[numericType]
        numericType = numpy.dtype(numericType).char
        cuvar = cufile.createVariable(
            str(name), numericType, tuple(dimensions))
    except Exception as err:
        print(err)
        raise CDMSError("Creating variable " + str(name))
    var = FileVariable(self, name, cuvar)
    var.initDomain(self.axes)
    self.variables[name] = var
    if fill_value is not None:
        var.setMissing(fill_value)
    return var
# Search for a pattern in a string-valued attribute. If attribute is None, # search all string attributes. If tag is 'cdmsFile', just check the dataset, # else check all nodes in the dataset of class type matching the tag. If tag # is None, search the dataset and all objects contained in it.
def searchPattern(self, pattern, attribute, tag):
    """Search for a pattern in a string-valued attribute.

    If attribute is None, search all string attributes. The tag selects
    what is searched and is compared case-insensitively:

    * 'cdmsFile' or 'dataset': only this file object;
    * a key of ``self.dictdict`` (e.g. 'variable', 'axis', 'rectGrid'):
      only the objects of that kind;
    * None: this file and every object contained in it.

    Parameters
    ----------
    pattern :
        Expression pattern.
    attribute :
        Attribute name.
    tag :
        Node tag.

    Returns
    -------
    List of the objects for which ``searchone`` reported a match.

    Raises
    ------
    KeyError
        If tag names neither the file nor a key of ``self.dictdict``.
    """
    lowered = None if tag is None else tag.lower()
    include_self = lowered in (None, 'cdmsfile', 'dataset')

    resultlist = []
    if include_self and self.searchone(pattern, attribute) == 1:
        resultlist.append(self)

    if lowered is None:
        tables = list(self.dictdict.values())
    elif include_self:
        tables = []
    else:
        # dictdict keys are mixed case ('rectGrid', ...), so the lookup has
        # to be case-insensitive as well.
        matching = [key for key in self.dictdict if key.lower() == lowered]
        if not matching:
            raise KeyError(tag)
        tables = [self.dictdict[matching[0]]]

    for table in tables:
        for obj in list(table.values()):
            if obj.searchone(pattern, attribute):
                resultlist.append(obj)
    return resultlist
# Match a pattern in a string-valued attribute. If attribute is None, # search all string attributes. If tag is 'cdmsFile', just check the dataset, # else check all nodes in the dataset of class type matching the tag. If tag # is None, search the dataset and all objects contained in it.
def matchPattern(self, pattern, attribute, tag):
    """Match a pattern in a string-valued attribute.

    If attribute is None, all string attributes are tried. The tag selects
    what is matched and is compared case-insensitively:

    * 'cdmsFile' or 'dataset': only this file object;
    * a key of ``self.dictdict`` (e.g. 'variable', 'axis', 'rectGrid'):
      only the objects of that kind;
    * None: this file and every object contained in it.

    Parameters
    ----------
    pattern :
        String expression.
    attribute :
        Attribute name. If `None`, search all attributes.
    tag :
        Node tag.

    Returns
    -------
    List of the objects for which ``matchone`` reported a match.

    Raises
    ------
    KeyError
        If tag names neither the file nor a key of ``self.dictdict``.
    """
    lowered = None if tag is None else tag.lower()
    include_self = lowered in (None, 'cdmsfile', 'dataset')

    resultlist = []
    if include_self and self.matchone(pattern, attribute) == 1:
        resultlist.append(self)

    if lowered is None:
        tables = list(self.dictdict.values())
    elif include_self:
        tables = []
    else:
        # dictdict keys are mixed case ('rectGrid', ...), so the lookup has
        # to be case-insensitive as well.
        matching = [key for key in self.dictdict if key.lower() == lowered]
        if not matching:
            raise KeyError(tag)
        tables = [self.dictdict[matching[0]]]

    for table in tables:
        for obj in list(table.values()):
            if obj.matchone(pattern, attribute):
                resultlist.append(obj)
    return resultlist
# Apply a predicate, returning a list of all objects in the dataset # for which the predicate is true. The predicate is a function which # takes a dataset as an argument, and returns true or false. If the # tag is 'cdmsFile', the predicate is applied to the dataset only. # If 'variable', 'axis', etc., it is applied only to that type of object # in the dataset. If None, it is applied to all objects, including # the dataset itself.
def searchPredicate(self, predicate, tag):
    """Apply a truth-valued predicate to this file and/or its contents.

    The tag selects what the predicate is applied to and is compared
    case-insensitively:

    * 'cdmsFile' or 'dataset': only this file object;
    * a key of ``self.dictdict`` (e.g. 'variable', 'axis', 'rectGrid'):
      only the objects of that kind;
    * None: this file and every object contained in it.

    Parameters
    ----------
    predicate :
        Function of one argument used as the predicate.
    tag :
        Node tag.

    Returns
    -------
    List of the objects for which the predicate returned true.

    Notes
    -----
    ``AttributeError`` raised by the predicate is treated as "no match".
    For a specific object tag any other ``Exception`` is ignored too, as
    before; interpreter-exit exceptions such as ``KeyboardInterrupt`` are
    no longer swallowed.

    Raises
    ------
    KeyError
        If tag names neither the file nor a key of ``self.dictdict``.
    """
    lowered = None if tag is None else tag.lower()
    include_self = lowered in (None, 'cdmsfile', 'dataset')

    resultlist = []
    if include_self:
        try:
            if predicate(self) == 1:
                resultlist.append(self)
        except AttributeError:
            pass

    if lowered is None:
        for table in list(self.dictdict.values()):
            for obj in list(table.values()):
                try:
                    if predicate(obj) == 1:
                        resultlist.append(obj)
                except AttributeError:
                    pass
    elif not include_self:
        # dictdict keys are mixed case ('rectGrid', ...), so the lookup has
        # to be case-insensitive as well.
        matching = [key for key in self.dictdict if key.lower() == lowered]
        if not matching:
            raise KeyError(tag)
        for obj in list(self.dictdict[matching[0]].values()):
            try:
                if predicate(obj) == 1:
                    resultlist.append(obj)
            except Exception:
                pass
    return resultlist
def createVariableCopy(self, var, id=None, attributes=None, axes=None, extbounds=None,
                       extend=0, fill_value=None, index=None, newname=None, grid=None):
    """Define a new variable, with the same axes and attributes as in <var>.

    Note
    ----
    This function does not copy the data itself. The attribute dictionary
    in use (``attributes`` or ``var.attributes``) is modified in place
    while the copy is defined.

    Parameters
    ----------
    var :
        Variable to copy (cdms2.tvariable.TransientVariable or
        cdms2.fvariable.FileVariable).
    attributes :
        A dictionary of attributes. Default is var.attributes.
    axes :
        The list of axis objects. Default is var.getAxisList().
    extbounds :
        Bounds of the (portion of) the extended dimension being written.
    id or newname :
        String identifier of the new variable; ``id`` wins when both are
        given. Default is var.id.
    extend :
        * 1 define the first dimension as the unlimited dimension.
        * 0 do not define an unlimited dimension.
        * None define the first dimension as unlimited only if it is a
          time dimension.
    fill_value :
        The missing value flag.
    index :
        The extended dimension index for writing. The default index is
        determined by lookup relative to the existing extended dimension.
    grid :
        The variable grid. If None, the value of var.getGrid() is used.

    Returns
    -------
    file variable (cdms2.fvariable.FileVariable)

    Raises
    ------
    CDMSError
        If a variable with the requested name already exists.
    DuplicateAxisError
        If no unique name could be found for a conflicting axis.
    """
    if newname is None:
        newname = var.id
    if id is not None:
        newname = id
    if newname in self.variables:
        # DuplicateVariable is a message prefix (a str), not an exception
        # class, so it cannot be called or raised directly.
        raise CDMSError(DuplicateVariable + newname)

    # Determine the extended axis name if any
    if axes is None:
        sourceAxislist = var.getAxisList()
    else:
        sourceAxislist = axes

    if var.rank() == 0:      # scalars are not extensible
        extend = 0

    if extend in (1, None):
        firstAxis = sourceAxislist[0]
        if firstAxis is not None and (extend == 1 or firstAxis.isTime()):
            extendedAxis = firstAxis.id
        else:
            extendedAxis = None
    else:
        extendedAxis = None

    # Create axes if necessary
    axislist = []
    for axis in sourceAxislist:
        # classic does not handle int64 data
        if((axis[:].dtype == numpy.int64) and Cdunif.CdunifGetNCFLAGS("classic")):
            axis._data_ = numpy.array(axis[:], dtype=numpy.int32)
        if extendedAxis is None or axis.id != extendedAxis:
            try:
                newaxis = self.copyAxis(axis)
            except DuplicateAxisError:
                # Create a unique axis name by appending _a ... _z
                setit = 0
                for i in range(97, 123):  # Lower-case letters
                    try:
                        newaxis = self.copyAxis(
                            axis, axis.id + '_' + chr(i))
                        setit = 1
                        break
                    except DuplicateAxisError:
                        continue

                if setit == 0:
                    raise DuplicateAxisError(DuplicateAxis + axis.id)
        else:
            newaxis = self.copyAxis(
                axis, unlimited=1, index=index, extbounds=extbounds)

        axislist.append(newaxis)

    # Copy variable metadata
    if attributes is None:
        attributes = var.attributes
        try:
            attributes['missing_value'] = var.missing_value
        except Exception as err:
            print(err)
            pass
        try:
            if fill_value is None:
                if('_FillValue' in attributes.keys()):
                    attributes['_FillValue'] = numpy.array(
                        var._FillValue).astype(var.dtype)
                    attributes['missing_value'] = numpy.array(
                        var._FillValue).astype(var.dtype)
                if('missing_value' in attributes.keys()):
                    attributes['_FillValue'] = numpy.array(
                        var.missing_value).astype(var.dtype)
                    attributes['missing_value'] = numpy.array(
                        var.missing_value).astype(var.dtype)
            else:
                attributes['_FillValue'] = numpy.array(
                    fill_value).astype(var.dtype)
                attributes['missing_value'] = numpy.array(
                    fill_value).astype(var.dtype)
        except BaseException:
            pass
        if "name" in attributes:
            if attributes['name'] != var.id:
                del(attributes['name'])

    # Create grid as necessary
    if grid is None:
        grid = var.getGrid()
    if grid is not None:
        coords = grid.writeToFile(self)
        if coords is not None:
            coordattr = "%s %s" % (coords[0].id, coords[1].id)
            if attributes is None:
                attributes = {'coordinates': coordattr}
            else:
                attributes['coordinates'] = coordattr

    # Create the new variable
    datatype = cdmsNode.NumericToCdType.get(var.typecode())
    newvar = self.createVariable(str(newname), datatype, axislist)
    for attname, attval in list(attributes.items()):
        if attname not in ["id", "datatype", "parent"]:
            if isinstance(attval, string_types):
                attval = str(attval)
            setattr(newvar, str(attname), attval)
            # Keep the two missing-value spellings identical.
            if (attname == "_FillValue") or (attname == "missing_value"):
                setattr(newvar, "_FillValue", attval)
                setattr(newvar, "missing_value", attval)

    if fill_value is not None:
        newvar.setMissing(fill_value)

    return newvar
def write(self, var, attributes=None, axes=None, extbounds=None, id=None,
          extend=None, fill_value=None, index=None, typecode=None, dtype=None, pack=False):
    """Write var to the file.

    Notes
    -----
    If the variable is not yet defined in the file, a definition is
    created. By default, the time dimension of the variable is defined as
    the `extended dimension` of the file. The function returns the
    corresponding file variable.

    Parameters
    ----------
    var :
        Variable to copy.
    attributes :
        The attribute dictionary for the variable. The default is
        var.attributes.
    axes :
        The list of file axes comprising the domain of the variable. The
        default is to copy var.getAxisList().
    extbounds :
        The extended dimension bounds. Defaults to
        var.getAxis(0).getBounds().
    id :
        The variable name in the file. Default is var.id. Names longer
        than 127 characters are truncated with a warning.
    extend :
        * 1 causes the first dimension to be `extensible`, i.e.
          iteratively writeable.
        * None (default): the first dimension is extensible if it is time.
        * 0 to turn off this behaviour.
    fill_value :
        The missing value flag. Defaults to var.fill_value when present.
    index :
        The extended dimension index to write to. The default index is
        determined by lookup relative to the existing extended dimension.
    dtype :
        The numpy dtype.
    typecode :
        Deprecated, for backward compatibility only.
    pack :
        If true, store the data as 16-bit integers together with
        ``scale_factor`` and ``add_offset`` attributes. Only allowed for
        variables not yet defined in the file.

    Returns
    -------
    File variable

    Raises
    ------
    CDMSError
        If ``pack`` is requested for an existing variable, or if the first
        dimension of var does not overlap the extended dimension.
    """
    if _showCompressWarnings:
        if (Cdunif.CdunifGetNCFLAGS("shuffle") != 0) or (Cdunif.CdunifGetNCFLAGS(
                "deflate") != 0) or (Cdunif.CdunifGetNCFLAGS("deflate_level") != 0):
            warnings.warn("Files are written with compression and no shuffling\n" +
                          "You can query different values of compression using the functions:\n" +
                          "cdms2.getNetcdfShuffleFlag() returning 1 if shuffling is enabled, " +
                          "0 otherwise\ncdms2.getNetcdfDeflateFlag() returning 1 if deflate is used, " +
                          "0 otherwise\ncdms2.getNetcdfDeflateLevelFlag() " +
                          "returning the level of compression for the deflate method\n\n" +
                          "If you want to turn that off or set different values of compression " +
                          "use the functions:\nvalue = 0\ncdms2.setNetcdfShuffleFlag(value) " +
                          "## where value is either 0 or 1\ncdms2.setNetcdfDeflateFlag(value) " +
                          "## where value is either 0 or 1\ncdms2.setNetcdfDeflateLevelFlag(value) " +
                          "## where value is a integer between 0 and 9 included\n\nTo " +
                          "produce NetCDF3 Classic files use:\ncdms2.useNetCDF3()\n" +
                          "To Force NetCDF4 output with " +
                          "classic format and no compressing use:\ncdms2.setNetcdf4Flag(1)\n" +
                          "NetCDF4 file with no shuffling or deflate and noclassic will be open " +
                          "for parallel i/o", Warning)

    # Make var an AbstractVariable
    if dtype is None and typecode is not None:
        # dtype = typeconv.convtypecode2(typecode)
        dtype = typecode
    typecode = dtype
    if typecode is not None and var.dtype.char != typecode:
        var = var.astype(typecode)
    # The classic format has no 64-bit integers: downcast them.
    if var.dtype.char == 'l' and Cdunif.CdunifGetNCFLAGS("classic"):
        var = var.astype(numpy.int32)
    if var.dtype.char == 'L' and Cdunif.CdunifGetNCFLAGS("classic"):
        var = var.astype(numpy.uint32)
    var = asVariable(var, writeable=0)
    if fill_value is None and hasattr(var, "fill_value"):
        fill_value = var.fill_value

    # Define the variable if necessary.
    if id is None:
        varid = var.id
    else:
        varid = id
    if len(varid) > 127:
        msg = "varid name has more than 127 characters, name will be truncate"
        warnings.warn(msg, UserWarning)
        varid = varid[:127]
    if varid in self.variables:
        if pack:
            raise CDMSError(
                "You cannot pack an existing variable %s " % varid)
        v = self.variables[varid]
    else:
        if pack is not False:
            typ = numpy.int16
            n = 16
        else:
            typ = var.dtype
        v = self.createVariableCopy(var.astype(typ), attributes=attributes, axes=axes,
                                    extbounds=extbounds, id=varid, extend=extend,
                                    fill_value=fill_value, index=index)

    # If var has typecode int, and v is created from var, then v will have
    # typecode numpy.int32. (This is a Cdunif 'feature'). This causes a
    # downcast error for numpy versions 23+, so make the downcast explicit.
    # numpy.int was only an alias of the builtin int and has been removed
    # from NumPy, so the builtin is compared instead.
    if var.typecode() == int and v.typecode() == numpy.int32 and pack is False:
        var = var.astype(numpy.int32)

    # Write
    if axes is None:
        sourceAxislist = var.getAxisList()
    else:
        sourceAxislist = axes

    vrank = var.rank()
    if vrank == 0:      # scalars are not extensible
        extend = 0
    else:
        vec1 = sourceAxislist[0]

    if extend == 0 or (extend is None and not vec1.isTime()):
        if vrank > 0:
            if pack is not False:
                # Placeholder; the packed values are written further down.
                v[:] = numpy.zeros(var.shape, typ)
            else:
                v[:] = var.astype(v.dtype)
        else:
            v.assignValue(var.getValue())
    else:
        # Determine if the first dimension of var overlaps the first
        # dimension of v
        vec2 = v.getAxis(0)
        if extbounds is None:
            bounds1 = vec1.getBounds()
        else:
            bounds1 = extbounds
        if index is None:
            isoverlap, index = isOverlapVector(vec1[:], vec2[:])
        else:
            isoverlap = 1
        if isoverlap == 1:
            # Make sure file is up to date before copying.
            # user could have extended the file previously.
            self.sync()
            v[index:index + len(vec1)] = var.astype(v.dtype)
            vec2[index:index + len(vec1)] = vec1[:].astype(vec2[:].dtype)
            if bounds1 is not None:
                vec2.setBounds(bounds1, persistent=1, index=index)
        else:
            msg1 = "Cannot write variable {v_id}: ".format(v_id=varid)
            msg2 = "the values of dimension {id}={val}".format(id=vec1.id, val=repr(vec1[:]))
            msg3 = "do not overlap the extended dimension {id} values: {val}".format(id=vec2.id, val=repr(vec2[:]))
            raise CDMSError("{m1} {m2} {m3}".format(m1=msg1, m2=msg2, m3=msg3))

    # pack implementation source:
    # https://www.unidata.ucar.edu/software/netcdf/docs/BestPractices.html
    if pack:
        M = var.max()
        m = var.min()
        scale_factor = (M - m) / (pow(2, n) - 2)
        add_offset = (M + m) / 2.
        # The most negative n-bit value is reserved as the missing value.
        v.setMissing(-pow(2, n - 1))
        scale_factor = scale_factor.astype(var.dtype)
        add_offset = add_offset.astype(var.dtype)
        tmp = (var - add_offset) / scale_factor
        tmp = numpy.round(tmp)
        tmp = tmp.astype(typ)
        v[:] = tmp.filled()
        v.scale_factor = scale_factor.astype(var.dtype)
        v.add_offset = add_offset.astype(var.dtype)
        if not hasattr(var, "valid_min"):
            v.valid_min = m.astype(var.dtype)
        if not hasattr(var, "valid_max"):
            v.valid_max = M.astype(var.dtype)
    return v
def write_it_yourself(self, obj):
    """Tell obj to write itself to self (already open for writing).

    Notes
    -----
    The object's `writeg` method is preferred (AbstractCurveGrid has such
    a method, for example). If `writeg` is not available, `writeToFile`
    is used. If neither is a callable attribute, `self.write(obj)` is
    called to try to write obj as a variable.

    Parameters
    ----------
    obj :
        Object containing a `writeg` or `writeToFile` method, or a
        variable accepted by `write`.

    Returns
    -------
    Nothing is returned.
    """
    # This method was formerly called writeg and just wrote an
    # AbstractCurveGrid.
    for method_name in ('writeg', 'writeToFile'):
        writer = getattr(obj, method_name, None)
        if callable(writer):
            writer(self)
            return
    self.write(obj)
def getVariable(self, id):
    """Look up a file variable by id.

    Parameters
    ----------
    id : str
        Id of the variable to get.

    Returns
    -------
    variable (cdms2.fvariable.FileVariable/None)
        The entry of ``self.variables`` stored under ``id``, or None if
        not found.
    """
    known = self.variables
    return known.get(id)
def getVariables(self, spatial=0):
    """Get a list of variable objects.

    Parameters
    ----------
    spatial :
        If true, keep only variables that have a latitude, longitude or
        level axis, and drop those whose id starts with "bounds_" or
        "weights_".

    Returns
    -------
    List of file variables.
    """
    everything = list(self.variables.values())
    if not spatial:
        return everything

    selected = []
    for candidate in everything:
        name = candidate.id
        if name[0:7] == "bounds_" or name[0:8] == "weights_":
            continue
        if (candidate.getLatitude() is not None
                or candidate.getLongitude() is not None
                or candidate.getLevel() is not None):
            selected.append(candidate)
    return selected
def getAxis(self, id):
    """Look up a file axis by id.

    Parameters
    ----------
    id :
        Id of the axis to get.

    Returns
    --------
    The entry of ``self.axes`` stored under ``id``, or None if not found.
    """
    known = self.axes
    return known.get(id)
def getGrid(self, id):
    """Look up a file grid by id.

    Parameters
    ----------
    id :
        Id of the grid to get.

    Returns
    -------
    The entry of ``self.grids`` stored under ``id``, or None if not found.
    """
    known = self.grids
    return known.get(id)
def getBoundsAxis(self, n, boundid=None):
    """Get a bounds axis of length n, creating it if necessary.

    Parameters
    ----------
    n :
        Length of the bounds axis.
    boundid :
        Axis id to use. Defaults to "bound" when n is 2 and to
        "bound_<n>" otherwise.

    Returns
    -------
    The existing axis with that id, or a newly created virtual axis.
    """
    if boundid is None:
        boundid = "bound" if n == 2 else "bound_%d" % n
    if boundid not in self.axes:
        return self.createVirtualAxis(boundid, n)
    return self.axes[boundid]
def __repr__(self):
    """Return "<CDMS ..., status: ...>" built from the Cdunif file's repr.

    The Cdunif repr is cut from the first occurrence of "file" (or from
    its start when that word is absent) up to, but excluding, its last
    character.
    """
    described = repr(self._file_)
    start = max(described.find("file"), 0)
    return "<CDMS " + described[start:-1] + ", status: %s>" % self._status_
# internattr.add_internal_attribute (CdmsFile, 'datapath', # 'variables', # 'axes', # 'grids', # 'xlinks', # 'dictdict', # 'default_variable_name', # 'id', # 'parent', # 'mode')