Source code for cdms2.variable

# Automatically adapted for numpy.oldnumeric Aug 01, 2007 by

"""
DatasetVariable: Dataset-based variables
"""
# from cdms2 import Cdunif
import numpy
from . import cdmsNode
import cdtime
import copy
# import os
import string
# import sys
# import types
# from . import cdmsobj
from .cdmsobj import getPathFromTemplate, Max32int
from .avariable import AbstractVariable
from .sliceut import slicePartition, sliceIntersect, reverseSlice, lenSlice
from .error import CDMSError

InvalidGridElement = "Grid domain elements are not yet implemented: "
InvalidRegion = "Invalid region: "
NoSuchAxisOrGrid = "No such axis or grid: "
OutOfRange = "Coordinate interval is out of range: "
TooManyPartitions = "Variable has too many partitioned axes, max is two: "
WriteNotImplemented = "Dataset write operation not implemented"
FileClosed = "Cannot read from closed file or dataset, variable: "


[docs]def timeindex(value, units, basetime, delta, delunits, calendar): """ Calculate (t - basetime)/delu Parameters ---------- where t : = reltime(value, units) and delu : is the time interval (delta, delunits) (e.g., 1 month). """ tval = cdtime.reltime(value, units) tounits = "%s since %s" % (delunits, basetime) newval = tval.torel(tounits, calendar) return int(newval.value / delta)
[docs]class DatasetVariable(AbstractVariable): """Variable (parent, variableNode=None) Parameters ---------- variableNode : is the variable tree node, if any. parent : is the containing dataset instance. """
[docs] def __init__(self, parent, id, variableNode=None): """ """ AbstractVariable.__init__(self, parent, variableNode) val = self.__cdms_internals__ + ['domain', 'name_in_file'] self.___cdms_internals__ = val self.id = id self.domain = [] # Get self.name_in_file from the .xml file if present if not hasattr(self, 'name_in_file'): self.name_in_file = id # if self.attributes.has_key('name_in_file'): # self.name_in_file = self.attributes['name_in_file'] if variableNode is not None: self._numericType_ = cdmsNode.CdToNumericType.get( variableNode.datatype) else: self._numericType_ = numpy.float assert self.id is not None
[docs] def __len__(self): "Length of first dimension" if len(self.domain) > 0: (axis, start, length, true_length) = self.domain[0] else: length = 0 return length
# def __repr__(self): # if self.parent is not None: # parentid = self.parent.id # else: # parentid = "**CLOSED**" # return "<Variable: %s, dataset: %s, shape: %s>"%(self.id, parentid, # `self.shape`) def __getitem__(self, key): if self.parent is None: raise CDMSError(FileClosed + str(self.id)) return AbstractVariable.__getitem__(self, key)
[docs] def getValue(self, squeeze=1): """Return the entire set of values.""" if self.parent is None: raise CDMSError(FileClosed + self.id) return self.getSlice(Ellipsis, squeeze=squeeze)
def __getslice__(self, low, high): if self.parent is None: raise CDMSError(FileClosed + self.id) # Hack to prevent netCDF overflow error on 64-bit architectures high = min(Max32int, high) return AbstractVariable.__getslice__(self, low, high) def __setitem__(self, index, value): raise CDMSError(WriteNotImplemented) def __setslice__(self, low, high, value): raise CDMSError(WriteNotImplemented) def _getShape(self): return self.getShape() def _getdtype(self): tc = self.typecode() return numpy.dtype(tc)
[docs] def getShape(self): shape = [] for (axis, start, length, true_length) in self.domain: shape.append(length) return tuple(shape)
[docs] def typecode(self): return numpy.dtype(self._numericType_).char
[docs] def size(self): "Number of elements." n = 1 for k in self.shape: n = k * n return n
[docs] def initDomain(self, axisdict, griddict): "Must be called by whoever made this Variable to set up axes, grids." self.domain = [] domnode = self._node_.getDomain() for denode in domnode.children(): dename = denode.getName() domelem = axisdict.get(dename) if domelem is None: domelem = griddict.get(dename) # if grid is None: # raise CDMSError(NoSuchAxisOrGrid + dename) # else: # raise CDMSError(InvalidGridElement + dename) partlenstr = denode.getExternalAttr('partition_length') if partlenstr is not None: truelen = int(partlenstr) else: truelen = denode.length self.domain.append((domelem, denode.start, denode.length, truelen))
# Get the template
[docs] def getTemplate(self): if hasattr(self, 'template'): template = self.template elif hasattr(self.parent, 'template'): template = self.parent.template else: template = None return template
[docs] def getAxis(self, n): if n < 0: n = n + self.rank() return self.domain[n][0]
[docs] def getDomain(self): return self.domain
# Get the paths associated with the interval region specified # by 'intervals'. This incorporates most of the logic of __getitem__, # without actually reading the data. # # 'specs' is a list of interval range specifications as defined # for getSlice. # # The function returns a list of tuples of the form (path,slicelist), # where path is the path of a file, and slicetuple is a tuple of # slices, of the same length as the rank of the variable, representing the # region of the variable which is contained in the file. The following # would retrieve the data for that file: # # f = Cdunif.CdunifFile(path,'r') # var = f.variables[self.name_in_file] # data = apply(var.getitem,slicelist) #
[docs] def getPaths(self, *specs, **keys): # Create an equivalent list of slices speclist = self._process_specs(specs, keys) slicelist = self.specs2slices(speclist) print(slicelist) # Generate the filelist npart, idims, partitionSlices = self.expertPaths(slicelist) # Flatten the list result = [] if partitionSlices is None: pass elif npart == 0: filename, slicelist = partitionSlices if filename is not None: result.append((filename, tuple(slicelist))) elif npart == 1: for filename, slicelist in partitionSlices: if filename is not None: result.append((filename, tuple(slicelist))) elif npart == 2: for filelist in partitionSlices: for filename, slicelist in filelist: if filename is not None: result.append((filename, tuple(slicelist))) return result
[docs] def genMatch(self, axis, interval, matchnames): """Helper function for expertPaths. Parameters ---------- axis : is a partitioned axis, either time or vertical level or forecast. interval : is an index interval (istart, iend). matchnames : is a partially filled list [id, timestart, timeend, levstart, levend, fc] If a filemap is used, matchnames has indices, otherwise has coordinates. Function : modifies matchnames based on axis and interval, returns the modified matchnames tuple. """ if axis.isTime(): if hasattr(self.parent, 'cdms_filemap'): start = interval[0] end = interval[1] else: # Use template method time0 = axis[interval[0]] time1 = axis[interval[1] - 1] isabs = (string.find(axis.units, " as ") != -1) if isabs: start = cdtime.abstime(time0, axis.units) end = cdtime.abstime(time1, axis.units) else: cal = axis.getCalendar() start = cdtime.reltime(time0, axis.units).tocomp(cal) end = cdtime.reltime(time1, axis.units).tocomp(cal) matchnames[1] = start matchnames[2] = end elif axis.isForecast(): start = axis.getValue()[interval[0]] end = axis.getValue()[interval[1] - 1] matchnames[5] = start matchnames[6] = end else: if hasattr(self.parent, 'cdms_filemap'): start = interval[0] end = interval[1] else: start = int(axis[interval[0]]) end = int(axis[interval[1] - 1]) matchnames[3] = start matchnames[4] = end return matchnames
[docs] def getFilePath(self, matchnames, template): """Lookup or generate the file path, depending on whether a filemap or template is present. """ if hasattr(self.parent, 'cdms_filemap'): id, tstart, tend, levstart, levend, fcstart, fcend = matchnames filename = self.parent._filemap_[ (self.id, tstart, levstart, fcstart)] # ... filemap uses dataset IDs else: filename = getPathFromTemplate(template, matchnames) return filename
[docs] def getPartition(self, axis): """Get the partition attribute for this variable, axis. Parameters ---------- axis : is either a time or level axis. If cdms_filemap is being used, get the partition from the _varpart_ attribute, otherwise (for templating) use axis.partition. """ if hasattr(self.parent, 'cdms_filemap'): if axis.isTime(): partition = self._varpart_[0] elif axis.isForecast(): partition = axis.partition else: # level partition = self._varpart_[1] else: # Template method partition = axis.partition return partition
[docs] def expertPaths(self, slist): """ Expert Paths Parameters ---------- expertPaths : (self, slicelist) takes a list of slices Returns ------- a 3-tuple (npart, dimensionlist, partitionSlices) Where npart : is the number of partitioned dimensions: 0, 1, or 2; dimensionlist : is a tuple of length npart, having the dimension numbers of the partitioned dimensions; partitionSlices : is the list of file-specific (filename, slice) corresponding to the paths and slices within the files to be read. The exact form of partitionSlices depends on the value of npart npart : partitionSlices 0 : (filename,slicelist) 1 : [(filename,slicelist),...,(filename,slicelist)] 2 : [[(filename,slicelist),...,(filename,slicelist)] [(filename,slicelist),...,(filename,slicelist)] [(filename,slicelist),...,(filename,slicelist)]] Notes ----- - A filename of None indicates that no file was found with data corresponding to the slicelist. - If partitionSlices is None, the slicelist does not intersect the domain. - An empty partitionSlices [] means that the variable is zero-dimensional. """ # slicelist gets modified, slist doesn't slicelist = copy.copy(slist) template = self.getTemplate() # Use the name_in_file attribute to access files if hasattr(self, 'name_in_file'): realid = self.name_in_file else: realid = self.id # Handle rank-0 variables separately if self.rank() == 0: matchnames = [realid, None, None, None, None, None, None] filename = self.getFilePath(matchnames, template) result = (0, (), (filename, [])) return result # Find the number of partitioned axes npart = 0 ndim = 0 for (axis, start, length, true_length) in self.domain: if hasattr(axis, 'partition'): npart = npart + 1 if npart == 1: # part1 = axis npart1 = ndim elif npart == 2: # part2 = axis npart2 = ndim else: raise CDMSError(TooManyPartitions + self.id) ndim = ndim + 1 # If no partitioned axes, just read the data if npart == 0: matchnames = [realid, None, None, None, None, None, None] filename = self.getFilePath(matchnames, template) result = (0, (), (filename, slicelist)) # If one partitioned axes: elif npart == 1: # intersect the slice and partition for that axis slice1 = slicelist[npart1] (axis, startelem, length, true_length) = self.domain[npart1] partition = slicePartition(slice1, self.getPartition(axis)) if partition == []: return (1, (npart1,), None) # For each (interval, partslice) in the partition: resultlist = [] (firstinterval, firstslice) = partition[0] prevhigh = firstinterval[0] for (interval, partslice) in partition: # If the previous interval high is less than # the current interval low value, interpose # missing data. low = interval[0] if prevhigh < low: missing_interval = (prevhigh, low) missing_slice = sliceIntersect(slice1, missing_interval) # Note: if the slice has a stride>1, it might not intersect, # so don't interpose missing data in this case. if missing_slice is not None: slicelist[npart1] = missing_slice resultlist.append((None, copy.copy(slicelist))) prevhigh = interval[1] # generate the filename matchnames = [realid, None, None, None, None, None, None] matchnames = self.genMatch(axis, interval, matchnames) filename = self.getFilePath(matchnames, template) # adjust the partslice for the interval offset # and replace in the slice list filestart = partslice.start - interval[0] filestop = partslice.stop - interval[0] fileslice = slice(filestart, filestop, partslice.step) slicelist[npart1] = fileslice resultlist.append((filename, copy.copy(slicelist))) result = (1, (npart1,), resultlist) # If two partitioned axes, 2-D version of previous case if npart == 2: slice1 = slicelist[npart1] slice2 = slicelist[npart2] (axis1, startelem1, length1, true_length1) = self.domain[npart1] (axis2, startelem2, length2, true_length2) = self.domain[npart2] partition1 = slicePartition(slice1, self.getPartition(axis1)) partition2 = slicePartition(slice2, self.getPartition(axis2)) if partition1 == [] or partition2 == []: return (2, (npart1, npart2), None) # For each (interval, partslice) in the partition: resultlist = [] (firstinterval1, firstslice1) = partition1[0] prevhigh1 = firstinterval1[0] for (interval1, partslice1) in partition1: # If the previous interval high is less than # the current interval low value, interpose # missing data. low = interval1[0] if prevhigh1 < low: missing_interval = (prevhigh1, low) missing_slice = sliceIntersect(slice1, missing_interval) if missing_slice is not None: slicelist[npart1] = missing_slice resultlist.append([(None, copy.copy(slicelist))]) prevhigh1 = interval1[1] # generate matchnames matchnames = [realid, None, None, None, None, None, None] matchnames = self.genMatch(axis1, interval1, matchnames) # adjust the partslice for the interval offset # and replace in the slice list filestart = partslice1.start - interval1[0] filestop = partslice1.stop - interval1[0] fileslice = slice(filestart, filestop, partslice1.step) slicelist[npart1] = fileslice chunklist = [] (firstinterval2, firstslice2) = partition2[0] prevhigh2 = firstinterval2[0] for (interval2, partslice2) in partition2: # If the previous interval high is less than # the current interval low value, interpose # missing data. low = interval2[0] if prevhigh2 < low: missing_interval = (prevhigh2, low) missing_slice = sliceIntersect( slice1, missing_interval) if missing_slice is not None: slicelist[npart2] = missing_slice chunklist.append((None, copy.copy(slicelist))) prevhigh2 = interval2[1] # generate the filename matchnames = self.genMatch(axis2, interval2, matchnames) filename = self.getFilePath(matchnames, template) filestart = partslice2.start - interval2[0] filestop = partslice2.stop - interval2[0] fileslice = slice(filestart, filestop, partslice2.step) slicelist[npart2] = fileslice chunklist.append((filename, copy.copy(slicelist))) resultlist.append(chunklist) result = (2, (npart1, npart2), resultlist) return result
[docs] def expertSlice(self, initslist): # Handle negative slices revlist = [] # Slices to apply to result if reversals needed slist = [] # Slices with positive strides haveReversals = 0 # True iff result array needs reversing i = 0 for s in initslist: if s.step < 0: axislen = self.shape[i] slist.append(reverseSlice(s, axislen)) revlist.append(slice(None, None, -1)) haveReversals = 1 else: slist.append(s) revlist.append(slice(None, None, 1)) i += 1 # This does most of the work npart, idims, partitionSlices = self.expertPaths(slist) # If the dataset includes a forecast axis, find it now, as well # as this slice's corresponding index in that direction. fci = None for i in range(len(self.domain)): if self.domain[i][0].isForecast(): fci = i # fcv = initslist[i].start break # If no intersection, return an 'empty' array. if partitionSlices is None: return numpy.ma.zeros((0,), self._numericType_) # Handle rank-0 variables separately if self.rank() == 0: filename, dumlist = partitionSlices f = self.parent.openFile(filename, 'r') try: var = f.variables[self.name_in_file] result = var.getValue() finally: f.close() return result # If no partitioned axes, just read the data if npart == 0: filename, slicelist = partitionSlices f = self.parent.openFile(filename, 'r') try: var = f.variables[self.name_in_file] if fci is None: result = self._returnArray( var.getitem(*tuple(slicelist)), 0) else: # If there's a forecast axis, the file doesn't know about it so # don't use it in slicing data out of the file. result = self._returnArray(var.getitem(*tuple(slicelist[0:fci] + slicelist[fci + 1:])), 0) # But the result still needs an index in the forecast direction, # which is simple to do because there is only one forecast # per file: result.resize(list(map(lenSlice, slicelist))) finally: f.close() sh = result.shape if 0 in sh: raise CDMSError(IndexError + 'Coordinates out of Domain') # If one partitioned axes: elif npart == 1: npart1 = idims[0] resultlist = [] for filename, slicelist in partitionSlices: # If the slice is missing, interpose missing data if filename is None: shapelist = list(map(lenSlice, slicelist)) chunk = numpy.ma.zeros( tuple(shapelist), self._numericType_) chunk[...] = numpy.ma.masked # else read the data and close the file else: f = self.parent.openFile(filename, 'r') try: var = f.variables[self.name_in_file] if fci is None: chunk = var.getitem(*tuple(slicelist)) else: # If there's a forecast axis, the file doesn't know about it so # don't use it in slicing data out of the file. chunk = var.getitem( *tuple(slicelist[0:fci] + slicelist[fci + 1:])) # But the chunk still needs an index in the forecast direction, # which is simple to do because there is only one # forecast per file: chunk.resize(list(map(lenSlice, slicelist))) finally: f.close() sh = chunk.shape if 0 in sh: raise CDMSError('Coordinates out of Domain') resultlist.append(self._returnArray(chunk, 0)) # Combine the chunks into a single array # Note: This works because slicelist is the same length # as the domain, and var.getitem returns a chunk # with singleton dimensions included. This means that # npart1 corresponds to the correct dimension of chunk. result = numpy.ma.concatenate(resultlist, axis=npart1) for chunk in resultlist: del(chunk) # If two partitioned axes, 2-D version of previous case if npart == 2: npart1, npart2 = idims resultlist = [] for filelist in partitionSlices: chunklist = [] for filename, slicelist in filelist: # If the slice is missing, interpose missing data if filename is None: shapelist = list(map(lenSlice, slicelist)) chunk = numpy.ma.zeros( tuple(shapelist), self._numericType_) chunk[...] = numpy.ma.masked # else read the data and close the file else: f = self.parent.openFile(filename, 'r') try: var = f.variables[self.name_in_file] if fci is None: chunk = var.getitem(*tuple(slicelist)) else: # If there's a forecast axis, the file doesn't know about it so # don't use it in slicing data out of the file. chunk = var.getitem( *tuple(slicelist[0:fci] + slicelist[fci + 1:])) # But the chunk still needs an index in the forecast direction, # which is simple to do because there is only # one forecast per file: chunk.resize(list(map(lenSlice, slicelist))) finally: f.close() sh = chunk.shape if 0 in sh: raise CDMSError('Coordinates out of Domain') chunklist.append(self._returnArray(chunk, 0)) # Note: This works because slicelist is the same length # as the domain, and var.getitem returns a chunk # with singleton dimensions included. This means that # npart1 corresponds to the correct dimension of chunk. bigchunk = numpy.ma.concatenate(chunklist, axis=npart2) for chunk in chunklist: del(chunk) resultlist.append(bigchunk) result = numpy.ma.concatenate(resultlist, axis=npart1) for bigchunk in resultlist: del(bigchunk) # If slices with negative strides were input, apply the appropriate # reversals. if haveReversals: result = result[revlist] return result
shape = property(_getShape, None) # shape = _getShape dtype = property(_getdtype, None)
# PropertiedClasses.set_property (DatasetVariable, 'shape', # DatasetVariable._getShape, nowrite=1, # nodelete=1) # PropertiedClasses.set_property (DatasetVariable, 'dtype', # DatasetVariable._getdtype, nowrite=1, # nodelete=1) # internattr.add_internal_attribute(DatasetVariable, 'domain')