Source code for cdms2.variable

# Automatically adapted for numpy.oldnumeric Aug 01, 2007 by

"""
DatasetVariable: Dataset-based variables
"""
# from cdms2 import Cdunif
import numpy
from . import cdmsNode
import cdtime
import copy
# import os
import string
# import sys
# import types
# from . import cdmsobj
from .cdmsobj import getPathFromTemplate, Max32int
from .avariable import AbstractVariable
from .sliceut import slicePartition, sliceIntersect, reverseSlice, lenSlice
from .error import CDMSError

InvalidGridElement = "Grid domain elements are not yet implemented: "
InvalidRegion = "Invalid region: "
NoSuchAxisOrGrid = "No such axis or grid: "
OutOfRange = "Coordinate interval is out of range: "
TooManyPartitions = "Variable has too many partitioned axes, max is two: "
WriteNotImplemented = "Dataset write operation not implemented"
FileClosed = "Cannot read from closed file or dataset, variable: "


[docs]def timeindex(value, units, basetime, delta, delunits, calendar):
    """ Calculate (t - basetime)/delu

    Parameters
    ----------
    where t :  = reltime(value, units) and

    delu :  is the time interval (delta, delunits) (e.g., 1 month).
    """
    tval = cdtime.reltime(value, units)
    tounits = "%s since %s" % (delunits, basetime)
    newval = tval.torel(tounits, calendar)
    return int(newval.value / delta)


[docs]class DatasetVariable(AbstractVariable):
    """Variable (parent, variableNode=None)

       Parameters
       ----------
       variableNode : is the variable tree node, if any.

       parent : is the containing dataset instance.

    """

[docs]    def __init__(self, parent, id, variableNode=None):
        """
        """
        AbstractVariable.__init__(self, parent, variableNode)
        val = self.__cdms_internals__ + ['domain', 'name_in_file']
        self.___cdms_internals__ = val
        self.id = id
        self.domain = []
        # Get self.name_in_file from the .xml file if present
        if not hasattr(self, 'name_in_file'):
            self.name_in_file = id

        # if self.attributes.has_key('name_in_file'):
        #     self.name_in_file = self.attributes['name_in_file']
        if variableNode is not None:
            self._numericType_ = cdmsNode.CdToNumericType.get(
                variableNode.datatype)
        else:
            self._numericType_ = numpy.float
        assert self.id is not None

[docs]    def __len__(self):
        "Length of first dimension"
        if len(self.domain) > 0:
            (axis, start, length, true_length) = self.domain[0]
        else:
            length = 0

        return length

#    def __repr__(self):
#        if self.parent is not None:
#            parentid = self.parent.id
#        else:
#            parentid = "**CLOSED**"
# return "<Variable: %s, dataset: %s, shape: %s>"%(self.id, parentid,
# `self.shape`)

    def __getitem__(self, key):
        if self.parent is None:
            raise CDMSError(FileClosed + str(self.id))
        return AbstractVariable.__getitem__(self, key)

[docs]    def getValue(self, squeeze=1):
        """Return the entire set of values."""
        if self.parent is None:
            raise CDMSError(FileClosed + self.id)
        return self.getSlice(Ellipsis, squeeze=squeeze)

    def __getslice__(self, low, high):
        if self.parent is None:
            raise CDMSError(FileClosed + self.id)

        # Hack to prevent netCDF overflow error on 64-bit architectures
        high = min(Max32int, high)

        return AbstractVariable.__getslice__(self, low, high)

    def __setitem__(self, index, value):
        raise CDMSError(WriteNotImplemented)

    def __setslice__(self, low, high, value):
        raise CDMSError(WriteNotImplemented)

    def _getShape(self):
        return self.getShape()

    def _getdtype(self):
        tc = self.typecode()
        return numpy.dtype(tc)

[docs]    def getShape(self):
        shape = []
        for (axis, start, length, true_length) in self.domain:
            shape.append(length)
        return tuple(shape)

[docs]    def typecode(self):
        return numpy.dtype(self._numericType_).char

[docs]    def size(self):
        "Number of elements."
        n = 1
        for k in self.shape:
            n = k * n
        return n

[docs]    def initDomain(self, axisdict, griddict):
        "Must be called by whoever made this Variable to set up axes, grids."
        self.domain = []
        domnode = self._node_.getDomain()
        for denode in domnode.children():
            dename = denode.getName()
            domelem = axisdict.get(dename)
            if domelem is None:
                domelem = griddict.get(dename)
#                if grid is None:
#                    raise CDMSError(NoSuchAxisOrGrid + dename)
#                else:
#                    raise CDMSError(InvalidGridElement + dename)
            partlenstr = denode.getExternalAttr('partition_length')
            if partlenstr is not None:
                truelen = int(partlenstr)
            else:
                truelen = denode.length
            self.domain.append((domelem, denode.start, denode.length, truelen))

    # Get the template
[docs]    def getTemplate(self):
        if hasattr(self, 'template'):
            template = self.template
        elif hasattr(self.parent, 'template'):
            template = self.parent.template
        else:
            template = None
        return template

[docs]    def getAxis(self, n):
        if n < 0:
            n = n + self.rank()
        return self.domain[n][0]

[docs]    def getDomain(self):
        return self.domain

    # Get the paths associated with the interval region specified
    # by 'intervals'. This incorporates most of the logic of __getitem__,
    # without actually reading the data.
    #
    # 'specs' is a list of interval range specifications as defined
    # for getSlice.
    #
    # The function returns a list of tuples of the form (path,slicelist),
    # where path is the path of a file, and slicetuple is a tuple of
    # slices, of the same length as the rank of the variable, representing the
    # region of the variable which is contained in the file. The following
    # would retrieve the data for that file:
    #
    #   f = Cdunif.CdunifFile(path,'r')
    #   var = f.variables[self.name_in_file]
    #   data = apply(var.getitem,slicelist)
    #
[docs]    def getPaths(self, *specs, **keys):

        # Create an equivalent list of slices
        speclist = self._process_specs(specs, keys)
        slicelist = self.specs2slices(speclist)

        print(slicelist)
        # Generate the filelist
        npart, idims, partitionSlices = self.expertPaths(slicelist)

        # Flatten the list
        result = []
        if partitionSlices is None:
            pass
        elif npart == 0:
            filename, slicelist = partitionSlices
            if filename is not None:
                result.append((filename, tuple(slicelist)))
        elif npart == 1:
            for filename, slicelist in partitionSlices:
                if filename is not None:
                    result.append((filename, tuple(slicelist)))
        elif npart == 2:
            for filelist in partitionSlices:
                for filename, slicelist in filelist:
                    if filename is not None:
                        result.append((filename, tuple(slicelist)))

        return result

[docs]    def genMatch(self, axis, interval, matchnames):
        """Helper function for expertPaths.

        Parameters
        ----------

        axis : is a partitioned axis, either time or vertical level or forecast.

        interval : is an index interval (istart, iend).

        matchnames : is a partially filled list [id, timestart, timeend, levstart, levend, fc] If a filemap
                is used, matchnames has indices, otherwise has coordinates.

        Function : modifies matchnames based on axis and interval, returns the modified matchnames tuple.
        """
        if axis.isTime():
            if hasattr(self.parent, 'cdms_filemap'):
                start = interval[0]
                end = interval[1]
            else:                       # Use template method
                time0 = axis[interval[0]]
                time1 = axis[interval[1] - 1]
                isabs = (string.find(axis.units, " as ") != -1)
                if isabs:
                    start = cdtime.abstime(time0, axis.units)
                    end = cdtime.abstime(time1, axis.units)
                else:
                    cal = axis.getCalendar()
                    start = cdtime.reltime(time0, axis.units).tocomp(cal)
                    end = cdtime.reltime(time1, axis.units).tocomp(cal)
            matchnames[1] = start
            matchnames[2] = end
        elif axis.isForecast():
            start = axis.getValue()[interval[0]]
            end = axis.getValue()[interval[1] - 1]
            matchnames[5] = start
            matchnames[6] = end
        else:
            if hasattr(self.parent, 'cdms_filemap'):
                start = interval[0]
                end = interval[1]
            else:
                start = int(axis[interval[0]])
                end = int(axis[interval[1] - 1])
            matchnames[3] = start
            matchnames[4] = end

        return matchnames

[docs]    def getFilePath(self, matchnames, template):
        """Lookup or generate the file path, depending on whether a filemap
        or template is present.
        """
        if hasattr(self.parent, 'cdms_filemap'):
            id, tstart, tend, levstart, levend, fcstart, fcend = matchnames
            filename = self.parent._filemap_[
                (self.id, tstart, levstart, fcstart)]
            # ... filemap uses dataset IDs
        else:
            filename = getPathFromTemplate(template, matchnames)
        return filename

[docs]    def getPartition(self, axis):
        """Get the partition attribute for this variable, axis.

        Parameters
        ----------

        axis : is either a time or level axis. If cdms_filemap is being used, get the
              partition from the _varpart_ attribute, otherwise (for templating) use
              axis.partition.
        """
        if hasattr(self.parent, 'cdms_filemap'):
            if axis.isTime():
                partition = self._varpart_[0]
            elif axis.isForecast():
                partition = axis.partition
            else:                  # level
                partition = self._varpart_[1]
        else:                           # Template method
            partition = axis.partition
        return partition

[docs]    def expertPaths(self, slist):
        """
        Expert Paths

        Parameters
        ----------

        expertPaths : (self, slicelist) takes a list of slices


        Returns
        -------
        a 3-tuple (npart, dimensionlist, partitionSlices)

        Where
        npart : is the number of partitioned dimensions: 0, 1, or 2;
        dimensionlist : is a tuple of length npart, having the dimension numbers of the partitioned dimensions;
        partitionSlices : is the list of file-specific (filename, slice) corresponding
        to the paths and slices within the files to be read.

        The exact form of partitionSlices depends on the value of npart

        npart : partitionSlices

        0 : (filename,slicelist)
        1 : [(filename,slicelist),...,(filename,slicelist)]
        2 : [[(filename,slicelist),...,(filename,slicelist)]
            [(filename,slicelist),...,(filename,slicelist)]
            [(filename,slicelist),...,(filename,slicelist)]]

        Notes
        -----
        - A filename of None indicates that no file was found with data corresponding to the slicelist.
        - If partitionSlices is None, the slicelist does not intersect the domain.
        - An empty partitionSlices [] means that the variable is zero-dimensional.
        """

        # slicelist gets modified, slist doesn't
        slicelist = copy.copy(slist)
        template = self.getTemplate()

        # Use the name_in_file attribute to access files
        if hasattr(self, 'name_in_file'):
            realid = self.name_in_file
        else:
            realid = self.id

        # Handle rank-0 variables separately
        if self.rank() == 0:
            matchnames = [realid, None, None, None, None, None, None]
            filename = self.getFilePath(matchnames, template)

            result = (0, (), (filename, []))
            return result

        # Find the number of partitioned axes
        npart = 0
        ndim = 0
        for (axis, start, length, true_length) in self.domain:
            if hasattr(axis, 'partition'):
                npart = npart + 1
                if npart == 1:
                    # part1 = axis
                    npart1 = ndim
                elif npart == 2:
                    # part2 = axis
                    npart2 = ndim
                else:
                    raise CDMSError(TooManyPartitions + self.id)
            ndim = ndim + 1

        # If no partitioned axes, just read the data
        if npart == 0:
            matchnames = [realid, None, None, None, None, None, None]
            filename = self.getFilePath(matchnames, template)
            result = (0, (), (filename, slicelist))

        # If one partitioned axes:
        elif npart == 1:

            # intersect the slice and partition for that axis
            slice1 = slicelist[npart1]
            (axis, startelem, length, true_length) = self.domain[npart1]
            partition = slicePartition(slice1, self.getPartition(axis))
            if partition == []:
                return (1, (npart1,), None)

            # For each (interval, partslice) in the partition:
            resultlist = []
            (firstinterval, firstslice) = partition[0]
            prevhigh = firstinterval[0]
            for (interval, partslice) in partition:

                # If the previous interval high is less than
                # the current interval low value, interpose
                # missing data.
                low = interval[0]
                if prevhigh < low:
                    missing_interval = (prevhigh, low)
                    missing_slice = sliceIntersect(slice1, missing_interval)

                    # Note: if the slice has a stride>1, it might not intersect,
                    # so don't interpose missing data in this case.
                    if missing_slice is not None:
                        slicelist[npart1] = missing_slice
                        resultlist.append((None, copy.copy(slicelist)))
                prevhigh = interval[1]

                # generate the filename
                matchnames = [realid, None, None, None, None, None, None]
                matchnames = self.genMatch(axis, interval, matchnames)
                filename = self.getFilePath(matchnames, template)

                # adjust the partslice for the interval offset
                # and replace in the slice list
                filestart = partslice.start - interval[0]
                filestop = partslice.stop - interval[0]
                fileslice = slice(filestart, filestop, partslice.step)
                slicelist[npart1] = fileslice

                resultlist.append((filename, copy.copy(slicelist)))

            result = (1, (npart1,), resultlist)

        # If two partitioned axes, 2-D version of previous case
        if npart == 2:
            slice1 = slicelist[npart1]
            slice2 = slicelist[npart2]
            (axis1, startelem1, length1, true_length1) = self.domain[npart1]
            (axis2, startelem2, length2, true_length2) = self.domain[npart2]
            partition1 = slicePartition(slice1, self.getPartition(axis1))
            partition2 = slicePartition(slice2, self.getPartition(axis2))
            if partition1 == [] or partition2 == []:
                return (2, (npart1, npart2), None)

            # For each (interval, partslice) in the partition:
            resultlist = []
            (firstinterval1, firstslice1) = partition1[0]
            prevhigh1 = firstinterval1[0]
            for (interval1, partslice1) in partition1:

                # If the previous interval high is less than
                # the current interval low value, interpose
                # missing data.
                low = interval1[0]
                if prevhigh1 < low:
                    missing_interval = (prevhigh1, low)
                    missing_slice = sliceIntersect(slice1, missing_interval)
                    if missing_slice is not None:
                        slicelist[npart1] = missing_slice
                        resultlist.append([(None, copy.copy(slicelist))])
                prevhigh1 = interval1[1]

                # generate matchnames
                matchnames = [realid, None, None, None, None, None, None]
                matchnames = self.genMatch(axis1, interval1, matchnames)

                # adjust the partslice for the interval offset
                # and replace in the slice list
                filestart = partslice1.start - interval1[0]
                filestop = partslice1.stop - interval1[0]
                fileslice = slice(filestart, filestop, partslice1.step)
                slicelist[npart1] = fileslice

                chunklist = []
                (firstinterval2, firstslice2) = partition2[0]
                prevhigh2 = firstinterval2[0]
                for (interval2, partslice2) in partition2:

                    # If the previous interval high is less than
                    # the current interval low value, interpose
                    # missing data.
                    low = interval2[0]
                    if prevhigh2 < low:
                        missing_interval = (prevhigh2, low)
                        missing_slice = sliceIntersect(
                            slice1, missing_interval)
                        if missing_slice is not None:
                            slicelist[npart2] = missing_slice
                            chunklist.append((None, copy.copy(slicelist)))
                    prevhigh2 = interval2[1]

                    # generate the filename
                    matchnames = self.genMatch(axis2, interval2, matchnames)
                    filename = self.getFilePath(matchnames, template)

                    filestart = partslice2.start - interval2[0]
                    filestop = partslice2.stop - interval2[0]
                    fileslice = slice(filestart, filestop, partslice2.step)
                    slicelist[npart2] = fileslice

                    chunklist.append((filename, copy.copy(slicelist)))

                resultlist.append(chunklist)

            result = (2, (npart1, npart2), resultlist)

        return result

[docs]    def expertSlice(self, initslist):

        # Handle negative slices
        revlist = []                    # Slices to apply to result if reversals needed
        slist = []                      # Slices with positive strides
        haveReversals = 0               # True iff result array needs reversing
        i = 0
        for s in initslist:
            if s.step < 0:
                axislen = self.shape[i]
                slist.append(reverseSlice(s, axislen))
                revlist.append(slice(None, None, -1))
                haveReversals = 1
            else:
                slist.append(s)
                revlist.append(slice(None, None, 1))
            i += 1

        # This does most of the work
        npart, idims, partitionSlices = self.expertPaths(slist)

        # If the dataset includes a forecast axis, find it now, as well
        # as this slice's corresponding index in that direction.
        fci = None
        for i in range(len(self.domain)):
            if self.domain[i][0].isForecast():
                fci = i
                # fcv = initslist[i].start
                break

        # If no intersection, return an 'empty' array.
        if partitionSlices is None:
            return numpy.ma.zeros((0,), self._numericType_)

        # Handle rank-0 variables separately
        if self.rank() == 0:
            filename, dumlist = partitionSlices

            f = self.parent.openFile(filename, 'r')
            try:
                var = f.variables[self.name_in_file]
                result = var.getValue()
            finally:
                f.close()
            return result

        # If no partitioned axes, just read the data
        if npart == 0:
            filename, slicelist = partitionSlices

            f = self.parent.openFile(filename, 'r')
            try:
                var = f.variables[self.name_in_file]
                if fci is None:
                    result = self._returnArray(
                        var.getitem(*tuple(slicelist)), 0)
                else:
                    # If there's a forecast axis, the file doesn't know about it so
                    # don't use it in slicing data out of the file.
                    result = self._returnArray(var.getitem(*tuple(slicelist[0:fci] + slicelist[fci + 1:])),
                                               0)
                    # But the result still needs an index in the forecast direction,
                    # which is simple to do because there is only one forecast
                    # per file:
                    result.resize(list(map(lenSlice, slicelist)))

            finally:
                f.close()
            sh = result.shape
            if 0 in sh:
                raise CDMSError(IndexError + 'Coordinates out of Domain')

        # If one partitioned axes:
        elif npart == 1:

            npart1 = idims[0]
            resultlist = []
            for filename, slicelist in partitionSlices:

                # If the slice is missing, interpose missing data
                if filename is None:
                    shapelist = list(map(lenSlice, slicelist))
                    chunk = numpy.ma.zeros(
                        tuple(shapelist), self._numericType_)
                    chunk[...] = numpy.ma.masked

                # else read the data and close the file
                else:
                    f = self.parent.openFile(filename, 'r')
                    try:
                        var = f.variables[self.name_in_file]
                        if fci is None:
                            chunk = var.getitem(*tuple(slicelist))
                        else:
                            # If there's a forecast axis, the file doesn't know about it so
                            # don't use it in slicing data out of the file.
                            chunk = var.getitem(
                                *tuple(slicelist[0:fci] + slicelist[fci + 1:]))
                            # But the chunk still needs an index in the forecast direction,
                            # which is simple to do because there is only one
                            # forecast per file:
                            chunk.resize(list(map(lenSlice, slicelist)))

                    finally:
                        f.close()
                    sh = chunk.shape
                    if 0 in sh:
                        raise CDMSError('Coordinates out of Domain')

                resultlist.append(self._returnArray(chunk, 0))

            # Combine the chunks into a single array
            # Note: This works because slicelist is the same length
            # as the domain, and var.getitem returns a chunk
            # with singleton dimensions included. This means that
            # npart1 corresponds to the correct dimension of chunk.
            result = numpy.ma.concatenate(resultlist, axis=npart1)
            for chunk in resultlist:
                del(chunk)

        # If two partitioned axes, 2-D version of previous case
        if npart == 2:
            npart1, npart2 = idims

            resultlist = []
            for filelist in partitionSlices:
                chunklist = []
                for filename, slicelist in filelist:

                    # If the slice is missing, interpose missing data
                    if filename is None:
                        shapelist = list(map(lenSlice, slicelist))
                        chunk = numpy.ma.zeros(
                            tuple(shapelist), self._numericType_)
                        chunk[...] = numpy.ma.masked

                    # else read the data and close the file
                    else:
                        f = self.parent.openFile(filename, 'r')
                        try:
                            var = f.variables[self.name_in_file]
                            if fci is None:
                                chunk = var.getitem(*tuple(slicelist))
                            else:
                                # If there's a forecast axis, the file doesn't know about it so
                                # don't use it in slicing data out of the file.
                                chunk = var.getitem(
                                    *tuple(slicelist[0:fci] + slicelist[fci + 1:]))
                                # But the chunk still needs an index in the forecast direction,
                                # which is simple to do because there is only
                                # one forecast per file:
                                chunk.resize(list(map(lenSlice, slicelist)))

                        finally:
                            f.close()
                        sh = chunk.shape
                        if 0 in sh:
                            raise CDMSError('Coordinates out of Domain')
                    chunklist.append(self._returnArray(chunk, 0))

                # Note: This works because slicelist is the same length
                # as the domain, and var.getitem returns a chunk
                # with singleton dimensions included. This means that
                # npart1 corresponds to the correct dimension of chunk.
                bigchunk = numpy.ma.concatenate(chunklist, axis=npart2)
                for chunk in chunklist:
                    del(chunk)
                resultlist.append(bigchunk)

            result = numpy.ma.concatenate(resultlist, axis=npart1)
            for bigchunk in resultlist:
                del(bigchunk)

        # If slices with negative strides were input, apply the appropriate
        # reversals.
        if haveReversals:
            result = result[revlist]

        return result

    shape = property(_getShape, None)
#     shape = _getShape
    dtype = property(_getdtype, None)

# PropertiedClasses.set_property (DatasetVariable, 'shape',
#                                   DatasetVariable._getShape, nowrite=1,
# nodelete=1)
# PropertiedClasses.set_property (DatasetVariable, 'dtype',
#                                   DatasetVariable._getdtype, nowrite=1,
# nodelete=1)
# internattr.add_internal_attribute(DatasetVariable, 'domain')
Source code for cdms2.variable

Table of Contents

Search