"""
Parse a CDML/XML file
"""
from .cdxmllib import XMLParser
from . import CDML
import re
from . import cdmsNode
# Error constants
InvalidAttribute = "Invalid attribute"
# Regular expressions
_S = re.compile('[ \t\r\n]+$')
_opS = '[ \t\r\n]*'
_Integer = re.compile(_opS + '[0-9]+$' + _opS)
[docs]class CDMLParser(XMLParser):
def __init__(self, verbose=0):
XMLParser.__init__(self)
self.root = None
self.currentPath = [] # Current path, a stack
self.dtd = CDML.CDML().dtd
self.verbose = verbose
# Push current node on the stack
[docs] def pushCurrentNode(self, node):
self.currentPath.append(node)
if not self.root:
self.root = node
# Pop the current node off the stack
[docs] def popCurrentNode(self):
node = self.currentPath[-1]
self.currentPath = self.currentPath[:-1]
return node
# Get the current parent node
[docs] def getCurrentNode(self):
return self.currentPath[-1]
# Get the root node
[docs] def getRoot(self):
return self.root
# Handle content
# discard data which is just whitespace,
# and strip other data
[docs] def handle_data(self, data):
matchObj = _S.match(data)
if not matchObj:
if self.verbose:
print(('data:', data))
if self.root:
self.getCurrentNode().setContentFromString(data.strip())
[docs] def handle_cdata(self, data):
if self.verbose:
print(('cdata:', repr(data)))
[docs] def handle_proc(self, name, data):
if self.verbose:
print(('processing:', name, repr(data)))
[docs] def handle_special(self, data):
if self.verbose:
print(('special:', repr(data)))
[docs] def handle_starttag(self, tag, method, attrs):
if tag in self.dtd:
# Check that attributes are valid
validDict = self.dtd[tag]
validAttrs = list(validDict.keys())
attrnames = list(attrs.keys())
for attrname in attrnames:
if attrname not in validAttrs:
self.cdml_syntax_error(self.lineno,
'unknown attribute %s of element %s' %
(attrname, tag))
else:
(atttype, attdefault) = validDict[attrname]
if isinstance(atttype, type((0,))):
attrval = attrs[attrname]
if attrval not in atttype:
self.cdml_syntax_error(self.lineno,
'invalid attribute value %s=%s of element %s, must be one of %s' %
(attrname, attrval, tag, atttype))
# Check that required attributes are present,
# and add default values
for attrname in validAttrs:
(atttype, attdefault) = validDict[attrname]
if attdefault == CDML.Required and attrname not in attrnames:
self.cdml_syntax_error(self.lineno,
'element %s requires an attribute %s' %
(tag, attrname))
if isinstance(attdefault, type(
"")) and attrname not in attrnames:
attrs[attrname] = attdefault
method(attrs)
# ------------------------------------------------------------------------
# CDML tags
[docs] def start_attr(self, attrs):
if self.verbose:
print(('attr:', attrs))
name = attrs['name']
datatype = attrs['datatype']
attr = cdmsNode.AttrNode(name, None)
attr.datatype = datatype
self.pushCurrentNode(attr)
# Now the value if any as stored in self.content
[docs] def end_attr(self):
attr = self.popCurrentNode()
var = self.getCurrentNode()
attr.setValueFromString(attr.getContent(), attr.datatype)
var.setExternalAttrFromAttr(attr)
# ------------------------------------------------------------------------
[docs] def start_axis(self, attrs):
if self.verbose:
print(('axis:', attrs))
id = attrs['id']
length_s = attrs['length']
datatype = attrs.get('datatype')
if _Integer.match(length_s) is None:
raise InvalidAttribute('length=' + length_s)
length = int(length_s)
axis = cdmsNode.AxisNode(id, length, datatype)
partstring = attrs.get('partition')
if partstring is not None:
axis.setPartitionFromString(partstring)
axis.setExternalDict(attrs)
self.getCurrentNode().addId(id, axis)
self.pushCurrentNode(axis)
[docs] def end_axis(self):
self.popCurrentNode()
# ------------------------------------------------------------------------
[docs] def start_cdml(self, attrs):
if self.verbose:
print(('cdml:', attrs))
[docs] def end_cdml(self):
pass
# ------------------------------------------------------------------------
[docs] def start_component(self, attrs):
if self.verbose:
print(('component:', attrs))
[docs] def end_component(self):
pass
# ------------------------------------------------------------------------
[docs] def start_compoundAxis(self, attrs):
if self.verbose:
print(('compoundAxis:', attrs))
[docs] def end_compoundAxis(self):
pass
# ------------------------------------------------------------------------
[docs] def start_data(self, attrs):
if self.verbose:
print(('data:', attrs))
[docs] def end_data(self):
pass
# ------------------------------------------------------------------------
[docs] def start_dataset(self, attrs):
if self.verbose:
print(('dataset:', attrs))
id = attrs['id']
dataset = cdmsNode.DatasetNode(id)
dataset.setExternalDict(attrs)
if self.root:
self.getCurrentNode().addId(id, dataset)
self.pushCurrentNode(dataset)
[docs] def end_dataset(self):
dataset = self.popCurrentNode()
dataset.validate()
# ------------------------------------------------------------------------
[docs] def start_doclink(self, attrs):
if self.verbose:
print(('docLink:', attrs))
uri = attrs['href']
doclink = cdmsNode.DocLinkNode(uri)
doclink.setExternalDict(attrs)
self.getCurrentNode().add(doclink)
self.pushCurrentNode(doclink)
[docs] def end_doclink(self):
self.popCurrentNode()
# ------------------------------------------------------------------------
[docs] def start_domElem(self, attrs):
if self.verbose:
print(('domElem:', attrs))
name = attrs['name']
start_s = attrs.get('start')
length_s = attrs.get('length')
if start_s is not None:
start = int(start_s)
else:
start = None
if length_s is not None:
length = int(length_s)
else:
length = None
domElem = cdmsNode.DomElemNode(name, start, length)
domElem.setExternalDict(attrs)
self.getCurrentNode().add(domElem)
[docs] def end_domElem(self):
pass
# ------------------------------------------------------------------------
[docs] def start_domain(self, attrs):
if self.verbose:
print(('domain:', attrs))
domain = cdmsNode.DomainNode()
self.getCurrentNode().setDomain(domain)
self.pushCurrentNode(domain)
[docs] def end_domain(self):
self.popCurrentNode()
# ------------------------------------------------------------------------
[docs] def start_rectGrid(self, attrs):
if self.verbose:
print(('rectGrid:', attrs))
id = attrs['id']
gridtype = attrs['type']
latitude = attrs['latitude']
longitude = attrs['longitude']
grid = cdmsNode.RectGridNode(id, latitude, longitude, gridtype)
grid.setExternalDict(attrs)
self.getCurrentNode().addId(id, grid)
self.pushCurrentNode(grid)
[docs] def end_rectGrid(self):
self.popCurrentNode()
# ------------------------------------------------------------------------
[docs] def start_linear(self, attrs):
if self.verbose:
print(('linear:', attrs))
start_s = attrs['start']
delta_s = attrs['delta']
length_s = attrs['length']
try:
start = float(start_s)
except ValueError:
raise InvalidAttribute('start=' + start_s)
try:
delta = float(delta_s)
except ValueError:
raise InvalidAttribute('delta=' + delta_s)
try:
length = int(length_s)
except ValueError:
raise InvalidAttribute('length=' + length_s)
linear = cdmsNode.LinearDataNode(start, delta, length)
self.getCurrentNode().setLinearData(linear)
[docs] def end_linear(self):
pass
# ------------------------------------------------------------------------
[docs] def start_variable(self, attrs):
if self.verbose:
print(('variable:', attrs))
id = attrs['id']
datatype = attrs['datatype']
variable = cdmsNode.VariableNode(id, datatype, None)
variable.setExternalDict(attrs)
self.getCurrentNode().addId(id, variable)
self.pushCurrentNode(variable)
[docs] def end_variable(self):
self.popCurrentNode()
# ------------------------------------------------------------------------
[docs] def start_xlink(self, attrs):
if self.verbose:
print(('xlink:', attrs))
id = attrs['id']
uri = attrs['href']
contentRole = attrs['content-role']
xlink = cdmsNode.XLinkNode(id, uri, contentRole)
xlink.setExternalDict(attrs)
self.getCurrentNode().addId(id, xlink)
self.pushCurrentNode(xlink)
[docs] def end_xlink(self):
self.popCurrentNode()
# ------------------------------------------------------------------------
[docs] def cdml_syntax_error(self, lineno, message):
print(('error near line %d:' % lineno, message))
[docs] def unknown_starttag(self, tag, attrs):
if self.verbose:
print(('**' + tag + '**:', attrs))
[docs] def unknown_endtag(self, tag):
pass
[docs] def unknown_entityref(self, ref):
self.flush()
if self.verbose:
print(('*** unknown entity ref: &' + ref + ';'))
[docs] def unknown_charref(self, ref):
self.flush()
if self.verbose:
print(('*** unknown char ref: &#' + ref + ';'))
[docs] def close(self):
XMLParser.close(self)
if __name__ == '__main__':
import sys
sampfile = open(sys.argv[1])
text = sampfile.read()
sampfile.close()
if len(sys.argv) == 2:
verbose = 0
else:
verbose = 1
p = CDMLParser(verbose)
p.feed(text)
p.close()
p.root.dump()