1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27 """
28 This module provides class definitions corresponding to the elements that
29 can be found in a LIGO Light Weight XML file. It also provides a class
30 representing an entire LIGO Light Weight XML document, a ContentHandler
31 class for use with SAX2 parsers, and a convenience function for
32 constructing a parser.
33 """
34
35
36 import sys
37 from xml import sax
38 from xml.sax.xmlreader import AttributesImpl
39 from xml.sax.saxutils import escape as xmlescape
40 from xml.sax.saxutils import unescape as xmlunescape
41
42
43 from glue import git_version
44 from . import types as ligolwtypes
45 import six
46 from functools import reduce
47
48
49 __author__ = "Kipp Cannon <kipp.cannon@ligo.org>"
50 __version__ = "git id %s" % git_version.id
51 __date__ = git_version.date
52
53
54
55
56
57
58
59
60
61
62
63 NameSpace = u"http://ldas-sw.ligo.caltech.edu/doc/ligolwAPI/html/ligolw_dtd.txt"
64
65
66 Header = u"""<?xml version='1.0' encoding='utf-8'?>
67 <!DOCTYPE LIGO_LW SYSTEM "%s">""" % NameSpace
68
69
70 Indent = u"\t"
71
72
73
74
75
76
77
78
79
80
81
class ElementError(Exception):
    """
    Common base class for the exceptions raised by elements.
    """
87
90 """
91 Expose an XML attribute of an Element subclass as Python instance
92 attribute with support for an optional default value.
93
94 The .getAttribute() and .setAttribute() methods of the instance to
95 which this is attached are used to retrieve and set the unicode
96 attribute value, respectively.
97
98 When retrieving a value, the function given via the dec keyword
99 argument will be used to convert the unicode into a native Python
100 object (the default is to leave the unicode value as unicode).
101 When setting a value, the function given via the enc keyword
102 argument will be used to convert a native Python object to a
103 unicode string.
104
105 When retrieving a value, if .getAttribute() raises KeyError then
106 AttributeError is raised unless a default value is provided in
107 which case it is returned instead.
108
109 If doc is provided it will be used as the documentation string,
110 otherwise a default documentation string will be constructed
111 identifying the attribute's name and explaining the default value
112 if one is set.
113
114 NOTE: If an XML document is parsed and an element is encountered
115 that does not have a value set for an attribute whose corresponding
116 attributeproxy has a default value defined, then Python codes will
117 be told the default value. Therefore, the default value given here
118 must match what the XML DTD says the default value is for that
119 attribute. Likewise, attributes for which the DTD does not define
120 a default must not have a default defined here. These conditions
121 must both be met to not create a discrepancy between the behaviour
122 of Python codes relying on this I/O library and other interfaces to
123 the same document.
124
125 Example:
126
127 >>> class Test(Element):
128 ... Scale = attributeproxy(u"Scale", enc = u"%.17g".__mod__, dec = float, default = 1.0, doc = "This is the scale (default = 1).")
129 ...
130 >>> x = Test()
131 >>> # have not set value, default will be returned
132 >>> x.Scale
133 1.0
134 >>> x.Scale = 16
135 >>> x.Scale
136 16.0
137 >>> # default can be retrieved via the .default attribute of the
138 >>> # class attribute
139 >>> Test.Scale.default
140 1.0
141 >>> # default is read-only
142 >>> Test.Scale.default = 2.
143 Traceback (most recent call last):
144 File "<stdin>", line 1, in <module>
145 AttributeError: can't set attribute
146 >>> # internally, value is stored as unicode (for XML)
147 >>> x.getAttribute("Scale")
148 u'16'
149 >>> # deleting an attribute restores the default value if defined
150 >>> del x.Scale
151 >>> x.Scale
152 1.0
153 """
def __init__(self, name, enc=six.text_type, dec=six.text_type, default=None, doc=None):
    """
    Build the property that proxies the XML attribute called name.

    name is the attribute name handed to the owning element's
    .getAttribute(), .setAttribute() and .removeAttribute()
    methods.  dec converts the stored value on the way out and enc
    converts a Python value on the way in;  if enc is None the
    property is created without a setter or a deleter.  default,
    when not None, is returned in place of a missing attribute.
    doc, when not None, is used verbatim as the documentation
    string.
    """
    has_default = default is not None

    # the three accessors are closures over name, enc, dec and
    # default;  the element instance arrives as their first argument
    def getter(element):
        try:
            raw = element.getAttribute(name)
        except KeyError:
            if not has_default:
                raise AttributeError("attribute '%s' is not set" % name)
            return default
        return dec(raw)

    def setter(element, value):
        element.setAttribute(name, enc(value))

    def deleter(element):
        element.removeAttribute(name)

    # no documentation supplied:  synthesize some
    if doc is None:
        doc = "The \"%s\" attribute." % name
        if has_default:
            doc += " Default is \"%s\" if not set." % str(default)

    # without an encoder the attribute cannot be written or deleted
    if enc is None:
        fset = fdel = None
    else:
        fset, fdel = setter, deleter
    super(attributeproxy, self).__init__(getter, fset, fdel, doc)

    # set the documentation string explicitly on the instance as well
    self.__doc__ = doc

    # only record a default when one was given;  otherwise _default
    # is left unset
    if has_default:
        self._default = default
183
184 @property
186 """
187 Default value. AttributeError is raised if no default
188 value is set.
189 """
190 return self._default
191
194 """
195 Base class for all element types. This class is inspired by the
196 class of the same name in the Python standard library's xml.dom
197 package. One important distinction is that the standard DOM
198 element is used to represent the structure of a document at a much
199 finer level of detail than here. For example, in the case of the
200 standard DOM element, each XML attribute is its own element being a
201 child node of its tag, while here they are simply stored as
202 attributes of the tag element itself.
203
204 Despite the differences, the documentation for the xml.dom package,
205 particularly that of the Element class and its parent, the Node
206 class, is useful as supplementary material in understanding how to
207 use this class.
208 """
209
210 tagName = None
211 validchildren = frozenset()
212
213 @classmethod
216
218 """
219 Construct an element. The argument is a
220 sax.xmlreader.AttributesImpl object (see the xml.sax
221 documentation, but it's basically a dictionary-like thing)
222 used to set the element attributes.
223 """
224 self.parentNode = None
225 if attrs is None:
226 self.attributes = AttributesImpl({})
227 elif set(attrs.keys()) <= self.validattributes():
228 self.attributes = attrs
229 else:
230 raise ElementError("%s element: invalid attribute(s) %s" % (self.tagName, ", ".join("'%s'" % key for key in set(attrs.keys()) - self.validattributes())))
231 self.childNodes = []
232 self.pcdata = None
233
235 """
236 Generate the string for the element's start tag.
237 """
238 return u"%s<%s%s>" % (indent, self.tagName, u"".join(u" %s=\"%s\"" % keyvalue for keyvalue in self.attributes.items()))
239
241 """
242 Generate the string for the element's end tag.
243 """
244 return u"%s</%s>" % (indent, self.tagName)
245
247 """
248 Add a child to this element. The child's parentNode
249 attribute is updated, too.
250 """
251 self.childNodes.append(child)
252 child.parentNode = self
253 self._verifyChildren(len(self.childNodes) - 1)
254 return child
255
257 """
258 Insert a new child node before an existing child. It must
259 be the case that refchild is a child of this node; if not,
260 ValueError is raised. newchild is returned.
261 """
262 for i, childNode in enumerate(self.childNodes):
263 if childNode is refchild:
264 self.childNodes.insert(i, newchild)
265 newchild.parentNode = self
266 self._verifyChildren(i)
267 return newchild
268 raise ValueError(refchild)
269
271 """
272 Remove a child from this element. The child element is
273 returned, and its parentNode element is reset. If the
274 child will not be used any more, you should call its
275 unlink() method to promote garbage collection.
276 """
277 for i, childNode in enumerate(self.childNodes):
278 if childNode is child:
279 del self.childNodes[i]
280 child.parentNode = None
281 return child
282 raise ValueError(child)
283
285 """
286 Break internal references within the document tree rooted
287 on this element to promote garbage collection.
288 """
289 self.parentNode = None
290 for child in self.childNodes:
291 child.unlink()
292 del self.childNodes[:]
293
295 """
296 Replace an existing node with a new node. It must be the
297 case that oldchild is a child of this node; if not,
298 ValueError is raised. newchild is returned.
299 """
300
301
302
303 for i, childNode in enumerate(self.childNodes):
304 if childNode is oldchild:
305 self.childNodes[i].parentNode = None
306 self.childNodes[i] = newchild
307 newchild.parentNode = self
308 self._verifyChildren(i)
309 return newchild
310 raise ValueError(oldchild)
311
313 """
314 Return a list of elements below and including this element
315 for which filter(element) returns True.
316 """
317 l = reduce(lambda l, e: l + e.getElements(filter), self.childNodes, [])
318 if filter(self):
319 l.append(self)
320 return l
321
324
326 l = []
327 for c in self.childNodes:
328 try:
329 if reduce(lambda t, kv: t and (c.getAttribute(kv[0]) == kv[1]), six.iteritems(attrs), True):
330 l.append(c)
331 except KeyError:
332 pass
333 return l
334
337
340
342
343
344
345
346 self.attributes._attrs[attrname] = six.text_type(value)
347
349
350
351
352
353 try:
354 del self.attributes._attrs[attrname]
355 except KeyError:
356 pass
357
359 """
360 Add characters to the element's pcdata.
361 """
362 if self.pcdata is not None:
363 self.pcdata += content
364 else:
365 self.pcdata = content
366
368 """
369 Method used internally by some elements to verify that
370 their children are from the allowed set and in the correct
371 order following modifications to their child list. i is
372 the index of the child that has just changed.
373 """
374 pass
375
377 """
378 Method invoked by document parser when it encounters the
379 end-of-element event.
380 """
381 pass
382
383 - def write(self, fileobj = sys.stdout, indent = u""):
398
401 """
402 Parent class for Elements that cannot contain text.
403 """
405 if not content.isspace():
406 raise TypeError("%s does not hold text" % type(self))
407
410 """
411 Walk the XML tree of children below elem, returning each in order.
412 """
413 for child in elem.childNodes:
414 yield child
415 for elem in WalkChildren(child):
416 yield elem
417
418
419
420
421
422
423
424
425
426
427
428 -class LLWNameAttr(six.text_type):
429 """
430 Baseclass to hide pattern-matching of various element names.
431 Subclasses must provide a .dec_pattern compiled regular expression
432 defining a group "Name" that identifies the meaningful portion of
433 the string, and a .enc_pattern that gives a format string to be
434 used with "%" to reconstruct the full string.
435
436 This is intended to be used to provide the enc and dec functions
437 for an attributeproxy instance.
438
439 Example:
440
441 >>> import re
442 >>> class Test(Element):
443 ... class TestName(LLWNameAttr):
444 ... dec_pattern = re.compile(r"(?P<Name>[a-z0-9_]+):test\Z")
445 ... enc_pattern = u"%s:test"
446 ...
447 ... Name = attributeproxy(u"Name", enc = TestName.enc, dec = TestName)
448 ...
449 >>> x = Test()
450 >>> x.Name = u"blah"
451 >>> # internally, suffix has been appended
452 >>> x.getAttribute("Name")
453 u'blah:test'
454 >>> # but attributeproxy reports original value
455 >>> x.Name
456 u'blah'
457 """
464
465 @classmethod
466 - def enc(cls, name):
468
469
470
471
472
473
474
475
476
477
478
479 -class LIGO_LW(EmptyElement):
480 """
481 LIGO_LW element.
482 """
483 tagName = u"LIGO_LW"
484 validchildren = frozenset([u"LIGO_LW", u"Comment", u"Param", u"Table", u"Array", u"Stream", u"IGWDFrame", u"AdcData", u"AdcInterval", u"Time", u"Detector"])
485
486 Name = attributeproxy(u"Name")
487 Type = attributeproxy(u"Type")
488
502
503
504 -class Param(Element):
517
518
519 -class Table(EmptyElement):
520 """
521 Table element.
522 """
523 tagName = u"Table"
524 validchildren = frozenset([u"Comment", u"Column", u"Stream"])
525
526 Name = attributeproxy(u"Name")
527 Type = attributeproxy(u"Type")
528
530 ncomment = 0
531 ncolumn = 0
532 nstream = 0
533 for child in self.childNodes:
534 if child.tagName == Comment.tagName:
535 if ncomment:
536 raise ElementError("only one Comment allowed in Table")
537 if ncolumn or nstream:
538 raise ElementError("Comment must come before Column(s) and Stream in Table")
539 ncomment += 1
540 elif child.tagName == Column.tagName:
541 if nstream:
542 raise ElementError("Column(s) must come before Stream in Table")
543 ncolumn += 1
544 else:
545 if nstream:
546 raise ElementError("only one Stream allowed in Table")
547 nstream += 1
548
549
550 -class Column(EmptyElement):
551 """
552 Column element.
553 """
554 tagName = u"Column"
555
556 Name = attributeproxy(u"Name")
557 Type = attributeproxy(u"Type")
558 Unit = attributeproxy(u"Unit")
559
561 """
562 Generate the string for the element's start tag.
563 """
564 return u"%s<%s%s/>" % (indent, self.tagName, u"".join(u" %s=\"%s\"" % keyvalue for keyvalue in self.attributes.items()))
565
567 """
568 Generate the string for the element's end tag.
569 """
570 return u""
571
def write(self, fileobj=sys.stdout, indent=u""):
    """
    Write this element to fileobj:  the text returned by
    .start_tag(indent) followed by a newline.  No children and no
    end tag are written.
    """
    emit = fileobj.write
    emit(self.start_tag(indent))
    emit(u"\n")
578
579
580 -class Array(EmptyElement):
601
602
603 -class Dim(Element):
632
650
653 """
654 IGWDFrame element.
655 """
656 tagName = u"IGWDFrame"
657 validchildren = frozenset([u"Comment", u"Param", u"Time", u"Detector", u"AdcData", u"LIGO_LW", u"Stream", u"Array", u"IGWDFrame"])
658
659 Name = attributeproxy(u"Name")
660
670
673 """
674 AdcData element.
675 """
676 tagName = u"AdcData"
677 validchildren = frozenset([u"AdcData", u"Comment", u"Param", u"Time", u"LIGO_LW", u"Array"])
678
679 Name = attributeproxy(u"Name")
680
692
693
694 -class Time(Element):
695 """
696 Time element.
697 """
698 tagName = u"Time"
699
700 Name = attributeproxy(u"Name")
701 Type = attributeproxy(u"Type", default = u"ISO-8601")
702
707
709 if self.Type == u"ISO-8601":
710 import dateutil.parser
711 self.pcdata = dateutil.parser.parse(self.pcdata)
712 elif self.Type == u"GPS":
713 from lal import LIGOTimeGPS
714
715
716 self.pcdata = LIGOTimeGPS(str(self.pcdata))
717 elif self.Type == u"Unix":
718 self.pcdata = float(self.pcdata)
719 else:
720
721
722
723 pass
724
def write(self, fileobj=sys.stdout, indent=u""):
    """
    Write the element on a single line:  start tag, the XML-escaped
    text form of .pcdata (omitted when .pcdata is None), end tag
    and a newline.

    How .pcdata is turned into text depends on the Type attribute:
    "ISO-8601" uses the value's .isoformat(), "Unix" formats the
    value with "%.16g", and "GPS" or any other type uses the
    value's unicode representation.
    """
    fileobj.write(self.start_tag(indent))
    value = self.pcdata
    if value is not None:
        kind = self.Type
        if kind == u"ISO-8601":
            text = six.text_type(value.isoformat())
        elif kind == u"Unix":
            text = u"%.16g" % value
        else:
            # "GPS" and unrecognized types share the same plain
            # unicode conversion
            text = six.text_type(value)
        fileobj.write(xmlescape(text))
    # the end tag follows the text directly, so it is not indented
    fileobj.write(self.end_tag(u""))
    fileobj.write(u"\n")
743
@classmethod
def now(cls, Name=None):
    """
    Alternate constructor:  return a new instance of cls, created
    with no attributes, whose .pcdata is the current UTC time as
    returned by datetime.datetime.utcnow().  The Name attribute is
    assigned only if Name is not None.
    """
    import datetime
    element = cls()
    if Name is not None:
        element.Name = Name
    element.pcdata = datetime.datetime.utcnow()
    return element
757
758 @classmethod
760 """
761 Instantiate a Time element initialized to the value of the
762 given GPS time. The Name attribute will be set to the
763 value of the Name parameter if given.
764
765 Note: the new Time element holds a reference to the GPS
766 time, not a copy of it. Subsequent modification of the GPS
767 time object will be reflected in what gets written to disk.
768 """
769 self = cls(AttributesImpl({u"Type": u"GPS"}))
770 if Name is not None:
771 self.Name = Name
772 self.pcdata = gps
773 return self
774
777 """
778 Description of a LIGO LW file.
779 """
780 tagName = u"Document"
781 validchildren = frozenset([u"LIGO_LW"])
782
def write(self, fileobj=sys.stdout, xsl_file=None):
    """
    Serialize the document to fileobj.

    The XML header is written first, followed by an xml-stylesheet
    processing instruction referring to xsl_file if that is not
    None.  Each child is then asked to write itself in turn.
    ElementError is raised on reaching a child whose tagName is not
    in .validchildren;  anything written before that point has
    already been sent to fileobj.
    """
    emit = fileobj.write
    emit(Header)
    emit(u"\n")
    if xsl_file is not None:
        emit(u'<?xml-stylesheet type="text/xsl" href="%s" ?>\n' % xsl_file)
    permitted = self.validchildren
    for node in self.childNodes:
        if node.tagName not in permitted:
            raise ElementError("invalid child %s for %s" % (node.tagName, self.tagName))
        node.write(fileobj)
795
796
797
798
799
800
801
802
803
804
805
806 -class LIGOLWContentHandler(sax.handler.ContentHandler, object):
807 """
808 ContentHandler class for parsing LIGO Light Weight documents with a
809 SAX2-compliant parser.
810
811 Example:
812
813 >>> # initialize empty Document tree into which parsed XML tree
814 >>> # will be inserted
815 >>> xmldoc = Document()
816 >>> # create handler instance attached to Document object
817 >>> handler = LIGOLWContentHandler(xmldoc)
818 >>> # open file and parse
819 >>> make_parser(handler).parse(open("demo.xml"))
820 >>> # write XML (default to stdout)
821 >>> xmldoc.write()
822
823 NOTE: this example is for illustration only. Most users will wish
824 to use the .load_*() functions in the glue.ligolw.utils subpackage
825 to load documents, and the .write_*() functions to write documents.
826 Those functions provide additional features such as support for
827 gzip'ed documents, MD5 hash computation, and Condor eviction
828 trapping to avoid writing broken documents to disk.
829
830 See also: PartialLIGOLWContentHandler,
831 FilteringLIGOLWContentHandler.
832 """
833
def __init__(self, document, start_handlers=None):
    """
    Initialize the handler by pointing it to the Document object
    into which the parsed file will be loaded.

    start_handlers, if given, is a mapping whose keys are
    (uri, localname) tuples and whose values are callables invoked
    as handler(parent, attrs) that return the new element.  Its
    entries are merged over the built-in table below, so it can
    both replace the default handlers and add new ones.
    """
    self.current = self.document = document

    # default element factories, keyed the same way startElementNS()
    # looks them up
    self._startElementHandlers = {
        (None, AdcData.tagName): self.startAdcData,
        (None, AdcInterval.tagName): self.startAdcInterval,
        (None, Array.tagName): self.startArray,
        (None, Column.tagName): self.startColumn,
        (None, Comment.tagName): self.startComment,
        (None, Detector.tagName): self.startDetector,
        (None, Dim.tagName): self.startDim,
        (None, IGWDFrame.tagName): self.startIGWDFrame,
        (None, LIGO_LW.tagName): self.startLIGO_LW,
        (None, Param.tagName): self.startParam,
        (None, Stream.tagName): self.startStream,
        (None, Table.tagName): self.startTable,
        (None, Time.tagName): self.startTime,
    }
    # None (rather than a shared mutable {} default) means "no
    # overrides"
    if start_handlers is not None:
        self._startElementHandlers.update(start_handlers)
857
def startAdcData(self, parent, attrs):
    """
    Return a new AdcData element constructed from attrs.  parent
    is not used.
    """
    return AdcData(attrs)
860
def startAdcInterval(self, parent, attrs):
    """
    Return a new AdcInterval element constructed from attrs.
    parent is not used.
    """
    return AdcInterval(attrs)
863
864 - def startArray(self, parent, attrs):
866
867 - def startColumn(self, parent, attrs):
869
872
def startDetector(self, parent, attrs):
    """
    Return a new Detector element constructed from attrs.  parent
    is not used.
    """
    return Detector(attrs)
875
876 - def startDim(self, parent, attrs):
878
def startIGWDFrame(self, parent, attrs):
    """
    Return a new IGWDFrame element constructed from attrs.  parent
    is not used.
    """
    return IGWDFrame(attrs)
881
def startLIGO_LW(self, parent, attrs):
    """
    Return a new LIGO_LW element constructed from attrs.  parent
    is not used.
    """
    return LIGO_LW(attrs)
884
885 - def startParam(self, parent, attrs):
887
888 - def startStream(self, parent, attrs):
890
891 - def startTable(self, parent, attrs):
893
894 - def startTime(self, parent, attrs):
896
897 - def startElementNS(self, uri_localname, qname, attrs):
898 (uri, localname) = uri_localname
899 try:
900 start_handler = self._startElementHandlers[(uri, localname)]
901 except KeyError:
902 raise ElementError("unknown element %s for namespace %s" % (localname, uri or NameSpace))
903 attrs = AttributesImpl(dict((attrs.getQNameByName(name), value) for name, value in attrs.items()))
904 try:
905 self.current = self.current.appendChild(start_handler(self.current, attrs))
906 except Exception as e:
907 raise type(e)("line %d: %s" % (self._locator.getLineNumber(), str(e)))
908
def endElementNS(self, uri_localname, qname):
    """
    SAX end-of-element callback.  The current element's
    .endElement() hook is run and the current element's parentNode
    then becomes the current element.  An exception raised by the
    hook is re-raised as the same type with the parser's current
    line number prefixed to its message.
    """
    # the name is not needed, but it is still unpacked
    _uri, _localname = uri_localname
    try:
        self.current.endElement()
    except Exception as e:
        raise type(e)("line %d: %s" % (self._locator.getLineNumber(), str(e)))
    else:
        self.current = self.current.parentNode
916
def characters(self, content):
    """
    SAX character-data callback.  content is passed through
    xmlunescape() and handed to the current element's
    .appendData().  An exception raised along the way is re-raised
    as the same type with the parser's current line number
    prefixed to its message.
    """
    try:
        append = self.current.appendData
        append(xmlunescape(content))
    except Exception as e:
        raise type(e)("line %d: %s" % (self._locator.getLineNumber(), str(e)))
922
923
class PartialLIGOLWContentHandler(LIGOLWContentHandler):
    """
    LIGO LW content handler object that loads only those parts of the
    document matching some criteria.  Useful, for example, when one
    wishes to read only a single table from a file.

    Example:

    >>> from glue.ligolw import utils as ligolw_utils
    >>> def contenthandler(document):
    ...     return PartialLIGOLWContentHandler(document, lambda name, attrs: name == Table.tagName)
    ...
    >>> xmldoc = ligolw_utils.load_filename("demo.xml", contenthandler = contenthandler)

    This parses "demo.xml" and returns an XML tree containing only the
    Table elements and their children.
    """
    def __init__(self, document, element_filter):
        """
        Only those elements for which element_filter(name, attrs)
        evaluates to True, and the children of those elements, will
        be loaded.
        """
        super(PartialLIGOLWContentHandler, self).__init__(document)
        self.element_filter = element_filter
        # number of currently open elements that are being loaded;
        # 0 means the parser is outside every accepted subtree
        self.depth = 0

    def startElementNS(self, uri_localname, qname, attrs):
        """
        Load the element if it lies inside an already accepted
        subtree or if element_filter() accepts it;  otherwise
        ignore it.
        """
        (uri, localname) = uri_localname
        if not self.depth > 0:
            # the filter is only consulted outside of an accepted
            # subtree, so the qualified-name attribute map it needs
            # is only built here instead of for every element
            filter_attrs = AttributesImpl(dict((attrs.getQNameByName(name), value) for name, value in attrs.items()))
            if not self.element_filter(localname, filter_attrs):
                return
        super(PartialLIGOLWContentHandler, self).startElementNS((uri, localname), qname, attrs)
        self.depth += 1

    def endElementNS(self, *args):
        """
        Close the current element if it was loaded;  end tags of
        ignored elements are skipped.
        """
        if self.depth > 0:
            self.depth -= 1
            super(PartialLIGOLWContentHandler, self).endElementNS(*args)

    def characters(self, content):
        """
        Forward character data only while inside a loaded element.
        """
        if self.depth > 0:
            super(PartialLIGOLWContentHandler, self).characters(content)
966
967
class FilteringLIGOLWContentHandler(LIGOLWContentHandler):
    """
    LIGO LW content handler that loads everything but those parts of a
    document that match some criteria.  Useful, for example, when one
    wishes to read everything except a single table from a file.

    Example:

    >>> from glue.ligolw import utils as ligolw_utils
    >>> def contenthandler(document):
    ...     return FilteringLIGOLWContentHandler(document, lambda name, attrs: name != Table.tagName)
    ...
    >>> xmldoc = ligolw_utils.load_filename("demo.xml", contenthandler = contenthandler)

    This parses "demo.xml" and returns an XML tree with all the Table
    elements and their children removed.
    """
    def __init__(self, document, element_filter):
        """
        Those elements for which element_filter(name, attrs)
        evaluates to False, and the children of those elements,
        will not be loaded.
        """
        super(FilteringLIGOLWContentHandler, self).__init__(document)
        self.element_filter = element_filter
        # number of currently open elements that are being skipped;
        # 0 means the parser is outside every rejected subtree
        self.depth = 0

    def startElementNS(self, uri_localname, qname, attrs):
        """
        Load the element unless it lies inside an already rejected
        subtree or element_filter() rejects it, in which case it is
        skipped and the skip depth grows by one.
        """
        (uri, localname) = uri_localname
        if self.depth == 0:
            # the filter is only consulted outside of a rejected
            # subtree, so the qualified-name attribute map it needs
            # is only built here instead of for every element
            filter_attrs = AttributesImpl(dict((attrs.getQNameByName(name), value) for name, value in attrs.items()))
            if self.element_filter(localname, filter_attrs):
                super(FilteringLIGOLWContentHandler, self).startElementNS((uri, localname), qname, attrs)
                return
        self.depth += 1

    def endElementNS(self, *args):
        """
        Close the current element if it was loaded;  otherwise
        step back out of one level of skipped elements.
        """
        if self.depth == 0:
            super(FilteringLIGOLWContentHandler, self).endElementNS(*args)
        else:
            self.depth -= 1

    def characters(self, content):
        """
        Forward character data only while outside of skipped
        elements.
        """
        if self.depth == 0:
            super(FilteringLIGOLWContentHandler, self).characters(content)
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
def make_parser(handler):
    """
    Return a SAX parser that delivers its events to handler, with
    namespace processing switched on and with validation and
    external general entities switched off.

    Document validation is a nice feature, but enabling it can
    require the LIGO LW DTD to be downloaded from the LDAS document
    server if the DTD is not included inline in the XML.  That
    requires a working connection to the internet and the server to
    be up.
    """
    feature_states = (
        (sax.handler.feature_namespaces, True),
        (sax.handler.feature_validation, False),
        (sax.handler.feature_external_ges, False),
    )
    parser = sax.make_parser()
    parser.setContentHandler(handler)
    for feature, enabled in feature_states:
        parser.setFeature(feature, enabled)
    return parser
1038