glue.ligolw.ligolw

1 # Copyright (C) 2006--2016 Kipp Cannon 2 # 3 # This program is free software; you can redistribute it and/or modify it 4 # under the terms of the GNU General Public License as published by the 5 # Free Software Foundation; either version 3 of the License, or (at your 6 # option) any later version. 7 # 8 # This program is distributed in the hope that it will be useful, but 9 # WITHOUT ANY WARRANTY; without even the implied warranty of 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General 11 # Public License for more details. 12 # 13 # You should have received a copy of the GNU General Public License along 14 # with this program; if not, write to the Free Software Foundation, Inc., 15 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 16 17 18 # 19 # ============================================================================= 20 # 21 # Preamble 22 # 23 # ============================================================================= 24 # 25 26 27 """ 28 This module provides class definitions corresponding to the elements that 29 can be found in a LIGO Light Weight XML file. It also provides a class 30 representing an entire LIGO Light Weight XML document, a ContentHandler 31 class for use with SAX2 parsers, and a convenience function for 32 constructing a parser. 33 """ 34 35 36 import sys 37 from xml import sax 38 from xml.sax.xmlreader import AttributesImpl 39 from xml.sax.saxutils import escape as xmlescape 40 from xml.sax.saxutils import unescape as xmlunescape 41 42 43 from glue import git_version 44 from . 
import types as ligolwtypes 45 import six 46 from functools import reduce 47 48 49 __author__ = "Kipp Cannon <kipp.cannon@ligo.org>" 50 __version__ = "git id %s" % git_version.id 51 __date__ = git_version.date 52 53 54 # 55 # ============================================================================= 56 # 57 # Document Header, and Indent 58 # 59 # ============================================================================= 60 # 61 62 63 NameSpace = u"http://ldas-sw.ligo.caltech.edu/doc/ligolwAPI/html/ligolw_dtd.txt" 64 65 66 Header = u"""<?xml version='1.0' encoding='utf-8'?> 67 <!DOCTYPE LIGO_LW SYSTEM "%s">""" % NameSpace 68 69 70 Indent = u"\t"

#
# =============================================================================
#
#                               Element Class
#
# =============================================================================
#


class ElementError(Exception):
    """
    Root of the exception hierarchy used by element objects to report
    problems.
    """

87

class attributeproxy(property):
    """
    Property subclass that presents one XML attribute of an Element
    subclass as an ordinary Python instance attribute, optionally with
    a default value.

    Reads go through the owning instance's .getAttribute() method and
    the unicode value obtained is passed through the dec function
    before being returned (by default it is left as unicode).  Writes
    pass the native Python value through the enc function and hand the
    result to .setAttribute().  Deleting the attribute calls
    .removeAttribute().

    If .getAttribute() raises KeyError when reading, the default value
    is returned if one was supplied, otherwise AttributeError is
    raised.

    If doc is not given a documentation string is generated that names
    the attribute and, when a default is set, states it.

    NOTE:  when a parsed document contains an element that omits an
    attribute for which a default is defined here, Python code is told
    the default value.  The default given here must therefore match
    the default in the XML DTD, and attributes for which the DTD
    defines no default must not be given one here, otherwise Python
    code using this library and other interfaces to the same document
    will disagree.

    Example:

    >>> class Test(Element):
    ...     Scale = attributeproxy(u"Scale", enc = u"%.17g".__mod__, dec = float, default = 1.0, doc = "This is the scale (default = 1).")
    ...
    >>> x = Test()
    >>> # have not set value, default will be returned
    >>> x.Scale
    1.0
    >>> x.Scale = 16
    >>> x.Scale
    16.0
    >>> # default can be retrieved via the .default attribute of the
    >>> # class attribute
    >>> Test.Scale.default
    1.0
    >>> # default is read-only
    >>> Test.Scale.default = 2.
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    AttributeError: can't set attribute
    >>> # internally, value is stored as unicode (for XML)
    >>> x.getAttribute("Scale")
    u'16'
    >>> # deleting an attribute restores the default value if defined
    >>> del x.Scale
    >>> x.Scale
    1.0
    """
    def __init__(self, name, enc = six.text_type, dec = six.text_type, default = None, doc = None):
        has_default = default is not None
        # without an encoder the attribute can neither be set nor
        # deleted through the proxy
        writable = enc is not None

        # the three accessors are closures over name, enc, dec and
        # default
        def read(element):
            try:
                raw = element.getAttribute(name)
            except KeyError:
                if not has_default:
                    raise AttributeError("attribute '%s' is not set" % name)
                return default
            return dec(raw)

        def store(element, value):
            element.setAttribute(name, enc(value))

        def erase(element):
            element.removeAttribute(name)

        # synthesize a documentation string when none was supplied
        if doc is None:
            doc = "The \"%s\" attribute." % name
            if has_default:
                doc = doc + " Default is \"%s\" if not set." % str(default)

        super(attributeproxy, self).__init__(read, store if writable else None, erase if writable else None, doc)
        # the property's documentation is not inherited by the
        # subclass instance, assign it explicitly
        self.__doc__ = doc
        # only remember a default if there is one, so that the
        # .default property raises AttributeError otherwise
        if has_default:
            self._default = default

    @property
    def default(self):
        """
        The default value.  Raises AttributeError if no default
        value was supplied.
        """
        return self._default

191

class Element(object):
    """
    Base class for all element types.  This class is inspired by the
    class of the same name in the Python standard library's xml.dom
    package.  One important distinction is that the standard DOM
    element is used to represent the structure of a document at a much
    finer level of detail than here.  For example, in the case of the
    standard DOM element, each XML attribute is its own element being a
    child node of its tag, while here they are simply stored as
    attributes of the tag element itself.

    Despite the differences, the documentation for the xml.dom package,
    particularly that of the Element class and its parent, the Node
    class, is useful as supplementary material in understanding how to
    use this class.
    """
    # XML tag names are case sensitive:  compare with ==, !=, etc.
    tagName = None
    validchildren = frozenset()

    @classmethod
    def validattributes(cls):
        """
        Return a frozenset of the names of the class attributes
        that are attributeproxy instances.
        """
        return frozenset(name for name in dir(cls) if isinstance(getattr(cls, name), attributeproxy))

    def __init__(self, attrs = None):
        """
        Construct an element.  The argument is a
        sax.xmlreader.AttributesImpl object (see the xml.sax
        documentation, but it's basically a dictionary-like thing)
        used to set the element attributes.  ElementError is raised
        if it contains a name not in .validattributes().
        """
        self.parentNode = None
        if attrs is None:
            self.attributes = AttributesImpl({})
        else:
            invalid = set(attrs.keys()) - self.validattributes()
            if invalid:
                # sorted so that the message is reproducible
                raise ElementError("%s element: invalid attribute(s) %s" % (self.tagName, ", ".join("'%s'" % key for key in sorted(invalid))))
            self.attributes = attrs
        self.childNodes = []
        self.pcdata = None

    def start_tag(self, indent):
        """
        Generate the string for the element's start tag.  Attribute
        values are XML-escaped (including the double quote used as
        the delimiter) so that the tag is well-formed whatever
        characters the values contain.
        """
        # u"%s" % (value,) keeps the original tolerance of
        # non-string attribute values
        return u"%s<%s%s>" % (indent, self.tagName, u"".join(u" %s=\"%s\"" % (key, xmlescape(u"%s" % (value,), {u"\"": u"&quot;"})) for key, value in self.attributes.items()))

    def end_tag(self, indent):
        """
        Generate the string for the element's end tag.
        """
        return u"%s</%s>" % (indent, self.tagName)

    def appendChild(self, child):
        """
        Add a child to this element.  The child's parentNode
        attribute is updated, too.  The child is returned.
        """
        self.childNodes.append(child)
        child.parentNode = self
        self._verifyChildren(len(self.childNodes) - 1)
        return child

    def insertBefore(self, newchild, refchild):
        """
        Insert a new child node before an existing child.  It must
        be the case that refchild is a child of this node;  if not,
        ValueError is raised.  newchild is returned.
        """
        # compare by identity, not by value
        for i, childNode in enumerate(self.childNodes):
            if childNode is refchild:
                self.childNodes.insert(i, newchild)
                newchild.parentNode = self
                self._verifyChildren(i)
                return newchild
        raise ValueError(refchild)

    def removeChild(self, child):
        """
        Remove a child from this element.  The child element is
        returned, and its parentNode element is reset.  If the
        child will not be used any more, you should call its
        unlink() method to promote garbage collection.  ValueError
        is raised if child is not a child of this node.
        """
        for i, childNode in enumerate(self.childNodes):
            if childNode is child:
                del self.childNodes[i]
                child.parentNode = None
                return child
        raise ValueError(child)

    def unlink(self):
        """
        Break internal references within the document tree rooted
        on this element to promote garbage collection.
        """
        self.parentNode = None
        for child in self.childNodes:
            child.unlink()
        del self.childNodes[:]

    def replaceChild(self, newchild, oldchild):
        """
        Replace an existing node with a new node.  It must be the
        case that oldchild is a child of this node;  if not,
        ValueError is raised.  newchild is returned.
        """
        # .index() would use compare-by-value, we want
        # compare-by-id because we want to find the exact object,
        # not something equivalent to it.
        for i, childNode in enumerate(self.childNodes):
            if childNode is oldchild:
                self.childNodes[i].parentNode = None
                self.childNodes[i] = newchild
                newchild.parentNode = self
                self._verifyChildren(i)
                return newchild
        raise ValueError(oldchild)

    def getElements(self, filter):
        """
        Return a list of elements below and including this element
        for which filter(element) returns True.  Matches found
        beneath each child come first, in child order;  this
        element, if it matches, is last.
        """
        # extend one list in place rather than concatenating a new
        # list for every child
        matches = []
        for child in self.childNodes:
            matches.extend(child.getElements(filter))
        if filter(self):
            matches.append(self)
        return matches

    def getElementsByTagName(self, tagName):
        """
        Return a list of the elements below and including this
        element whose tagName equals the given name.
        """
        return self.getElements(lambda e: e.tagName == tagName)

    def getChildrenByAttributes(self, attrs):
        """
        Return a list of the immediate children whose XML
        attributes equal all of the name/value pairs in the attrs
        dictionary.
        """
        matches = []
        for child in self.childNodes:
            try:
                if all(child.getAttribute(name) == value for name, value in six.iteritems(attrs)):
                    matches.append(child)
            except KeyError:
                # child does not have one of the attributes, so
                # it is not a match
                pass
        return matches

    def hasAttribute(self, attrname):
        """
        Return True if the named XML attribute is set.
        """
        return attrname in self.attributes

    def getAttribute(self, attrname):
        """
        Return the stored value of the named XML attribute.
        KeyError is raised if it is not set.
        """
        return self.attributes[attrname]

    def setAttribute(self, attrname, value):
        """
        Set the named XML attribute to the text form of value.
        """
        # careful:  this digs inside an AttributesImpl object and
        # modifies its internal data.  probably not a good idea,
        # but I don't know how else to edit an attribute because
        # the stupid things don't export a method to do it.
        self.attributes._attrs[attrname] = six.text_type(value)

    def removeAttribute(self, attrname):
        """
        Remove the named XML attribute.  It is not an error if the
        attribute is not set.
        """
        # careful:  this digs inside an AttributesImpl object and
        # modifies its internal data.  probably not a good idea,
        # but I don't know how else to edit an attribute because
        # the stupid things don't export a method to do it.
        try:
            del self.attributes._attrs[attrname]
        except KeyError:
            pass

    def appendData(self, content):
        """
        Add characters to the element's pcdata.
        """
        if self.pcdata is not None:
            self.pcdata += content
        else:
            self.pcdata = content

    def _verifyChildren(self, i):
        """
        Method used internally by some elements to verify that
        their children are from the allowed set and in the correct
        order following modifications to their child list.  i is
        the index of the child that has just changed.  The base
        class implementation does nothing.
        """
        pass

    def endElement(self):
        """
        Method invoked by document parser when it encounters the
        end-of-element event.  The base class implementation does
        nothing.
        """
        pass

    def write(self, fileobj = sys.stdout, indent = u""):
        """
        Recursively write an element and its children to a file.
        ElementError is raised if a child's tagName is not in
        .validchildren.
        """
        fileobj.write(self.start_tag(indent))
        fileobj.write(u"\n")
        for c in self.childNodes:
            if c.tagName not in self.validchildren:
                raise ElementError("invalid child %s for %s" % (c.tagName, self.tagName))
            c.write(fileobj, indent + Indent)
        if self.pcdata is not None:
            fileobj.write(xmlescape(self.pcdata))
            fileobj.write(u"\n")
        fileobj.write(self.end_tag(indent))
        fileobj.write(u"\n")

398

class EmptyElement(Element):
    """
    Parent class for Elements that cannot contain text.
    """
    def appendData(self, content):
        """
        Accept and discard content that holds no text (an empty
        string or only whitespace).  TypeError is raised for
        anything else.
        """
        # .strip() is empty for both "" and all-whitespace input,
        # whereas "".isspace() is False and would reject an empty
        # string
        if content.strip():
            raise TypeError("%s does not hold text" % type(self))

407

def WalkChildren(elem):
    """
    Generator yielding every descendant of elem, each parent before
    its own children and siblings in document order.  elem itself is
    not yielded.
    """
    # stack of iterators, one for each level of the tree currently
    # being traversed
    pending = [iter(elem.childNodes)]
    while pending:
        try:
            node = next(pending[-1])
        except StopIteration:
            # this level is exhausted, resume the one above
            pending.pop()
            continue
        yield node
        pending.append(iter(node.childNodes))

417

#
# =============================================================================
#
#                         Name Attribute Manipulation
#
# =============================================================================
#


class LLWNameAttr(six.text_type):
    """
    Base class hiding the pattern matching applied to various element
    names.  A subclass must provide .dec_pattern, a compiled regular
    expression with a group called "Name" that picks out the
    meaningful portion of the string, and .enc_pattern, a format
    string used with "%" to reconstruct the full string.

    This is intended to be used to provide the enc and dec functions
    for an attributeproxy instance.

    Example:

    >>> import re
    >>> class Test(Element):
    ...     class TestName(LLWNameAttr):
    ...         dec_pattern = re.compile(r"(?P<Name>[a-z0-9_]+):test\Z")
    ...         enc_pattern = u"%s:test"
    ...
    ...     Name = attributeproxy(u"Name", enc = TestName.enc, dec = TestName)
    ...
    >>> x = Test()
    >>> x.Name = u"blah"
    >>> # internally, suffix has been appended
    >>> x.getAttribute("Name")
    u'blah:test'
    >>> # but attributeproxy reports original value
    >>> x.Name
    u'blah'
    """
    def __new__(cls, name):
        """
        Create the string from the "Name" group of the match of
        .dec_pattern against name.  If that raises AttributeError
        (as happens when there is no match) name is used unchanged.
        """
        try:
            found = cls.dec_pattern.search(name)
            name = found.group(u"Name")
        except AttributeError:
            pass
        return six.text_type.__new__(cls, name)

    @classmethod
    def enc(cls, name):
        """
        Return the full string obtained by substituting name into
        .enc_pattern.
        """
        return cls.enc_pattern % name

468

#
# =============================================================================
#
#                        LIGO Light Weight XML Elements
#
# =============================================================================
#


class LIGO_LW(EmptyElement):
    """
    The LIGO_LW element.  Holds no text.
    """
    tagName = u"LIGO_LW"
    validchildren = frozenset((
        u"LIGO_LW",
        u"Comment",
        u"Param",
        u"Table",
        u"Array",
        u"Stream",
        u"IGWDFrame",
        u"AdcData",
        u"AdcInterval",
        u"Time",
        u"Detector",
    ))

    # XML attributes
    Name = attributeproxy(u"Name")
    Type = attributeproxy(u"Type")

488

class Comment(Element):
    """
    The Comment element.
    """
    tagName = u"Comment"

    def write(self, fileobj = sys.stdout, indent = u""):
        """
        Write the element to fileobj on a single line:  the
        indented start tag, the escaped text if there is any, the
        end tag without indentation, then a newline.
        """
        emit = fileobj.write
        emit(self.start_tag(indent))
        text = self.pcdata
        if text is not None:
            emit(xmlescape(text))
        emit(self.end_tag(u""))
        emit(u"\n")

502

class Param(Element):
    """
    The Param element.  May hold text and Comment children.
    """
    tagName = u"Param"
    validchildren = frozenset((u"Comment",))

    # XML attributes
    DataUnit = attributeproxy(u"DataUnit")
    Name = attributeproxy(u"Name")
    Scale = attributeproxy(u"Scale")
    Start = attributeproxy(u"Start")
    Type = attributeproxy(u"Type")
    Unit = attributeproxy(u"Unit")

517

class Table(EmptyElement):
    """
    The Table element.  Holds no text.
    """
    tagName = u"Table"
    validchildren = frozenset((u"Comment", u"Column", u"Stream"))

    # XML attributes
    Name = attributeproxy(u"Name")
    Type = attributeproxy(u"Type")

    def _verifyChildren(self, i):
        """
        Check the order of the children:  at most one Comment,
        which must come first, then any number of Columns, then at
        most one other child, which is taken to be the Stream.
        ElementError is raised on a violation.  The index i is not
        used, the whole child list is rechecked.
        """
        seen_comment = seen_column = seen_stream = False
        for child in self.childNodes:
            tag = child.tagName
            if tag == Comment.tagName:
                if seen_comment:
                    raise ElementError("only one Comment allowed in Table")
                if seen_column or seen_stream:
                    raise ElementError("Comment must come before Column(s) and Stream in Table")
                seen_comment = True
            elif tag == Column.tagName:
                if seen_stream:
                    raise ElementError("Column(s) must come before Stream in Table")
                seen_column = True
            else:
                # anything that is neither a Comment nor a Column
                # is counted as the Stream
                if seen_stream:
                    raise ElementError("only one Stream allowed in Table")
                seen_stream = True

548

class Column(EmptyElement):
    """
    Column element.  Written as a single self-closing tag.
    """
    tagName = u"Column"

    Name = attributeproxy(u"Name")
    Type = attributeproxy(u"Type")
    Unit = attributeproxy(u"Unit")

    def start_tag(self, indent):
        """
        Generate the string for the element's start tag, which for
        a Column is the complete self-closing tag.  Attribute
        values are XML-escaped (including the double quote used as
        the delimiter) so that the tag is well-formed whatever
        characters the values contain.
        """
        # u"%s" % (value,) keeps the original tolerance of
        # non-string attribute values
        return u"%s<%s%s/>" % (indent, self.tagName, u"".join(u" %s=\"%s\"" % (key, xmlescape(u"%s" % (value,), {u"\"": u"&quot;"})) for key, value in self.attributes.items()))

    def end_tag(self, indent):
        """
        Generate the string for the element's end tag.  Always
        empty because the start tag is self-closing.
        """
        return u""

    def write(self, fileobj = sys.stdout, indent = u""):
        """
        Write the self-closing tag followed by a newline to
        fileobj.  Children and text are not written.
        """
        fileobj.write(self.start_tag(indent))
        fileobj.write(u"\n")

578

class Array(EmptyElement):
    """
    The Array element.  Holds no text.
    """
    tagName = u"Array"
    validchildren = frozenset((u"Dim", u"Stream"))

    # XML attributes
    Name = attributeproxy(u"Name")
    Type = attributeproxy(u"Type")
    Unit = attributeproxy(u"Unit")

    def _verifyChildren(self, i):
        """
        Check the order of the children:  any number of Dims
        followed by at most one other child, which is taken to be
        the Stream.  ElementError is raised on a violation.  The
        index i is not used, the whole child list is rechecked.
        """
        have_stream = False
        for child in self.childNodes:
            is_dim = child.tagName == Dim.tagName
            if have_stream:
                # nothing is allowed after the Stream
                if is_dim:
                    raise ElementError("Dim(s) must come before Stream in Array")
                raise ElementError("only one Stream allowed in Array")
            if not is_dim:
                have_stream = True

601

class Dim(Element):
    """
    The Dim element.  Its text is exposed as the integer .n attribute.
    """
    tagName = u"Dim"

    # XML attributes.  Scale and Start are converted with the real_8
    # formatter and parser.
    Name = attributeproxy(u"Name")
    Scale = attributeproxy(u"Scale", enc = ligolwtypes.FormatFunc[u"real_8"], dec = ligolwtypes.ToPyType[u"real_8"])
    Start = attributeproxy(u"Start", enc = ligolwtypes.FormatFunc[u"real_8"], dec = ligolwtypes.ToPyType[u"real_8"])
    Unit = attributeproxy(u"Unit")

    @property
    def n(self):
        """
        The element's text converted with the int_8s parser, or
        None if there is no text.  Assigning None or deleting the
        attribute clears the text.
        """
        if self.pcdata is None:
            return None
        return ligolwtypes.ToPyType[u"int_8s"](self.pcdata)

    @n.setter
    def n(self, val):
        if val is None:
            self.pcdata = None
        else:
            self.pcdata = ligolwtypes.FormatFunc[u"int_8s"](val)

    @n.deleter
    def n(self):
        self.pcdata = None

    def write(self, fileobj = sys.stdout, indent = u""):
        """
        Write the element to fileobj on a single line:  the
        indented start tag, the escaped text if there is any, the
        end tag without indentation, then a newline.
        """
        emit = fileobj.write
        emit(self.start_tag(indent))
        text = self.pcdata
        if text is not None:
            emit(xmlescape(text))
        emit(self.end_tag(u""))
        emit(u"\n")

632

class Stream(Element):
    """
    The Stream element.
    """
    tagName = u"Stream"

    # XML attributes.  Delimiter and Type have defaults.
    Content = attributeproxy(u"Content")
    Delimiter = attributeproxy(u"Delimiter", default = u",")
    Encoding = attributeproxy(u"Encoding")
    Name = attributeproxy(u"Name")
    Type = attributeproxy(u"Type", default = u"Local")

    def __init__(self, *args):
        """
        Construct the element as for Element, then raise
        ElementError unless the Type attribute is "Remote" or
        "Local".
        """
        super(Stream, self).__init__(*args)
        stream_type = self.Type
        if stream_type != u"Remote" and stream_type != u"Local":
            raise ElementError("invalid Type for Stream: '%s'" % stream_type)

650

class IGWDFrame(EmptyElement):
    """
    The IGWDFrame element.  Holds no text.
    """
    tagName = u"IGWDFrame"
    validchildren = frozenset((
        u"Comment",
        u"Param",
        u"Time",
        u"Detector",
        u"AdcData",
        u"LIGO_LW",
        u"Stream",
        u"Array",
        u"IGWDFrame",
    ))

    # XML attributes
    Name = attributeproxy(u"Name")

660

class Detector(EmptyElement):
    """
    The Detector element.  Holds no text.
    """
    tagName = u"Detector"
    validchildren = frozenset((u"Comment", u"Param", u"LIGO_LW"))

    # XML attributes
    Name = attributeproxy(u"Name")

670

class AdcData(EmptyElement):
    """
    The AdcData element.  Holds no text.
    """
    tagName = u"AdcData"
    validchildren = frozenset((
        u"AdcData",
        u"Comment",
        u"Param",
        u"Time",
        u"LIGO_LW",
        u"Array",
    ))

    # XML attributes
    Name = attributeproxy(u"Name")

680

class AdcInterval(EmptyElement):
    """
    The AdcInterval element.  Holds no text.
    """
    tagName = u"AdcInterval"
    validchildren = frozenset((u"AdcData", u"Comment", u"Time"))

    # XML attributes.  DeltaT is converted with the real_8 formatter
    # and parser.
    DeltaT = attributeproxy(u"DeltaT", enc = ligolwtypes.FormatFunc[u"real_8"], dec = ligolwtypes.ToPyType[u"real_8"])
    Name = attributeproxy(u"Name")
    StartTime = attributeproxy(u"StartTime")

692

class Time(Element):
    """
    The Time element.  The Type attribute, which defaults to
    "ISO-8601", selects how the element's text is converted when
    parsed and when written.
    """
    tagName = u"Time"

    # XML attributes
    Name = attributeproxy(u"Name")
    Type = attributeproxy(u"Type", default = u"ISO-8601")

    def __init__(self, *args):
        """
        Construct the element as for Element, then raise
        ElementError unless the Type attribute is one of
        ligolwtypes.TimeTypes.
        """
        super(Time, self).__init__(*args)
        if self.Type not in ligolwtypes.TimeTypes:
            raise ElementError("invalid Type for Time: '%s'" % self.Type)

    def endElement(self):
        """
        Replace the text collected while parsing with a native
        object chosen by Type:  the result of
        dateutil.parser.parse() for "ISO-8601", a lal LIGOTimeGPS
        for "GPS", a float for "Unix".  For any other type the
        text is left as it is.
        """
        time_type = self.Type
        if time_type == u"ISO-8601":
            import dateutil.parser
            self.pcdata = dateutil.parser.parse(self.pcdata)
            return
        if time_type == u"GPS":
            from lal import LIGOTimeGPS
            # FIXME:  remove cast to string when lal swig
            # can cast from unicode
            self.pcdata = LIGOTimeGPS(str(self.pcdata))
            return
        if time_type == u"Unix":
            self.pcdata = float(self.pcdata)
        # otherwise an unsupported time type.  calling code may
        # have overridden the TimeTypes set in glue.ligolw.types;
        # just accept the value as a string

    def write(self, fileobj = sys.stdout, indent = u""):
        """
        Write the element to fileobj on a single line.  The value
        is written as its .isoformat() for "ISO-8601", with the
        "%.16g" format for "Unix", and as its unicode form for
        "GPS" and any other type.
        """
        emit = fileobj.write
        emit(self.start_tag(indent))
        value = self.pcdata
        if value is not None:
            time_type = self.Type
            if time_type == u"ISO-8601":
                text = six.text_type(value.isoformat())
            elif time_type == u"Unix":
                text = u"%.16g" % value
            else:
                # "GPS", or an unsupported time type.  for the
                # latter assume the correct thing to do is cast
                # to unicode and let calling code figure out how
                # to ensure that does the correct thing.
                text = six.text_type(value)
            emit(xmlescape(text))
        emit(self.end_tag(u""))
        emit(u"\n")

    @classmethod
    def now(cls, Name = None):
        """
        Return a new Time element in the default format (ISO-8601)
        holding the current UTC time.  If Name is not None it is
        assigned to the Name attribute.
        """
        import datetime
        elem = cls()
        if Name is not None:
            elem.Name = Name
        elem.pcdata = datetime.datetime.utcnow()
        return elem

    @classmethod
    def from_gps(cls, gps, Name = None):
        """
        Return a new Time element of Type "GPS" holding the given
        GPS time.  If Name is not None it is assigned to the Name
        attribute.

        Note:  the new Time element holds a reference to the GPS
        time, not a copy of it.  Subsequent modification of the GPS
        time object will be reflected in what gets written to disk.
        """
        elem = cls(AttributesImpl({u"Type": u"GPS"}))
        if Name is not None:
            elem.Name = Name
        elem.pcdata = gps
        return elem

774

class Document(EmptyElement):
    """
    Description of a LIGO LW file.  This is the root of the element
    tree;  it has no tag of its own in the written file.
    """
    tagName = u"Document"
    validchildren = frozenset((u"LIGO_LW",))

    def write(self, fileobj = sys.stdout, xsl_file = None):
        """
        Write the document to fileobj:  the XML header, an
        xml-stylesheet processing instruction referring to
        xsl_file if that is not None, then each child in turn.
        ElementError is raised if a child's tagName is not in
        .validchildren.
        """
        emit = fileobj.write
        emit(Header)
        emit(u"\n")
        if xsl_file is not None:
            emit(u'<?xml-stylesheet type="text/xsl" href="%s" ?>\n' % xsl_file)
        for child in self.childNodes:
            if child.tagName not in self.validchildren:
                raise ElementError("invalid child %s for %s" % (child.tagName, self.tagName))
            child.write(fileobj)

795

#
# =============================================================================
#
#                             SAX Content Handler
#
# =============================================================================
#


class LIGOLWContentHandler(sax.handler.ContentHandler, object):
    """
    ContentHandler class for parsing LIGO Light Weight documents with a
    SAX2-compliant parser.

    Example:

    >>> # initialize empty Document tree into which parsed XML tree
    >>> # will be inserted
    >>> xmldoc = Document()
    >>> # create handler instance attached to Document object
    >>> handler = LIGOLWContentHandler(xmldoc)
    >>> # open file and parse
    >>> make_parser(handler).parse(open("demo.xml"))
    >>> # write XML (default to stdout)
    >>> xmldoc.write()

    NOTE:  this example is for illustration only.  Most users will wish
    to use the .load_*() functions in the glue.ligolw.utils subpackage
    to load documents, and the .write_*() functions to write documents.
    Those functions provide additional features such as support for
    gzip'ed documents, MD5 hash computation, and Condor eviction
    trapping to avoid writing broken documents to disk.

    See also:  PartialLIGOLWContentHandler,
    FilteringLIGOLWContentHandler.
    """

    def __init__(self, document, start_handlers = None):
        """
        Initialize the handler by pointing it to the Document object
        into which the parsed file will be loaded.  start_handlers,
        if given, is a dictionary mapping (uri, localname) tuples
        to callables taking (parent, attrs) and returning a new
        element;  its entries are added to, and override, the
        built-in ones.
        """
        # the base class initializer sets ._locator to None so that
        # it exists even before a parser supplies one
        super(LIGOLWContentHandler, self).__init__()
        self.current = self.document = document

        self._startElementHandlers = {
            (None, AdcData.tagName): self.startAdcData,
            (None, AdcInterval.tagName): self.startAdcInterval,
            (None, Array.tagName): self.startArray,
            (None, Column.tagName): self.startColumn,
            (None, Comment.tagName): self.startComment,
            (None, Detector.tagName): self.startDetector,
            (None, Dim.tagName): self.startDim,
            (None, IGWDFrame.tagName): self.startIGWDFrame,
            (None, LIGO_LW.tagName): self.startLIGO_LW,
            (None, Param.tagName): self.startParam,
            (None, Stream.tagName): self.startStream,
            (None, Table.tagName): self.startTable,
            (None, Time.tagName): self.startTime,
        }
        if start_handlers is not None:
            self._startElementHandlers.update(start_handlers)

    def _with_line_number(self, exc):
        """
        Return an exception of the same type as exc whose message
        is prefixed with the parser's current line number.  If that
        exception cannot be built (no locator is available, or the
        exception type cannot be constructed from a single message
        string) exc itself is returned so that the original error
        is not masked.
        """
        try:
            return type(exc)("line %d: %s" % (self._locator.getLineNumber(), str(exc)))
        except Exception:
            return exc

    def startAdcData(self, parent, attrs):
        """Return a new AdcData element built from attrs."""
        return AdcData(attrs)

    def startAdcInterval(self, parent, attrs):
        """Return a new AdcInterval element built from attrs."""
        return AdcInterval(attrs)

    def startArray(self, parent, attrs):
        """Return a new Array element built from attrs."""
        return Array(attrs)

    def startColumn(self, parent, attrs):
        """Return a new Column element built from attrs."""
        return Column(attrs)

    def startComment(self, parent, attrs):
        """Return a new Comment element built from attrs."""
        return Comment(attrs)

    def startDetector(self, parent, attrs):
        """Return a new Detector element built from attrs."""
        return Detector(attrs)

    def startDim(self, parent, attrs):
        """Return a new Dim element built from attrs."""
        return Dim(attrs)

    def startIGWDFrame(self, parent, attrs):
        """Return a new IGWDFrame element built from attrs."""
        return IGWDFrame(attrs)

    def startLIGO_LW(self, parent, attrs):
        """Return a new LIGO_LW element built from attrs."""
        return LIGO_LW(attrs)

    def startParam(self, parent, attrs):
        """Return a new Param element built from attrs."""
        return Param(attrs)

    def startStream(self, parent, attrs):
        """Return a new Stream element built from attrs."""
        return Stream(attrs)

    def startTable(self, parent, attrs):
        """Return a new Table element built from attrs."""
        return Table(attrs)

    def startTime(self, parent, attrs):
        """Return a new Time element built from attrs."""
        return Time(attrs)

    def startElementNS(self, uri_localname, qname, attrs):
        """
        Create the element for a start tag with the handler
        registered for its (uri, localname), append it to the
        current element and make it the current element.
        ElementError is raised for an unknown element.  Exceptions
        raised while constructing or appending the element are
        re-raised with the line number added to the message.
        """
        (uri, localname) = uri_localname
        try:
            start_handler = self._startElementHandlers[(uri, localname)]
        except KeyError:
            raise ElementError("unknown element %s for namespace %s" % (localname, uri or NameSpace))
        # re-key the attributes by qualified name
        attrs = AttributesImpl(dict((attrs.getQNameByName(name), value) for name, value in attrs.items()))
        try:
            self.current = self.current.appendChild(start_handler(self.current, attrs))
        except Exception as e:
            raise self._with_line_number(e)

    def endElementNS(self, uri_localname, qname):
        """
        Call the current element's .endElement() method and make
        its parent the current element.  Exceptions raised by
        .endElement() are re-raised with the line number added to
        the message.
        """
        (uri, localname) = uri_localname
        try:
            self.current.endElement()
        except Exception as e:
            raise self._with_line_number(e)
        self.current = self.current.parentNode

    def characters(self, content):
        """
        Pass character data, after XML-unescaping, to the current
        element's .appendData() method.  Exceptions are re-raised
        with the line number added to the message.
        """
        # NOTE(review):  SAX parsers normally resolve the predefined
        # entities before calling characters();  confirm that this
        # additional xmlunescape() is intentional
        try:
            self.current.appendData(xmlunescape(content))
        except Exception as e:
            raise self._with_line_number(e)

922

class PartialLIGOLWContentHandler(LIGOLWContentHandler):
    """
    LIGO LW content handler that loads only the parts of a document
    that match some criteria.  Useful, for example, when one wishes to
    read only a single table from a file.

    Example:

    >>> from glue.ligolw import utils as ligolw_utils
    >>> def contenthandler(document):
    ...     return PartialLIGOLWContentHandler(document, lambda name, attrs: name == Table.tagName)
    ...
    >>> xmldoc = ligolw_utils.load_filename("demo.xml", contenthandler = contenthandler)

    This parses "demo.xml" and returns an XML tree containing only the
    Table elements and their children.
    """
    def __init__(self, document, element_filter):
        """
        Only those elements for which element_filter(name, attrs)
        evaluates to True, and the children of those elements, will
        be loaded.
        """
        super(PartialLIGOLWContentHandler, self).__init__(document)
        self.element_filter = element_filter
        # number of currently open elements that are being loaded
        self.depth = 0

    def startElementNS(self, uri_localname, qname, attrs):
        """
        Load the element if it is inside an element that is being
        loaded or if the filter accepts it;  otherwise ignore it.
        """
        (uri, localname) = uri_localname
        filter_attrs = AttributesImpl(dict((attrs.getQNameByName(name), value) for name, value in attrs.items()))
        if self.depth <= 0 and not self.element_filter(localname, filter_attrs):
            return
        super(PartialLIGOLWContentHandler, self).startElementNS((uri, localname), qname, attrs)
        self.depth += 1

    def endElementNS(self, *args):
        """
        Close the element if it is one that was loaded.
        """
        if self.depth <= 0:
            return
        self.depth -= 1
        super(PartialLIGOLWContentHandler, self).endElementNS(*args)

    def characters(self, content):
        """
        Forward character data only while inside a loaded element.
        """
        if self.depth <= 0:
            return
        super(PartialLIGOLWContentHandler, self).characters(content)

966

class FilteringLIGOLWContentHandler(LIGOLWContentHandler):
    """
    LIGO LW content handler that loads everything except the parts of
    a document that match some criteria.  Useful, for example, when
    one wishes to read everything except a single table from a file.

    Example:

    >>> from glue.ligolw import utils as ligolw_utils
    >>> def contenthandler(document):
    ...     return FilteringLIGOLWContentHandler(document, lambda name, attrs: name != Table.tagName)
    ...
    >>> xmldoc = ligolw_utils.load_filename("demo.xml", contenthandler = contenthandler)

    This parses "demo.xml" and returns an XML tree with all the Table
    elements and their children removed.
    """
    def __init__(self, document, element_filter):
        """
        Those elements for which element_filter(name, attrs)
        evaluates to False, and the children of those elements,
        will not be loaded.
        """
        super(FilteringLIGOLWContentHandler, self).__init__(document)
        self.element_filter = element_filter
        # number of currently open elements that are being skipped
        self.depth = 0

    def startElementNS(self, uri_localname, qname, attrs):
        """
        Skip the element if it is inside an element that is being
        skipped or if the filter rejects it;  otherwise load it.
        """
        (uri, localname) = uri_localname
        filter_attrs = AttributesImpl(dict((attrs.getQNameByName(name), value) for name, value in attrs.items()))
        if self.depth != 0 or not self.element_filter(localname, filter_attrs):
            self.depth += 1
            return
        super(FilteringLIGOLWContentHandler, self).startElementNS((uri, localname), qname, attrs)

    def endElementNS(self, *args):
        """
        Close the element if it was loaded, otherwise leave one
        level of skipped elements.
        """
        if self.depth != 0:
            self.depth -= 1
            return
        super(FilteringLIGOLWContentHandler, self).endElementNS(*args)

    def characters(self, content):
        """
        Forward character data only while not inside a skipped
        element.
        """
        if self.depth != 0:
            return
        super(FilteringLIGOLWContentHandler, self).characters(content)

1012

#
# =============================================================================
#
#                            Convenience Functions
#
# =============================================================================
#


def make_parser(handler):
    """
    Convenience function to construct a document parser, attached to
    the given content handler, with namespaces enabled and validation
    and external general entities disabled.  Document validation is a
    nice feature, but enabling validation can require the LIGO LW DTD
    to be downloaded from the LDAS document server if the DTD is not
    included inline in the XML.  This requires a working connection to
    the internet and the server to be up.
    """
    parser = sax.make_parser()
    parser.setContentHandler(handler)
    settings = (
        (sax.handler.feature_namespaces, True),
        (sax.handler.feature_validation, False),
        (sax.handler.feature_external_ges, False),
    )
    for feature, enabled in settings:
        parser.setFeature(feature, enabled)
    return parser

1038

Source Code for Module glue.ligolw.ligolw