1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27 """
28 Ask Kipp to document this!
29 """
30
31
32 import itertools
33
34
35 from glue import git_version
36 from glue import iterutils
37 from glue import segments
38 from glue import segmentsUtils
39 from .. import ligolw
40 from .. import lsctables
41
42
43 __author__ = "Kipp Cannon <kipp.cannon@ligo.org>"
44 __version__ = "git id %s" % git_version.id
45 __date__ = git_version.date
58 """
59 A description of a LIGO Light-Weight XML segment list. Instances
60 of this class carry all the metadata associated with a LIGO Light-
61 Weight XML segment list including its name, version number, a
62 comment, and so on.
63
64 LIGO Light-Weight XML segment lists are three-state objects. A
65 segment list can be on, off, or undefined. Two separate sequences
66 of segments are used for this: the "valid" list defines the
67 intervals when the state of the segment list is known, and the
68 "active" list defines the intervals when the segment list is on.
69 It is not an error for the active list to be on during times when
70 the segment list's state is unknown; this code does not impose any
71 policy in that regard, but it should be expected that consumers of
72 the segment list will treat all times when the segment list's state
73 is unknown the same way.
74
75 Example:
76
77 >>> from glue.segments import *
78 >>> segs = segmentlist([segment(0, 10), segment(20, 30)])
79 >>> validity = segmentlist([segment(0, 10), segment(25, 100)])
80 >>> x = LigolwSegmentList(active = segs, valid = validity, instruments = set(("H1",)), name = "test")
81 >>> # x made copies of arguments
82 >>> del segs[:]
83 >>> segs
84 []
85 >>> x.active
86 [segment(0, 10), segment(20, 30)]
87 >>> # some typical operations
88 >>> x.active & x.valid # known true
89 [segment(0, 10), segment(25, 30)]
90 >>> ~x.active & x.valid # known false
91 [segment(30, 100)]
92 >>> x.active & ~x.valid # not an error for this to be non-null
93 [segment(20, 25)]
94 >>> # make a copy
95 >>> y = LigolwSegmentList(x)
96 >>> del y.active[:]
97 >>> y.active
98 []
99 >>> x.active
100 [segment(0, 10), segment(20, 30)]
101
102 The arithmetic operators on this class implement Kleene's strong
103 ternary logic, taking "true" to be (active & valid), "false" to be
104 (~active & valid), and "unknown" to be ~valid.
105
106 Example:
107
108 >>> from glue.segments import *
109 >>> segs = segmentlist([segment(0, 10), segment(20, 30)])
110 >>> validity = segmentlist([segment(0, 35)])
111 >>> x = LigolwSegmentList(active = segs, valid = validity, instruments = set(("H1",)), name = "test")
112 >>> segs = segmentlist([segment(40, 50), segment(60, 70)])
113 >>> validity = segmentlist([segment(35, 100)])
114 >>> y = LigolwSegmentList(active = segs, valid = validity, instruments = set(("H1",)), name = "test")
115 >>> (x | y).active
116 [segment(0, 10), segment(20, 30), segment(40, 50), segment(60, 70)]
117 >>> (x | y).valid
118 [segment(0, 10), segment(20, 30), segment(40, 50), segment(60, 70)]
119 >>> (x & y).active
120 []
121 >>> (x & y).valid
122 [segment(10, 20), segment(30, 40), segment(50, 60), segment(70, 100)]
123 >>> (~x).active
124 [segment(10, 20), segment(30, 35)]
125 >>> (~x).valid
126 [segment(0, 35)]
127
128 With ternary logic the three basic Boolean operations AND, OR, and
129 NOT, do not form a complete set of operations. That is, there
130 exist algebraic functions that cannot be implemented using
131 combinations of these three operators alone. One additional
132 operator is required to construct a complete basis of logic
133 operations, and we provide one: .isfalse(). This operation
134 inverts intervals of known state, and maps intervals of unknown
135 state to false.
136
137 >>> x.isfalse().active
138 [segment(10, 20), segment(30, 35)]
139 >>> x.isfalse().valid
140 [segment(-infinity, infinity)]
141
142 Unfortunately, one example of a function that cannot be constructed
143 from the three basic Boolean operators is perhaps the most common
144 operation we wish to perform with our tri-state segment lists.
145 Often we wish to construct a tri-state list from two tri-state
146 lists such that the final list's interval of validity is the union
147 of the intervals of validity of the two source lists, and the state
148 of the final list in that interval is the union of the states of the
149 source lists in that interval. For example if from one source we
150 know the state of some process spanning some time, and from another
151 source we know the state of the same process spanning some other
152 time, taken together we know the state of that process over the
153 union of those times. This function is given by
154
155 >>> z = ~(x.isfalse() | y.isfalse() | (x & ~x & y & ~y))
156 >>> z.active
157 [segment(0, 10), segment(20, 30), segment(40, 50), segment(60, 70)]
158 >>> z.valid
159 [segment(0, 100)]
160
161 Because this is inconvenient to type, slow, and not readable, a
162 special in-place arithmetic operation named .update() is provided
163 to implement this operation.
164
165 >>> z = LigolwSegmentList(x).update(y)
166 >>> z.active
167 [segment(0, 10), segment(20, 30), segment(40, 50), segment(60, 70)]
168 >>> z.valid
169 [segment(0, 100)]
170
171 The .update() method is not exactly equivalent to the operation
172 above. The .update() method demands that the two input lists'
173 states be identical where their intervals of validity intersect.
174 """
175
176
177
178
179
# Column names for the three segment-related tables.  Each tuple lists
# the same columns that appear for the corresponding table
# (segment_definer, segment_summary, segment) in the example XML
# document shown in the LigolwSegments docstring.
# NOTE(review): where these tuples are consumed is not visible in this
# part of the file --- confirm before changing their order.
segment_def_columns = (
    u"process_id",
    u"segment_def_id",
    u"ifos",
    u"name",
    u"version",
    u"comment",
)
segment_sum_columns = (
    u"process_id",
    u"segment_sum_id",
    u"start_time",
    u"start_time_ns",
    u"end_time",
    u"end_time_ns",
    u"segment_def_id",
    u"comment",
)
segment_columns = (
    u"process_id",
    u"segment_id",
    u"start_time",
    u"start_time_ns",
    u"end_time",
    u"end_time_ns",
    u"segment_def_id",
)
183
184 - def __init__(self, active = (), valid = (), instruments = (), name = None, version = None, comment = None):
217
def sort(self, *args):
    """
    Sort the "valid" and "active" segment lists in place.

    Any positional arguments are forwarded unchanged to the .sort()
    method of each list, which allows the caller to select a
    different ordering, e.g. by supplying an alternate comparison
    function.  With no arguments the lists are ordered by segment
    start time, with ties broken by end time.
    """
    # valid first, then active, each with the same arguments
    for seglist in (self.valid, self.active):
        seglist.sort(*args)
228
230 """
231 Coalesce the internal segment lists. Returns self.
232 """
233 self.valid.coalesce()
234 self.active.coalesce()
235 return self
236
238 """
239 If either is true the result is true, if both are false the
240 result is false, otherwise the result is unknown.
241 """
242 if self.instruments != other.instruments:
243 raise ValueError("incompatible metadata")
244
245 self.active &= self.valid
246 self.active |= other.active & other.valid
247
248 self.valid = (self.valid & other.valid) | self.active
249 return self
250
252 """
253 If either is false the result is false, if both are true
254 the result is true, otherwise the result is unknown.
255 """
256 if self.instruments != other.instruments:
257 raise ValueError("incompatible metadata")
258
259 false = (self.valid & ~self.active) | (other.valid & ~other.active)
260
261 self.active &= self.valid
262 self.active &= other.active & other.valid
263
264 self.valid = false | self.active
265 return self
266
268 """
269 If either is true the result is true, if both are false the
270 result is false, otherwise the result is unknown.
271 """
272 result = type(self)(self)
273 result |= other
274 return result
275
277 """
278 If either is false the result is false, if both are true
279 the result is true, otherwise the result is unknown.
280 """
281 result = type(self)(self)
282 result &= other
283 return result
284
286 """
287 If unknown the result is unknown, otherwise the state is
288 inverted.
289 """
290 result = type(self)(self)
291 result.active = ~result.active & result.valid
292 return result
293
303
305 if self.instruments != other.instruments:
306 raise ValueError("incompatible metadata")
307 if (self.valid & other.valid).intersects(self.active ^ other.active):
308 raise ValueError("result over-determined")
309 self.active &= self.valid
310 self.active |= other.active & other.valid
311 self.valid |= other.valid
312 return self
313
325 """
326 An interface shim between code that makes use of segments in
327 glue.segments form, and LIGO Light-Weight XML I/O code.
328
329 This class is "attached" to an XML document object, at which time
330 it parses and extracts the segment lists from the document, and
331 clears the document's segment tables (preventing a second
332 LigolwSegments object from being meaningfully attached to the same
333 document). When the application is finished manipulating the
334 segment lists, they can be inserted into the XML document at which
335 time the contents of the LigolwSegments object are cleared
336 (preventing any further manipulations).
337
338 This class is a subclass of the Python set builtin. Each element
339 of the set is a LigolwSegmentList instance describing one of the
340 segment lists in the original XML document.
341
342 This class may be used as a context manager to automate the
343 replacement of segments back into the XML document, including in
344 the event of an untrapped exception. When used as a context
345 manager, the process parameter of the .__init__() method is not
346 optional.
347
348 Example:
349
350 >>> import sys
351 >>> from glue.segments import *
352 >>> from lal import LIGOTimeGPS
353 >>> from glue.ligolw import ligolw, lsctables
354 >>> xmldoc = ligolw.Document()
355 >>> xmldoc.appendChild(ligolw.LIGO_LW()) # doctest: +ELLIPSIS
356 <glue.ligolw.ligolw.LIGO_LW object at ...>
357 >>> process = lsctables.Process(process_id = lsctables.ProcessTable.get_next_id())
358 >>> with LigolwSegments(xmldoc, process) as xmlsegments:
359 ... h1segs = segmentlist([segment(LIGOTimeGPS(0), LIGOTimeGPS(10))])
360 ... xmlsegments.insert_from_segmentlistdict({"H1": h1segs}, "test")
361 ... l1segs = h1segs.shift(5)
362 ... xmlsegments.add(LigolwSegmentList(active = l1segs, valid = segmentlist([segment(-infinity(), infinity())]), instruments = set(["L1"]), name = "test"))
363 >>> xmldoc.write(sys.stdout) # doctest: +NORMALIZE_WHITESPACE
364 <?xml version='1.0' encoding='utf-8'?>
365 <!DOCTYPE LIGO_LW SYSTEM "http://ldas-sw.ligo.caltech.edu/doc/ligolwAPI/html/ligolw_dtd.txt">
366 <LIGO_LW>
367 <Table Name="segment_definer:table">
368 <Column Type="ilwd:char" Name="segment_definer:process_id"/>
369 <Column Type="ilwd:char" Name="segment_definer:segment_def_id"/>
370 <Column Type="lstring" Name="segment_definer:ifos"/>
371 <Column Type="lstring" Name="segment_definer:name"/>
372 <Column Type="int_4s" Name="segment_definer:version"/>
373 <Column Type="lstring" Name="segment_definer:comment"/>
374 <Stream Delimiter="," Type="Local" Name="segment_definer:table">
375 "process:process_id:0","segment_definer:segment_def_id:0","H1","test",,,
376 "process:process_id:0","segment_definer:segment_def_id:1","L1","test",,,
377 </Stream>
378 </Table>
379 <Table Name="segment_summary:table">
380 <Column Type="ilwd:char" Name="segment_summary:process_id"/>
381 <Column Type="ilwd:char" Name="segment_summary:segment_sum_id"/>
382 <Column Type="int_4s" Name="segment_summary:start_time"/>
383 <Column Type="int_4s" Name="segment_summary:start_time_ns"/>
384 <Column Type="int_4s" Name="segment_summary:end_time"/>
385 <Column Type="int_4s" Name="segment_summary:end_time_ns"/>
386 <Column Type="ilwd:char" Name="segment_summary:segment_def_id"/>
387 <Column Type="lstring" Name="segment_summary:comment"/>
388 <Stream Delimiter="," Type="Local" Name="segment_summary:table">
389 "process:process_id:0","segment_summary:segment_sum_id:0",4294967295,4294967295,2147483647,4294967295,"segment_definer:segment_def_id:1",,
390 </Stream>
391 </Table>
392 <Table Name="segment:table">
393 <Column Type="ilwd:char" Name="segment:process_id"/>
394 <Column Type="ilwd:char" Name="segment:segment_id"/>
395 <Column Type="int_4s" Name="segment:start_time"/>
396 <Column Type="int_4s" Name="segment:start_time_ns"/>
397 <Column Type="int_4s" Name="segment:end_time"/>
398 <Column Type="int_4s" Name="segment:end_time_ns"/>
399 <Column Type="ilwd:char" Name="segment:segment_def_id"/>
400 <Stream Delimiter="," Type="Local" Name="segment:table">
401 "process:process_id:0","segment:segment_id:0",0,0,10,0,"segment_definer:segment_def_id:0",
402 "process:process_id:0","segment:segment_id:1",5,0,15,0,"segment_definer:segment_def_id:1"
403 </Stream>
404 </Table>
405 </LIGO_LW>
406 >>> xmlsegments = LigolwSegments(xmldoc)
407 >>> xmlsegments.get_by_name("test")
408 {u'H1': [segment(LIGOTimeGPS(0, 0), LIGOTimeGPS(10, 0))], u'L1': [segment(LIGOTimeGPS(5, 0), LIGOTimeGPS(15, 0))]}
409 >>> xmlsegments.get_by_name("wrong name")
410 Traceback (most recent call last):
411 ...
412 KeyError: "no segmentlists named 'wrong name'"
413
414 NOTE: the process of extracting and re-inserting the contents of
415 the segment tables will, in general, randomize the IDs assigned to
416 the rows of these tables. If there are references to segment,
417 segment_summary, or segment_definer row IDs in other tables in the
418 document, those references will be broken by this process.
419 """
420 - def __init__(self, xmldoc, process = None):
489
490
491
492
493
494
496 """
497 Parse the contents of the file object fileobj as a
498 segwizard-format segment list, and insert the result as a
499 new list of "active" segments into this LigolwSegments
500 object. A new entry will be created in the segment_definer
501 table for the segment list, and instruments, name and
502 comment are used to populate the entry's metadata. Note
503 that the "valid" segments are left empty, nominally
504 indicating that there are no periods of validity. Returns
505 the newly created LigolwSegmentList object.
506 """
507 ligolw_segment_list = LigolwSegmentList(active = segmentsUtils.fromsegwizard(fileobj, coltype = lsctables.LIGOTimeGPS), instruments = instruments, name = name, version = version, comment = comment)
508 self.add(ligolw_segment_list)
509 return ligolw_segment_list
510
511
513 """
514 Insert the segments from the segmentlistdict object
515 seglists as a new list of "active" segments into this
516 LigolwSegments object. The dictionary's keys are assumed
517 to provide the instrument name for each segment list. A
518 new entry will be created in the segment_definer table for
519 the segment lists, and the dictionary's keys, the name, and
520 comment will be used to populate the entry's metadata.
521 """
522 for instrument, segments in seglists.items():
523 self.add(LigolwSegmentList(active = segments, instruments = set([instrument]), name = name, version = version, comment = comment))
524
525
527 """
528 Coalesce the segment lists. Returns self.
529 """
530 for ligolw_segment_list in self:
531 ligolw_segment_list.coalesce()
532 return self
533
534
def sort(self, *args):
    """
    Sort every segment list held by this object.

    Each element's own .sort() method is invoked with the optional
    positional arguments passed through unchanged, so an alternate
    comparison function can be supplied to control the ordering.
    The default ordering is by segment start time with ties broken
    by end time.
    """
    for seglist in self:
        seglist.sort(*args)
545
546
548 """
549 Identifies segment lists that differ only in their
550 instruments --- they have the same valid and active
551 segments, the same name, version and the same comment ---
552 and then deletes all but one of them, leaving just a single
553 list having the union of the instruments.
554 """
555 self.sort()
556 segment_lists = dict(enumerate(self))
557 for target, source in [(idx_a, idx_b) for (idx_a, seglist_a), (idx_b, seglist_b) in itertools.combinations(segment_lists.items(), 2) if seglist_a.valid == seglist_b.valid and seglist_a.active == seglist_b.active and seglist_a.name == seglist_b.name and seglist_a.version == seglist_b.version and seglist_a.comment == seglist_b.comment]:
558 try:
559 source = segment_lists.pop(source)
560 except KeyError:
561 continue
562 segment_lists[target].instruments |= source.instruments
563 self.clear()
564 self.update(segment_lists.values())
565
566
568 """
569 Retrieve the active segmentlists whose name equals name.
570 The result is a segmentlistdict indexed by instrument. All
571 segmentlist objects within it will be copies of the
572 contents of this object, modifications will not affect the
573 contents of this object. If clip_to_valid is True then the
574 segmentlists will be intersected with their respective
575 intervals of validity, otherwise they will be the verbatim
576 active segments.
577
578 NOTE: the intersection operation required by clip_to_valid
579 will yield undefined results unless the active and valid
580 segmentlist objects are coalesced.
581 """
582 result = segments.segmentlistdict()
583 for seglist in self:
584 if seglist.name != name:
585 continue
586 segs = seglist.active
587 if clip_to_valid:
588
589 segs = segs & seglist.valid
590 for instrument in seglist.instruments:
591 if instrument in result:
592 raise ValueError("multiple '%s' segmentlists for instrument '%s'" % (name, instrument))
593
594
595
596
597 result[instrument] = segments.segmentlist(segs)
598 if not result:
599 raise KeyError("no segmentlists named '%s'" % name)
600 return result
601
602
def finalize(self, process_row = None):
    """
    Move the LigolwSegmentList objects held by this set back into
    the XML segment tables in preparation for output.  Every
    element is removed from the set as its segment_definer row is
    created, so the set is empty afterwards.  Returns None.

    The process ID recorded in the new rows is taken from
    process_row if one is given, otherwise from the process row
    supplied to .__init__().  ValueError is raised if neither is
    available.

    Segments from all lists are inserted into the tables in time
    order.  External applications must NOT rely on this: it is
    done only in the belief that it might help in building well
    balanced indexed databases from the resulting files, and it
    may be discontinued without notice if that proves false or
    inconvenient to preserve.
    """
    # an explicitly supplied process row takes precedence over
    # the one given at construction time
    if process_row is not None:
        owner = process_row
    elif self.process is not None:
        owner = self.process
    else:
        raise ValueError("must supply a process row to .__init__()")
    process_id = owner.process_id

    # NOTE(review): presumably this brings each table's ID
    # counter up to date with rows already present so that new
    # IDs do not collide --- confirm against lsctables
    for table in (self.segment_def_table, self.segment_table, self.segment_sum_table):
        table.sync_next_id()

    # put the segments in each list into order before they are
    # merged below
    self.sort()

    def rows_for(segs, target_table, process_id, segment_def_id):
        # lazily build one row per segment.  the row's own ID is
        # deliberately left unset here;  it is assigned when the
        # row is finally appended to its table
        id_column = target_table.next_id.column_name
        for seg in segs:
            row = target_table.RowType()
            row.segment = seg
            row.process_id = process_id
            row.segment_def_id = segment_def_id
            if isinstance(row, lsctables.SegmentSum):
                # only segment_summary rows carry a comment
                row.comment = None
            yield row, target_table, id_column

    def definer_order(seglist):
        # order in which segment_definer rows are created
        return (seglist.name, sorted(seglist.instruments), seglist.version)

    # create the segment_definer rows, emptying this set as we
    # go, and collect a pair of row generators (valid, then
    # active) for each list
    row_generators = []
    for seglist in sorted(self, key = definer_order):
        self.remove(seglist)

        definer = self.segment_def_table.RowType()
        definer.process_id = process_id
        definer.segment_def_id = self.segment_def_table.get_next_id()
        definer.instruments = seglist.instruments
        definer.name = seglist.name
        definer.version = seglist.version
        definer.comment = seglist.comment
        self.segment_def_table.append(definer)

        row_generators.append(rows_for(seglist.valid, self.segment_sum_table, process_id, definer.segment_def_id))
        row_generators.append(rows_for(seglist.active, self.segment_table, process_id, definer.segment_def_id))

    # NOTE(review): iterutils.inorder() presumably merges the
    # generators into a single ordered stream --- confirm.  row
    # IDs are assigned here, in the order the rows come out
    for row, target_table, id_column in iterutils.inorder(*row_generators):
        setattr(row, id_column, target_table.get_next_id())
        target_table.append(row)
684
685
687 if self.process is None:
688 raise ValueError("must supply a process row to .__init__()")
689 return self
690
691
695
696
697
698
699
700
701
702
703
704
705
@lsctables.use_in
class LIGOLWContentHandler(ligolw.LIGOLWContentHandler):
    """
    Minimal content handler suitable for loading documents containing
    segment tables.  It adds nothing of its own to the base
    ligolw.LIGOLWContentHandler;  the lsctables.use_in decorator is
    applied to it.
    """
713
729
779