1   
  2   
  3   
  4   
  5   
  6   
  7   
  8   
  9   
 10   
 11   
 12   
 13   
 14   
 15   
 16   
 17   
 18   
 19   
 20   
 21   
 22   
 23   
 24   
 25   
 26   
 27  """ 
 28  Utilities for moving segment lists between glue.segments objects and LIGO Light-Weight XML documents.
 29  """ 
 30   
 31   
 32  import itertools 
 33   
 34   
 35  from glue import git_version 
 36  from glue import iterutils 
 37  from glue import segments 
 38  from glue import segmentsUtils 
 39  from .. import ligolw 
 40  from .. import lsctables 
 41   
 42   
 43  __author__ = "Kipp Cannon <kipp.cannon@ligo.org>" 
 44  __version__ = "git id %s" % git_version.id 
 45  __date__ = git_version.date 
 58          """ 
 59          A description of a LIGO Light-Weight XML segment list.  Instances 
 60          of this class carry all the metadata associated with a LIGO Light- 
 61          Weight XML segment list including its name, version number, a 
 62          comment, and so on. 
 63   
 64          LIGO Light-Weight XML segment lists are three-state objects.  A 
 65          segment list can be on, off, or undefined.  Two separate sequences 
 66          of segments are used for this:  the "valid" list defines the 
 67          intervals when the state of the segment list is known, and the 
 68          "active" list defines the intervals when the segment list is on. 
 69          It is not an error for the active list to be on during times when 
 70          the segment list's state is unknown; this code does not impose any
 71          policy in that regard, but it should be expected that consumers of 
 72          the segment list will treat all times when the segment list's state 
 73          is unknown the same way. 
 74   
 75          Example: 
 76   
 77          >>> from glue.segments import * 
 78          >>> segs = segmentlist([segment(0, 10), segment(20, 30)]) 
 79          >>> validity = segmentlist([segment(0, 10), segment(25, 100)]) 
 80          >>> x = LigolwSegmentList(active = segs, valid = validity, instruments = set(("H1",)), name = "test") 
 81          >>> # x made copies of arguments 
 82          >>> del segs[:] 
 83          >>> segs 
 84          [] 
 85          >>> x.active 
 86          [segment(0, 10), segment(20, 30)] 
 87          >>> # some typical operations 
 88          >>> x.active & x.valid  # known true 
 89          [segment(0, 10), segment(25, 30)] 
 90          >>> ~x.active & x.valid # known false 
 91          [segment(30, 100)] 
 92          >>> x.active & ~x.valid # not an error for this to be non-null 
 93          [segment(20, 25)] 
 94          >>> # make a copy 
 95          >>> y = LigolwSegmentList(x) 
 96          >>> del y.active[:] 
 97          >>> y.active 
 98          [] 
 99          >>> x.active 
100          [segment(0, 10), segment(20, 30)] 
101   
102          The arithmetic operators on this class implement Kleene's strong 
103          ternary logic, taking "true" to be (active & valid), "false" to be 
104          (~active & valid), and "unknown" to be ~valid. 
105   
106          Example: 
107   
108          >>> from glue.segments import * 
109          >>> segs = segmentlist([segment(0, 10), segment(20, 30)]) 
110          >>> validity = segmentlist([segment(0, 35)]) 
111          >>> x = LigolwSegmentList(active = segs, valid = validity, instruments = set(("H1",)), name = "test") 
112          >>> segs = segmentlist([segment(40, 50), segment(60, 70)]) 
113          >>> validity = segmentlist([segment(35, 100)]) 
114          >>> y = LigolwSegmentList(active = segs, valid = validity, instruments = set(("H1",)), name = "test") 
115          >>> (x | y).active 
116          [segment(0, 10), segment(20, 30), segment(40, 50), segment(60, 70)] 
117          >>> (x | y).valid 
118          [segment(0, 10), segment(20, 30), segment(40, 50), segment(60, 70)] 
119          >>> (x & y).active 
120          [] 
121          >>> (x & y).valid 
122          [segment(10, 20), segment(30, 40), segment(50, 60), segment(70, 100)] 
123          >>> (~x).active 
124          [segment(10, 20), segment(30, 35)] 
125          >>> (~x).valid 
126          [segment(0, 35)] 
127   
128          With ternary logic the three basic Boolean operations AND, OR, and 
129          NOT, do not form a complete set of operations.  That is, there 
130          exist algebraic functions that cannot be implemented using 
131          combinations of these three operators alone.  One additional 
132          operator is required to construct a complete basis of logic 
133          operations, and we provide one:  .isfalse().  This operation 
134          inverts intervals of known state, and maps intervals of unknown 
135          state to false. 
136   
137          >>> x.isfalse().active 
138          [segment(10, 20), segment(30, 35)] 
139          >>> x.isfalse().valid 
140          [segment(-infinity, infinity)] 
141   
142          Unfortunately, one example of a function that cannot be constructed 
143          from the three basic Boolean operators is perhaps the most common 
144          operation we wish to perform with our tri-state segment lists. 
145          Often we wish to construct a tri-state list from two tri-state 
146          lists such that the final list's interval of validity is the union 
147          of the intervals of validity of the two source lists, and the state 
148          of the final list in that interval is the union of the states of the
149          source lists in that interval.  For example if from one source we 
150          know the state of some process spanning some time, and from another 
151          source we know the state of the same process spanning some other 
152          time, taken together we know the state of that process over the 
153          union of those times.  This function is given by 
154   
155          >>> z = ~(x.isfalse() | y.isfalse() | (x & ~x & y & ~y)) 
156          >>> z.active 
157          [segment(0, 10), segment(20, 30), segment(40, 50), segment(60, 70)] 
158          >>> z.valid 
159          [segment(0, 100)] 
160   
161          Because this is inconvenient to type, slow, and not readable, a 
162          special in-place arithmetic operation named .update() is provided 
163          to implement this operation. 
164   
165          >>> z = LigolwSegmentList(x).update(y) 
166          >>> z.active 
167          [segment(0, 10), segment(20, 30), segment(40, 50), segment(60, 70)] 
168          >>> z.valid 
169          [segment(0, 100)] 
170   
171          The .update() method is not exactly equivalent to the operation 
172          above.  The .update() method demands that the two input lists' 
173          states be identical where their intervals of validity intersect. 
174          """ 
175           
176           
177           
178           
179   
180          segment_def_columns = (u"process_id", u"segment_def_id", u"ifos", u"name", u"version", u"comment") 
181          segment_sum_columns = (u"process_id", u"segment_sum_id", u"start_time", u"start_time_ns", u"end_time", u"end_time_ns", u"segment_def_id", u"comment") 
182          segment_columns = (u"process_id", u"segment_id", u"start_time", u"start_time_ns", u"end_time", u"end_time_ns", u"segment_def_id") 
183   
184 -        def __init__(self, active = (), valid = (), instruments = (), name = None, version = None, comment = None): 
 217   
218 -        def sort(self, *args): 
 219                  """ 
220                  Sort the internal segment lists.  The optional args are 
221                  passed to the .sort() method of the segment lists.  This 
222                  can be used to control the sort order by providing an 
223                  alternate comparison function.  The default is to sort by 
224                  start time with ties broken by end time. 
225                  """ 
226                  self.valid.sort(*args) 
227                  self.active.sort(*args) 
 228   
230                  """ 
231                  Coalesce the internal segment lists.  Returns self. 
232                  """ 
233                  self.valid.coalesce() 
234                  self.active.coalesce() 
235                  return self 
 236   
238                  """ 
239                  If either is true the result is true, if both are false the 
240                  result is false, otherwise the result is unknown. 
241                  """ 
242                  if self.instruments != other.instruments: 
243                          raise ValueError("incompatible metadata") 
244                   
245                  self.active &= self.valid 
246                  self.active |= other.active & other.valid 
247                   
248                  self.valid = (self.valid & other.valid) | self.active 
249                  return self 
 250   
252                  """ 
253                  If either is false the result is false, if both are true 
254                  the result is true, otherwise the result is unknown. 
255                  """ 
256                  if self.instruments != other.instruments: 
257                          raise ValueError("incompatible metadata") 
258                   
259                  false = (self.valid & ~self.active) | (other.valid & ~other.active) 
260                   
261                  self.active &= self.valid 
262                  self.active &= other.active & other.valid 
263                   
264                  self.valid = false | self.active 
265                  return self 
 266   
268                  """ 
269                  If either is true the result is true, if both are false the 
270                  result is false, otherwise the result is unknown. 
271                  """ 
272                  result = type(self)(self) 
273                  result |= other 
274                  return result 
 275   
277                  """ 
278                  If either is false the result is false, if both are true 
279                  the result is true, otherwise the result is unknown. 
280                  """ 
281                  result = type(self)(self) 
282                  result &= other 
283                  return result 
 284   
286                  """ 
287                  If unknown the result is unknown, otherwise the state is 
288                  inverted. 
289                  """ 
290                  result = type(self)(self) 
291                  result.active = ~result.active & result.valid 
292                  return result 
 293   
303   
305                  if self.instruments != other.instruments: 
306                          raise ValueError("incompatible metadata") 
307                  if (self.valid & other.valid).intersects(self.active ^ other.active): 
308                          raise ValueError("result over-determined") 
309                  self.active &= self.valid 
310                  self.active |= other.active & other.valid 
311                  self.valid |= other.valid 
312                  return self 
  313   
325          """ 
326          An interface shim between code that makes use of segments in 
327          glue.segments form, and LIGO Light-Weight XML I/O code. 
328   
329          This class is "attached" to an XML document object, at which time 
330          it parses and extracts the segment lists from the document, and 
331          clears the document's segment tables (preventing a second 
332          LigolwSegments object from being meaningfully attached to the same 
333          document).  When the application is finished manipulating the 
334          segment lists, they can be inserted into the XML document at which 
335          time the contents of the LigolwSegments object are cleared 
336          (preventing any further manipulations). 
337   
338          This class is a subclass of the Python set builtin.  Each element 
339          of the set is a LigolwSegmentList instance describing one of the 
340          segment lists in the original XML document. 
341   
342          This class may be used as a context manager to automate the 
343          replacement of segments back into the XML document, including in 
344          the event of an untrapped exception.  When used as a context 
345          manager, the process parameter of the .__init__() method is not 
346          optional. 
347   
348          Example: 
349   
350          >>> import sys 
351          >>> from glue.segments import * 
352          >>> from lal import LIGOTimeGPS 
353          >>> from glue.ligolw import ligolw, lsctables 
354          >>> xmldoc = ligolw.Document() 
355          >>> xmldoc.appendChild(ligolw.LIGO_LW())        # doctest: +ELLIPSIS 
356          <glue.ligolw.ligolw.LIGO_LW object at ...> 
357          >>> process = lsctables.Process(process_id = lsctables.ProcessTable.get_next_id()) 
358          >>> with LigolwSegments(xmldoc, process) as xmlsegments: 
359          ...     h1segs = segmentlist([segment(LIGOTimeGPS(0), LIGOTimeGPS(10))]) 
360          ...     xmlsegments.insert_from_segmentlistdict({"H1": h1segs}, "test") 
361          ...     l1segs = h1segs.shift(5) 
362          ...     xmlsegments.add(LigolwSegmentList(active = l1segs, valid = segmentlist([segment(-infinity(), infinity())]), instruments = set(["L1"]), name = "test")) 
363          >>> xmldoc.write(sys.stdout)            # doctest: +NORMALIZE_WHITESPACE 
364          <?xml version='1.0' encoding='utf-8'?> 
365          <!DOCTYPE LIGO_LW SYSTEM "http://ldas-sw.ligo.caltech.edu/doc/ligolwAPI/html/ligolw_dtd.txt"> 
366          <LIGO_LW> 
367                  <Table Name="segment_definer:table"> 
368                          <Column Type="ilwd:char" Name="segment_definer:process_id"/> 
369                          <Column Type="ilwd:char" Name="segment_definer:segment_def_id"/> 
370                          <Column Type="lstring" Name="segment_definer:ifos"/> 
371                          <Column Type="lstring" Name="segment_definer:name"/> 
372                          <Column Type="int_4s" Name="segment_definer:version"/> 
373                          <Column Type="lstring" Name="segment_definer:comment"/> 
374                          <Stream Delimiter="," Type="Local" Name="segment_definer:table"> 
375                                  "process:process_id:0","segment_definer:segment_def_id:0","H1","test",,, 
376                                  "process:process_id:0","segment_definer:segment_def_id:1","L1","test",,, 
377                          </Stream> 
378                  </Table> 
379                  <Table Name="segment_summary:table"> 
380                          <Column Type="ilwd:char" Name="segment_summary:process_id"/> 
381                          <Column Type="ilwd:char" Name="segment_summary:segment_sum_id"/> 
382                          <Column Type="int_4s" Name="segment_summary:start_time"/> 
383                          <Column Type="int_4s" Name="segment_summary:start_time_ns"/> 
384                          <Column Type="int_4s" Name="segment_summary:end_time"/> 
385                          <Column Type="int_4s" Name="segment_summary:end_time_ns"/> 
386                          <Column Type="ilwd:char" Name="segment_summary:segment_def_id"/> 
387                          <Column Type="lstring" Name="segment_summary:comment"/> 
388                          <Stream Delimiter="," Type="Local" Name="segment_summary:table"> 
389                                  "process:process_id:0","segment_summary:segment_sum_id:0",4294967295,4294967295,2147483647,4294967295,"segment_definer:segment_def_id:1",, 
390                          </Stream> 
391                  </Table> 
392                  <Table Name="segment:table"> 
393                          <Column Type="ilwd:char" Name="segment:process_id"/> 
394                          <Column Type="ilwd:char" Name="segment:segment_id"/> 
395                          <Column Type="int_4s" Name="segment:start_time"/> 
396                          <Column Type="int_4s" Name="segment:start_time_ns"/> 
397                          <Column Type="int_4s" Name="segment:end_time"/> 
398                          <Column Type="int_4s" Name="segment:end_time_ns"/> 
399                          <Column Type="ilwd:char" Name="segment:segment_def_id"/> 
400                          <Stream Delimiter="," Type="Local" Name="segment:table"> 
401                                  "process:process_id:0","segment:segment_id:0",0,0,10,0,"segment_definer:segment_def_id:0", 
402                                  "process:process_id:0","segment:segment_id:1",5,0,15,0,"segment_definer:segment_def_id:1" 
403                          </Stream> 
404                  </Table> 
405          </LIGO_LW> 
406          >>> xmlsegments = LigolwSegments(xmldoc) 
407          >>> xmlsegments.get_by_name("test") 
408          {u'H1': [segment(LIGOTimeGPS(0, 0), LIGOTimeGPS(10, 0))], u'L1': [segment(LIGOTimeGPS(5, 0), LIGOTimeGPS(15, 0))]} 
409          >>> xmlsegments.get_by_name("wrong name") 
410          Traceback (most recent call last): 
411                  ... 
412          KeyError: "no segmentlists named 'wrong name'" 
413   
414          NOTE:  the process of extracting and re-inserting the contents of 
415          the segment tables will, in general, randomize the IDs assigned to 
416          the rows of these tables.  If there are references to segment, 
417          segment_summary, or segment_definer row IDs in other tables in the 
418          document, those references will be broken by this process. 
419          """ 
420 -        def __init__(self, xmldoc, process = None): 
 489   
490                   
491                   
492                   
493   
494   
496                  """ 
497                  Parse the contents of the file object fileobj as a 
498                  segwizard-format segment list, and insert the result as a 
499                  new list of "active" segments into this LigolwSegments 
500                  object.  A new entry will be created in the segment_definer 
501                  table for the segment list, and instruments, name and 
502                  comment are used to populate the entry's metadata.  Note 
503                  that the "valid" segments are left empty, nominally 
504                  indicating that there are no periods of validity.  Returns 
505                  the newly created LigolwSegmentList object. 
506                  """ 
507                  ligolw_segment_list = LigolwSegmentList(active = segmentsUtils.fromsegwizard(fileobj, coltype = lsctables.LIGOTimeGPS), instruments = instruments, name = name, version = version, comment = comment) 
508                  self.add(ligolw_segment_list) 
509                  return ligolw_segment_list 
 510   
511   
513                  """ 
514                  Insert the segments from the segmentlistdict object 
515                  seglists as a new list of "active" segments into this 
516                  LigolwSegments object.  The dictionary's keys are assumed 
517                  to provide the instrument name for each segment list.  A 
518                  new entry will be created in the segment_definer table for 
519                  the segment lists, and the dictionary's keys, the name, and 
520                  comment will be used to populate the entry's metadata. 
521                  """ 
522                  for instrument, segments in seglists.items(): 
523                          self.add(LigolwSegmentList(active = segments, instruments = set([instrument]), name = name, version = version, comment = comment)) 
 524   
525   
527                  """ 
528                  Coalesce the segment lists.  Returns self. 
529                  """ 
530                  for ligolw_segment_list in self: 
531                          ligolw_segment_list.coalesce() 
532                  return self 
 533   
534   
535 -        def sort(self, *args): 
 536                  """ 
537                  Sort the segment lists.  The optional args are passed to 
538                  the .sort() methods of the segment lists.  This can be used 
539                  to control the sort order by providing an alternate 
540                  comparison function (the default is to sort all lists by 
541                  segment start time with ties broken by end time). 
542                  """ 
543                  for ligolw_segment_list in self: 
544                          ligolw_segment_list.sort(*args) 
 545   
546   
548                  """ 
549                  Identifies segment lists that differ only in their 
550                  instruments --- they have the same valid and active 
551                  segments, the same name, version and the same comment --- 
552                  and then deletes all but one of them, leaving just a single 
553                  list having the union of the instruments. 
554                  """ 
555                  self.sort() 
556                  segment_lists = dict(enumerate(self)) 
557                  for target, source in [(idx_a, idx_b) for (idx_a, seglist_a), (idx_b, seglist_b) in itertools.combinations(segment_lists.items(), 2) if seglist_a.valid == seglist_b.valid and seglist_a.active == seglist_b.active and seglist_a.name == seglist_b.name and seglist_a.version == seglist_b.version and seglist_a.comment == seglist_b.comment]: 
558                          try: 
559                                  source = segment_lists.pop(source) 
560                          except KeyError: 
561                                  continue 
562                          segment_lists[target].instruments |= source.instruments 
563                  self.clear() 
564                  self.update(segment_lists.values()) 
 565   
566   
568                  """ 
569                  Retrieve the active segmentlists whose name equals name. 
570                  The result is a segmentlistdict indexed by instrument.  All 
571                  segmentlist objects within it will be copies of the 
572                  contents of this object, modifications will not affect the 
573                  contents of this object.  If clip_to_valid is True then the 
574                  segmentlists will be intersected with their respective 
575                  intervals of validity, otherwise they will be the verbatim 
576                  active segments. 
577   
578                  NOTE:  the intersection operation required by clip_to_valid 
579                  will yield undefined results unless the active and valid 
580                  segmentlist objects are coalesced. 
581                  """ 
582                  result = segments.segmentlistdict() 
583                  for seglist in self: 
584                          if seglist.name != name: 
585                                  continue 
586                          segs = seglist.active 
587                          if clip_to_valid: 
588                                   
589                                  segs = segs & seglist.valid 
590                          for instrument in seglist.instruments: 
591                                  if instrument in result: 
592                                          raise ValueError("multiple '%s' segmentlists for instrument '%s'" % (name, instrument)) 
593                                   
594                                   
595                                   
596                                   
597                                  result[instrument] = segments.segmentlist(segs) 
598                  if not result: 
599                          raise KeyError("no segmentlists named '%s'" % name) 
600                  return result 
 601   
602   
603 -        def finalize(self, process_row = None): 
 604                  """ 
605                  Restore the LigolwSegmentList objects to the XML tables in 
606                  preparation for output.  All segments from all segment 
607                  lists are inserted into the tables in time order, but this 
608                  is NOT behaviour external applications should rely on. 
609                  This is done simply in the belief that it might assist in 
610                  constructing well balanced indexed databases from the 
611                  resulting files.  If that proves not to be the case, or for 
612                  some reason this behaviour proves inconvenient to preserve, 
613                  then it might be discontinued without notice.  You've been 
614                  warned. 
615                  """ 
616                  if process_row is not None: 
617                          process_id = process_row.process_id 
618                  elif self.process is not None: 
619                          process_id = self.process.process_id 
620                  else: 
621                          raise ValueError("must supply a process row to .__init__()") 
622   
623                   
624                   
625                   
626   
627                  self.segment_def_table.sync_next_id() 
628                  self.segment_table.sync_next_id() 
629                  self.segment_sum_table.sync_next_id() 
630   
631                   
632                   
633                   
634   
635                  self.sort() 
636   
637                   
638                   
639                   
640                   
641                   
642   
643                  def row_generator(segs, target_table, process_id, segment_def_id): 
644                          id_column = target_table.next_id.column_name 
645                          for seg in segs: 
646                                  row = target_table.RowType() 
647                                  row.segment = seg 
648                                  row.process_id = process_id 
649                                  row.segment_def_id = segment_def_id 
650                                  if isinstance(row, lsctables.SegmentSum): 
651                                          row.comment = None 
652                                  yield row, target_table, id_column 
 653   
654                   
655                   
656                   
657                   
658                   
659                   
660   
661                  row_generators = [] 
662                  for ligolw_segment_list in sorted(self, key = lambda l: (l.name, sorted(l.instruments), l.version)): 
663                          self.remove(ligolw_segment_list) 
664                          segment_def_row = self.segment_def_table.RowType() 
665                          segment_def_row.process_id = process_id 
666                          segment_def_row.segment_def_id = self.segment_def_table.get_next_id() 
667                          segment_def_row.instruments = ligolw_segment_list.instruments 
668                          segment_def_row.name = ligolw_segment_list.name 
669                          segment_def_row.version = ligolw_segment_list.version 
670                          segment_def_row.comment = ligolw_segment_list.comment 
671                          self.segment_def_table.append(segment_def_row) 
672   
673                          row_generators.append(row_generator(ligolw_segment_list.valid, self.segment_sum_table, process_id, segment_def_row.segment_def_id)) 
674                          row_generators.append(row_generator(ligolw_segment_list.active, self.segment_table, process_id, segment_def_row.segment_def_id)) 
675   
676                   
677                   
678                   
679                   
680   
681                  for row, target_table, id_column in iterutils.inorder(*row_generators): 
682                          setattr(row, id_column, target_table.get_next_id()) 
683                          target_table.append(row) 
 684   
685   
687                  if self.process is None: 
688                          raise ValueError("must supply a process row to .__init__()") 
689                  return self 
 690   
691   
695   
696   
697   
698   
699   
700   
701   
702   
703   
704   
705   
@lsctables.use_in
class LIGOLWContentHandler(ligolw.LIGOLWContentHandler):
        """
        Minimal content handler suitable for loading documents containing
        segment tables.  The class body adds nothing of its own;  it is
        ligolw.LIGOLWContentHandler with the lsctables.use_in decorator
        applied.
        """
 713   
729   
779