1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27 """
28 This module provides additional utilities for use with segments.segmentlist
29 objects.
30 """
31
32
33 import re
34
35
36 from glue import git_version
37 from glue.lal import CacheEntry
38 from .lal import LIGOTimeGPS
39 from glue import segments
40 from six.moves import range
41
42
43 __author__ = "Kipp Cannon <kipp.cannon@ligo.org>"
44 __version__ = "git id %s" % git_version.id
45 __date__ = git_version.date
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
63 """
64 Return a segmentlist describing the intervals spanned by the files
65 whose names are given in the list filenames. The segmentlist is
66 constructed by parsing the file names, and the boundaries of each
67 segment are coerced to type coltype.
68
69 The file names are parsed using a generalization of the format
70 described in Technical Note LIGO-T010150-00-E, which allows the
71 start time and duration appearing in the file name to be
72 non-integers.
73
74 NOTE: the output is a segmentlist as described by the file names;
75 if the file names are not in time order, or describe overlapping
76 segments, then thusly shall be the output of this function. It is
77 recommended that this function's output be coalesced before use.
78 """
79 pattern = re.compile(r"-([\d.]+)-([\d.]+)\.[\w_+#]+\Z")
80 l = segments.segmentlist()
81 for name in filenames:
82 [(s, d)] = pattern.findall(name.strip().rstrip(".gz"))
83 s = coltype(s)
84 d = coltype(d)
85 l.append(segments.segment(s, s + d))
86 return l
87
88
89
90
91
92
93
95 """
96 Construct a segmentlist representing the times spanned by the files
97 identified in the LAL cache contained in the file object file. The
98 segmentlist will be created with segments whose boundaries are of
99 type coltype, which should raise ValueError if it cannot convert
100 its string argument.
101
102 Example:
103
104 >>> from lal import LIGOTimeGPS
105 >>> cache_seglists = fromlalcache(open(filename), coltype = LIGOTimeGPS).coalesce()
106
107 See also:
108
109 glue.lal.CacheEntry
110 """
111 return segments.segmentlist(CacheEntry(l, coltype = coltype).segment for l in cachefile)
112
113
114
115
116
117
118
120 """
121 Read a segmentlist from the file object file containing a segwizard
122 compatible segment list. Parsing stops on the first line that
123 cannot be parsed (which is consumed). The segmentlist will be
124 created with segments whose boundaries are of type coltype, which
125 should raise ValueError if it cannot convert its string argument.
126 Two-column, three-column, and four-column segwizard files are
127 recognized, but the entire file must be in the same format, which
128 is decided by the first parsed line. If strict is True and the
129 file is in three- or four-column format, then each segment's
130 duration is checked against that column in the input file.
131
132 NOTE: the output is a segmentlist as described by the file; if
133 the segments in the input file are not coalesced or out of order,
134 then thusly shall be the output of this function. It is
135 recommended that this function's output be coalesced before use.
136 """
137 commentpat = re.compile(r"\s*([#;].*)?\Z", re.DOTALL)
138 twocolsegpat = re.compile(r"\A\s*([\d.+-eE]+)\s+([\d.+-eE]+)\s*\Z")
139 threecolsegpat = re.compile(r"\A\s*([\d.+-eE]+)\s+([\d.+-eE]+)\s+([\d.+-eE]+)\s*\Z")
140 fourcolsegpat = re.compile(r"\A\s*([\d]+)\s+([\d.+-eE]+)\s+([\d.+-eE]+)\s+([\d.+-eE]+)\s*\Z")
141 format = None
142 l = segments.segmentlist()
143 for line in file:
144 line = commentpat.split(line)[0]
145 if not line:
146 continue
147 try:
148 [tokens] = fourcolsegpat.findall(line)
149 num = int(tokens[0])
150 seg = segments.segment(list(map(coltype, tokens[1:3])))
151 duration = coltype(tokens[3])
152 this_line_format = 4
153 except ValueError:
154 try:
155 [tokens] = threecolsegpat.findall(line)
156 seg = segments.segment(list(map(coltype, tokens[0:2])))
157 duration = coltype(tokens[2])
158 this_line_format = 3
159 except ValueError:
160 try:
161 [tokens] = twocolsegpat.findall(line)
162 seg = segments.segment(list(map(coltype, tokens[0:2])))
163 duration = abs(seg)
164 this_line_format = 2
165 except ValueError:
166 break
167 if strict:
168 if abs(seg) != duration:
169 raise ValueError("segment '%s' has incorrect duration" % line)
170 if format is None:
171 format = this_line_format
172 elif format != this_line_format:
173 raise ValueError("segment '%s' format mismatch" % line)
174 l.append(seg)
175 return l
176
177
def tosegwizard(file, seglist, header=True, coltype=int):
    """
    Write the segmentlist seglist to the file object file in a
    segwizard compatible format.

    One tab-delimited row is written for each segment, in the order
    the segments appear in seglist:  a running index counted from 0,
    the segment's start, its stop, and its duration (abs() of the
    segment).  The start, stop and duration are each coerced to type
    coltype and then passed to str() before being written.  If header
    is True, the rows are preceded by a comment line naming the
    columns.
    """
    if header:
        file.write("# seg\tstart \tstop \tduration\n")

    row_template = "%d\t%s\t%s\t%s\n"
    for index, seg in enumerate(seglist):
        begin = str(coltype(seg[0]))
        end = str(coltype(seg[1]))
        length = str(coltype(abs(seg)))
        file.write(row_template % (index, begin, end, length))
190
191
192
193
194
195
196
198 """
199 Read a segmentlist from the file object file containing TAMA
200 locked-segments data. Parsing stops on the first line that cannot
201 be parsed (which is consumed). The segmentlist will be created
202 with segments whose boundaries are of type coltype, which should
203 raise ValueError if it cannot convert its string argument.
204
205 NOTE: TAMA locked-segments files contain non-integer start and end
206 times, so the default column type is set to LIGOTimeGPS.
207
208 NOTE: the output is a segmentlist as described by the file; if
209 the segments in the input file are not coalesced or out of order,
210 then thusly shall be the output of this function. It is
211 recommended that this function's output be coalesced before use.
212 """
213 segmentpat = re.compile(r"\A\s*\S+\s+\S+\s+\S+\s+([\d.+-eE]+)\s+([\d.+-eE]+)")
214 l = segments.segmentlist()
215 for line in file:
216 try:
217 [tokens] = segmentpat.findall(line)
218 l.append(segments.segment(list(map(coltype, tokens[0:2]))))
219 except ValueError:
220 break
221 return l
222
223
224
225
226
227
228
230 """
231 Parse a list of ranges expressed as strings in the form "value" or
232 "first:last" into an equivalent glue.segments.segmentlist. In the
233 latter case, an empty string for "first" and(or) "last" indicates a
234 (semi)infinite range. A typical use for this function is in
235 parsing command line options or entries in configuration files.
236
237 NOTE: the output is a segmentlist as described by the strings; if
238 the ranges in the input list are not coalesced or out of order,
239 then thusly shall be the output of this function. It is
240 recommended that this function's output be coalesced before use.
241
242 Example:
243
244 >>> text = "0:10,35,100:"
245 >>> from_range_strings(text.split(","))
246 [segment(0, 10), segment(35, 35), segment(100, infinity)]
247 """
248
249 segs = segments.segmentlist([None] * len(ranges))
250
251
252 for i, range in enumerate(ranges):
253 parts = range.split(":")
254 if len(parts) == 1:
255 parts = boundtype(parts[0])
256 segs[i] = segments.segment(parts, parts)
257 continue
258 if len(parts) != 2:
259 raise ValueError(range)
260 if parts[0] == "":
261 parts[0] = segments.NegInfinity
262 else:
263 parts[0] = boundtype(parts[0])
264 if parts[1] == "":
265 parts[1] = segments.PosInfinity
266 else:
267 parts[1] = boundtype(parts[1])
268 segs[i] = segments.segment(parts[0], parts[1])
269
270
271 return segs
272
273
275 """
276 Turn a segment list into a list of range strings as could be parsed
277 by from_range_strings(). A typical use for this function is in
278 machine-generating configuration files or command lines for other
279 programs.
280
281 Example:
282
283 >>> from glue.segments import *
284 >>> segs = segmentlist([segment(0, 10), segment(35, 35), segment(100, infinity())])
285 >>> ",".join(to_range_strings(segs))
286 '0:10,35,100:'
287 """
288
289 ranges = [None] * len(seglist)
290
291
292 for i, seg in enumerate(seglist):
293 if not seg:
294 ranges[i] = str(seg[0])
295 elif (seg[0] is segments.NegInfinity) and (seg[1] is segments.PosInfinity):
296 ranges[i] = ":"
297 elif (seg[0] is segments.NegInfinity) and (seg[1] is not segments.PosInfinity):
298 ranges[i] = ":%s" % str(seg[1])
299 elif (seg[0] is not segments.NegInfinity) and (seg[1] is segments.PosInfinity):
300 ranges[i] = "%s:" % str(seg[0])
301 elif (seg[0] is not segments.NegInfinity) and (seg[1] is not segments.PosInfinity):
302 ranges[i] = "%s:%s" % (str(seg[0]), str(seg[1]))
303 else:
304 raise ValueError(seg)
305
306
307 return ranges
308
309
311 """
312 Return a string representation of a segmentlistdict object. Each
313 segmentlist in the dictionary is encoded using to_range_strings()
314 with "," used to delimit segments. The keys are converted to
315 strings and paired with the string representations of their
316 segmentlists using "=" as a delimiter. Finally the key=value
317 strings are combined using "/" to delimit them.
318
319 Example:
320
321 >>> from glue.segments import *
322 >>> segs = segmentlistdict({"H1": segmentlist([segment(0, 10), segment(35, 35), segment(100, infinity())]), "L1": segmentlist([segment(5, 15), segment(45, 60)])})
323 >>> segmentlistdict_to_short_string(segs)
324 'H1=0:10,35,100:/L1=5:15,45:60'
325
326 This function, and its inverse segmentlistdict_from_short_string(),
327 are intended to be used to allow small segmentlistdict objects to
328 be encoded in command line options and config files. For large
329 segmentlistdict objects or when multiple sets of segmentlists are
330 required, the LIGO Light Weight XML encoding available through the
331 glue.ligolw library should be used.
332 """
333 return "/".join(["%s=%s" % (str(key), ",".join(to_range_strings(value))) for key, value in seglists.items()])
334
335
337 """
338 Parse a string representation of a set of named segmentlists into a
339 segmentlistdict object. The string encoding is that generated by
340 segmentlistdict_to_short_string(). The optional boundtype argument
341 will be passed to from_range_strings() when parsing the segmentlist
342 objects from the string.
343
344 Example:
345
346 >>> segmentlistdict_from_short_string("H1=0:10,35,100:/L1=5:15,45:60")
347 {'H1': [segment(0, 10), segment(35, 35), segment(100, infinity)], 'L1': [segment(5, 15), segment(45, 60)]}
348
349 This function, and its inverse segmentlistdict_to_short_string(),
350 are intended to be used to allow small segmentlistdict objects to
351 be encoded in command line options and config files. For large
352 segmentlistdict objects or when multiple sets of segmentlists are
353 required, the LIGO Light Weight XML encoding available through the
354 glue.ligolw library should be used.
355 """
356 d = segments.segmentlistdict()
357 for token in s.strip().split("/"):
358 key, ranges = token.strip().split("=")
359 d[key.strip()] = from_range_strings(ranges.strip().split(","), boundtype = boundtype)
360 return d
361
362
364 """
365 Convert consecutive True values in a bit stream (boolean-castable
366 iterable) to a stream of segments. Require minlen consecutive True
367 samples to comprise a segment.
368
369 Example:
370
371 >>> list(from_bitstream((True, True, False, True, False), 0, 1))
372 [segment(0, 2), segment(3, 4)]
373 >>> list(from_bitstream([[], [[]], [[]], [], []], 1013968613, 0.125))
374 [segment(1013968613.125, 1013968613.375)]
375 """
376 bitstream = iter(bitstream)
377 i = 0
378 while 1:
379 if next(bitstream):
380
381 j = i + 1
382 try:
383 while next(bitstream):
384 j += 1
385 finally:
386 if j - i >= minlen:
387 yield segments.segment(start + i * dt, start + j * dt)
388 i = j
389 i += 1
390
391
392
393
394
395
396
397
398
399
400
402 """
403 Return a segmentlist identifying the S2 playground times within the
404 interval defined by the segment extent.
405
406 Example:
407
408 >>> from glue import segments
409 >>> S2playground(segments.segment(874000000, 874010000))
410 [segment(874000013, 874000613), segment(874006383, 874006983)]
411 """
412 lo = int(extent[0])
413 lo -= (lo - 729273613) % 6370
414 hi = int(extent[1]) + 1
415 return segments.segmentlist(segments.segment(t, t + 600) for t in range(lo, hi, 6370)) & segments.segmentlist([extent])
416
417
419 """
420 Analogous to Python's range() builtin, this generator yields a
421 sequence of continuous adjacent segments each of length "period"
422 with the first starting at "start" and the last ending not after
423 "stop". Note that the segments generated do not form a coalesced
424 list (they are not disjoint). start, stop, and period can be any
425 objects which support basic arithmetic operations.
426
427 Example:
428
429 >>> from glue.segments import *
430 >>> segmentlist(segmentlist_range(0, 15, 5))
431 [segment(0, 5), segment(5, 10), segment(10, 15)]
432 >>> segmentlist(segmentlist_range('', 'xxx', 'x'))
433 [segment('', 'x'), segment('x', 'xx'), segment('xx', 'xxx')]
434 """
435 n = 1
436 b = start
437 while True:
438 a, b = b, start + n * period
439 if b > stop:
440 break
441 yield segments.segment(a, b)
442 n += 1
443
444
445
446
447
448
449
450
451
452
453
def Fold(seglist1, seglist2):
    """
    An iterator that folds seglist1 onto each segment of seglist2.

    For every segment in seglist2, taken in order, the intersection of
    seglist1 with that single segment is computed and then shifted so
    that its start and stop values are measured with respect to the
    start of that segment.  One such result is generated per segment
    of seglist2.  See also the segmentlist_range() function.

    This has use in applications that wish to convert ranges of values
    to ranges relative to epoch boundaries.  Below, a list of time
    intervals in hours is converted to a sequence of daily interval
    lists with times relative to midnight.

    Example:

    >>> from glue.segments import *
    >>> x = segmentlist([segment(0, 13), segment(14, 20), segment(22, 36)])
    >>> for y in Fold(x, segmentlist_range(0, 48, 24)): print(y)
    ...
    [segment(0, 13), segment(14, 20), segment(22, 24)]
    [segment(0, 12)]
    """
    for epoch in seglist2:
        # restrict seglist1 to this one epoch ...
        clipped = seglist1 & segments.segmentlist([epoch])
        # ... and re-express it relative to the epoch's start
        yield clipped.shift(-epoch[0])
478
479
def vote(seglists, n):
    """
    Given a sequence of segmentlists, return the intervals during
    which at least n of them intersect.  The input segmentlists must
    be coalesced; the output is coalesced.  If n is less than 1 an
    empty segmentlist is returned without examining seglists.

    Example:

    >>> from glue.segments import *
    >>> w = segmentlist([segment(0, 15)])
    >>> x = segmentlist([segment(5, 20)])
    >>> y = segmentlist([segment(10, 25)])
    >>> z = segmentlist([segment(15, 30)])
    >>> vote((w, x, y, z), 3)
    [segment(10, 20)]

    The sequence of segmentlists is only iterated over once, and the
    segmentlists within it are only iterated over once; they can all
    be generators.  If there are a total of N segments in M segment
    lists and the final result has L segments the algorithm is O(N M) +
    O(L).

    Segments belonging to different lists may share boundary values.
    Pending transitions are ordered on (boundary, vote change) alone,
    so the iterators carried alongside them are never compared with
    one another (iterators have no ordering in Python 3).
    """
    # nothing can be "on" fewer than one time
    if n < 1:
        return segments.segmentlist()

    def pop_min(queue):
        # remove and return the pending transition with the smallest
        # (boundary, vote change).  only the first two fields take
        # part in the comparison:  the third is either None or an
        # iterator, and comparing iterators raises TypeError on
        # Python 3.
        smallest = min(range(len(queue)), key=lambda i: queue[i][:2])
        return queue.pop(smallest)

    def vote_generator(seglists):
        # digest the segmentlists into an ordered sequence of
        # (boundary, net vote change) pairs, one pair per distinct
        # boundary value.
        queue = []

        def push(seg, segiter):
            # the "off" transition carries the iterator so that the
            # next segment of the same list is fetched only once
            # this one has ended
            queue.append((seg[1], -1, segiter))
            queue.append((seg[0], +1, None))

        # prime the queue with the first segment of each list,
        # ignoring lists that are empty
        for seglist in seglists:
            segiter = iter(seglist)
            try:
                seg = next(segiter)
            except StopIteration:
                continue
            push(seg, segiter)
        if not queue:
            return

        bound = min(entry[0] for entry in queue)
        votes = 0
        while queue:
            this_bound, delta, segiter = pop_min(queue)
            if this_bound == bound:
                # another transition at the same boundary:
                # accumulate the net change
                votes += delta
            else:
                yield bound, votes
                bound = this_bound
                votes = delta
            if segiter is not None:
                try:
                    seg = next(segiter)
                except StopIteration:
                    continue
                push(seg, segiter)
        yield bound, votes

    # compute the cumulative sum of votes, and assemble a segmentlist
    # from the intervals when the vote count is equal to or greater
    # than n
    result = segments.segmentlist()
    votes = 0
    for bound, delta in vote_generator(seglists):
        if delta > 0 and n - delta <= votes < n:
            # count rises from below n to n or more
            start = bound
        elif delta < 0 and n <= votes < n - delta:
            # count drops from n or more to below n
            result.append(segments.segment(start, bound))
            # unbind so that a stop not preceded by a start raises
            # an error instead of silently reusing a stale value
            del start
        votes += delta
    # sanity check:  every start must have been matched by a stop
    assert votes == 0

    return result
570