__prog__ = 'lalapps_cbc_plotrates'
description = 'Creates plots of histogram counts and cumulative rates vs. a given statistic. Statistics can be either single or coincident, and can be queried or computed on the fly from either a sngl table or a coinc table. NOTE: This is a modified version of that in lalsuite. This also plots extended background triggers, which must be stored in text files. It also writes out text files of the data that was plotted in the cumulative histogram.'
usage = '%s [options] file1.sqlite file2.sqlite ...'

from optparse import OptionParser
import sqlite3
import sys
import os
import re
import math
import numpy
from scipy import special
import bisect
import matplotlib
matplotlib.use('Agg')
import pylab
pylab.rc('font', **{'family': 'serif', 'serif': ['Computer Modern Roman']})
pylab.rc('text', usetex = True)

from glue import lal
from glue import segments
from glue.ligolw import dbtables
from glue.ligolw import lsctables
from glue import git_version

from pylal import ligolw_sqlutils as sqlutils
from pylal import CoincInspiralUtils
from pylal import InspiralUtils
from pylal import printutils
from pylal.ligolw_dataUtils import get_row_stat


def parse_command_line(argv = None):
    """
    Parse the command line, return options and check for consistency among the
    options.
    """
    parser = OptionParser( version = git_version.verbose_msg, usage = usage, description = description )

    parser.add_option( "-o", "--output-path", action = "store", type = "string", default = './',
        help =
            "Output path to save figures to."
        )
    parser.add_option( "-u", "--user-tag", action = "store", type = "string", default = '',
        help =
            "User-tag for plot file names."
        )
    parser.add_option( "-t", "--tmp-space", action = "store", type = "string", default = None,
        metavar = "PATH",
        help =
            "Location of local disk on which to do work. " +
            "This is used to enhance performance in a networked " +
            "environment."
        )
    parser.add_option( "-c", "--cache-file", action = "append", default = [],
        help =
            "Read files from the given cache file. The file must be LAL formatted. To specify multiple, give the argument multiple times."
        )
    parser.add_option( "-v", "--verbose", action = "store_true", default = False,
        help =
            "Print progress information."
        )
    parser.add_option( "", "--debug", action = "store_true", default = False,
        help =
            "Print SQLite queries used to obtain data."
        )

    parser.add_option( "-s", "--statistic", action = "store", type = "string",
        default = None, metavar = "STATTYPE(:label)",
        help =
            "Stat to plot. Can be any column or any function of columns in the given table. Syntax for functions is python; functions available are anything from python's math module. To specify a label for the stat when plotting, append a colon and the label."
        )
    parser.add_option( "-d", "--data-table", action = "store", type = "string", default = None,
        help =
            "Table to get triggers from. Can be any table with either a coinc_event_id or an event_id. If the table has a coinc_event_id, it will be assumed to be a coincident table and so must have an 'ifos' column. If the table has an event_id, it will be assumed to be a single event table, and so must have an 'ifo' column. Additionally, if a combining-method or function is specified, single events will be grouped together via their event_ids in the coinc_event_map table. Note: Plotting using single event tables can cause the program to take significantly longer to execute than coincident event tables. If a coincident statistic has already been computed and exists in the coinc_event table, always use that instead."
        )
    parser.add_option( "-C", "--combining-method", action = "store", type = "string", metavar = 'sum | quad_sum | mean | median | min | max', default = None,
        help =
            "How to combine the given statistic for each ifo. This is only needed if trying to plot coincident statistics from a single event table. Supported methods are sum, quad_sum (the square root of the sum of the squares), mean, median, min, or max values."
        )
    parser.add_option( "-F", "--combining-function", action = "store", type = "string", default = None,
        help =
            "A more explicit alternative to combining-method, this allows you to specify exactly how to combine values from each ifo. The expression should be a function of the ifos to be combined; if an ifo isn't present in a given coinc, it will be given a 0 value. For instance, if --statistic is set to snr, and the combining-function is set to sqrt(H1**2+L1**2+(.5*V1)**2), then the square root of the sum of the squares of the H1 and L1 snrs and half the V1 snr will be plotted. If doubles are present, they will also be plotted. Only ifos that are specified will be included in coincs. In other words, if only H1 and L1 are listed in the equation, only H1 and L1 will be included, even if H2 was listed as being coincident with them in the database. If combining-method is also specified, combining-function takes precedence."
        )

    parser.add_option( "-f", "--foreground-datatype", action = "store", type = "string", default = "slide",
        help =
            "Plot the given foreground type. Options are all_data, exclude_play, playground, or simulation. Default is to plot no foreground. Background expected rates are determined by the livetime of the specified datatype. If no foreground datatype is specified, expected rates are based on the average background time."
        )
    parser.add_option( "-p", "--param-name", metavar = "PARAMETER[:label]",
        action = "store", default = None,
        help =
            "Specifying this and param-ranges will only select triggers that fall within the given range for each plot. " +
            "This will apply to all plots. Any column(s) in the simulation or recovery table may be used. " +
            "To specify a parameter in the simulation table, prefix with 'injected_'; " +
            "for the recovery table, 'recovered_'. As with variables, math operations are permitted between columns, e.g., " +
            "injected_mass1+injected_mass2. Any function in the python math module may be used; syntax is python. The parameter name " +
            "will be in the title of each plot; to give a label for the name, add a colon and the label. If no label is specified, " +
            "the parameter name as given will be used. " +
            "WARNING: if you use any recovered parameters, missed injections will not be included in plots " +
            "as recovered parameters are undefined for them. For example, if you chose to bin by recovered_mchirp, " +
            "missed injections will not be plotted on any plot. If you chose to bin by injected_mchirp, however, missed injections " +
            "will be plotted."
        )
    parser.add_option( "-q", "--param-ranges", action = "store", default = None,
        metavar = " [ LOW1, HIGH1 ); ( LOW2, HIGH2]; !VAL3; etc.",
        help =
            "Requires --param-name. Specify the parameter ranges " +
            "to select triggers in. A '(' or ')' implies an open " +
            "boundary, a '[' or ']' a closed boundary. To specify " +
            "multiple ranges, separate each range by a ';'. To " +
            "specify a single value, just type that value with no " +
            "parentheses or brackets. To specify not equal to a single " +
            "value, put a '!' before the value. If " +
            "multiple ranges are specified, a separate plot for each range will be generated."
        )
    parser.add_option( "", "--param-combining-method", default = "mean",
        help =
            "For sngl-ifo tables, how to combine the --param-name. Either a combining-function or method may be used. Default is mean."
        )
    parser.add_option( "", "--exclude-coincs", action = "store", type = "string", default = None,
        metavar = " [COINC_INSTRUMENTS1 + COINC_INSTRUMENTS2 in INSTRUMENTS_ON1];"
            "[ALL in INSTRUMENTS_ON2]; etc.",
        help =
            "Exclude coincident types in specified detector times, " +
            "e.g., '[H2,L1 in H1,H2,L1]'. Some rules: " +
            "* Coinc-types and detector time must be separated by " +
            "an ' in '. When specifying a coinc_type or detector " +
            "time, detectors and/or ifos must be separated by " +
            "commas, e.g. 'H1,L1' not 'H1L1'. " +
            "* To specify multiple coinc-types in one type of time, " +
            "separate each coinc-type by a '+', e.g., " +
            "'[H1,H2 + H2,L1 in H1,H2,L1]'. " +
            "* To exclude all coincs in a specified detector time " +
            "or a specific coinc-type in all times, use 'ALL'. E.g., " +
            "to exclude all H1,H2 triggers, use '[H1,H2 in ALL]' " +
            "or to exclude all H2,L1 time use '[ALL in H2,L1]'. " +
            "* To specify multiple exclusions, separate each " +
            "bracket by a ';'. " +
            "* Neither the order of the instruments nor the case of " +
            "the letters matters. So if your pinky is broken and you're " +
            "dyslexic you can type '[h2,h1 in all]' without a " +
            "problem."
        )
    parser.add_option( "", "--include-only-coincs", action = "store", type = "string", default = None,
        metavar = " [COINC_INSTRUMENTS1 + COINC_INSTRUMENTS2 in INSTRUMENTS_ON1];" +
            "[ALL in INSTRUMENTS_ON2]; etc.",
        help =
            "Opposite of --exclude-coincs: only plot the specified coinc types. WARNING: If you type 'ALL' for on-instruments, e.g., '[H1,L1 in ALL]', and plot-by-instrument-time is not on, livetime from *all* of the instrument times will be included in the background, even if it is from an instrument time that could not produce H1,L1 triggers. If this isn't desired, either be explicit about what instrument times you really want -- i.e., don't use 'ALL' -- or use --exclude-coincs to exclude the instrument times you don't want. In the example, adding --exclude-coincs [ALL in L1,V1];[ALL in H1,V1] will prevent any H1,V1 or L1,V1 time from being added."
        )
    parser.add_option( "", "--plot-special-time", action = "append", type = "float", default = [],
        help =
            "Exclude all single triggers that are within N seconds of the given end-time from the background, where N is set by --plot-special-window. All foreground triggers that fall within the range will be plotted as stars. Note: this can only be done when reading from sngl-ifo tables."
        )
    parser.add_option( "", "--plot-special-window", action = "store", type = "float", default = 8.0,
        help =
            "Specify the time window, in seconds, to use around the plot-special-time. Default is 8.0 seconds."
        )
    parser.add_option( "", "--add-background-file", action = "append", default = [], metavar = " FILE,BACKGROUND_LIVETIME(:LABEL)",
        help =
            "Add a point to the background. Any point added will be plotted, but will not be added to the background distribution obtained from input files for cumulative rate computation nor for extrapolation. If the point added is louder than the loudest background trigger, this point will be used for extending the probability contours (if plotted). Points should be specified by first providing the x-value of the point, then the total background livetime used to acquire that point, in the same units as --time-units. To specify multiple points, give the argument multiple times."
        )
    parser.add_option( "", "--add-special-background-file", action = "append", default = [], metavar = " FILE,BACKGROUND_LIVETIME(:LABEL)",
        help =
            "Add a point to the background. Any point added will be plotted, but will not be added to the background distribution obtained from input files for cumulative rate computation nor for extrapolation. If the point added is louder than the loudest background trigger, this point will be used for extending the probability contours (if plotted). Points should be specified by first providing the x-value of the point, then the total background livetime used to acquire that point, in the same units as --time-units. To specify multiple points, give the argument multiple times."
        )
    parser.add_option( "", "--single-table", action = "store", default = None,
        help =
            "If using plot-special-time with a coinc table, a --single-table must be specified to look in for the un-slid single event end-times."
        )
    parser.add_option( "-I", "--plot-by-ifos", action = "store_true", default = False,
        help =
            "Create a separate plot for each coincidence type (if plotting a combined statistic) or single ifo (if plotting single ifo statistics). Default is to plot all ifo(s) together."
        )
    parser.add_option( "-T", "--plot-by-instrument-time", action = "store_true", default = False,
        help =
            "Create a separate plot for each instrument time. Default is to plot all instrument times together."
        )
    parser.add_option( "-M", "--plot-by-multiplicity", action = "store_true", default = False,
        help =
            "Create a separate plot based on the number of coincident instruments. (For single ifo plotting, this is a no-op.) In other words, doubles will be plotted with doubles, triples with triples, etc. Default is to plot all coincident ifos together. If this and plot-by-ifos are specified, plot-by-ifos takes precedence."
        )
    parser.add_option( "-U", "--time-units", type = "string", default = "yr",
        help =
            "What unit of time to plot the rates in. Can be either 'yr', 'days', 'hr', 'min', or 's'. Default is yr."
        )
    parser.add_option( "-R", "--use-ifo-colors", action = "store_true", default = False,
        help =
            "Color foreground points by ifo color. Default is to plot no color (all triggers have a blue outline)."
        )
    parser.add_option( "-H", "--plot-poisson-pdf", action = "store_true", default = False,
        help =
            "Plot the Poisson probability density of the background."
        )
    parser.add_option( "-K", "--plot-significance-bands", action = "store_true", default = False,
        help =
            "Plot the Poisson probability bands in terms of the number of sigma. The values of the n sigma are the points in the poisson PDF that equal n*erf(y)."
        )
    parser.add_option( "-S", "--max-sigma", type = 'int', default = 5,
        help =
            "Maximum number of significance bands to plot. Default=5."
        )
    parser.add_option( "-E", "--extrapolate", default = None, metavar = "Gaussian|Power|erfc",
        help =
            "Extrapolate the background using the given model out to the largest foreground statistic. If this is beyond the measured background, the probability density beyond the loudest background point will be calculated using this extrapolation. Options for models are 'Gaussian', 'Power', or 'erfc'. If 'Gaussian' or 'Power', a least-squares fit is done to both the non-cumulative and the cumulative histograms: for 'Gaussian', y = A*exp(-B*x**(2./x_power)) is fit; if 'Power', y = A*x**B is fit. If 'erfc', a Gaussian is fitted to the non-cumulative histogram to get sigmasq and the scaling factor A. These values are then used in the complementary error function y = A*\int{dx*exp(-x^2/2*sigmasq)} to extrapolate the cumulative distribution."
        )
    parser.add_option( "-P", "--x-power", type = "float", default = 1.,
        help =
            "Power of the combined statistic to use when fitting a Gaussian to the data and binning for the non-cumulative plot. The natural log of the Gaussian is taken so that the actual fitted equation is log(y) = Beta[1] + Beta[2]*x**(2/x_power). For example, if plotting linearly in your statistic, x-power should be 1. If plotting against the statistic squared, then x-power would be 2, thus the fit would be log(y) = Beta[1] + Beta[2]*x. Default=1."
        )
    parser.add_option( "", "--min-x-fit", type = 'float', default = None,
        help =
            "Background stat at which to begin fitting to for extrapolation. Default is the first background point."
        )
    parser.add_option( "", "--lin-x", action = "store_true", default = False,
        help =
            "Plot x-axis on a linear scale. Default is to plot on a log scale."
        )
    parser.add_option( "", "--lin-y", action = "store_true", default = False,
        help =
            "Plot y-axis on a linear scale. Default is to plot on a log scale."
        )
    parser.add_option( '-a', '--xmin', action = 'store', type = 'float', default = None,
        help =
            'Set a minimum value for the x-axis. If lin-x is not set, must be > 0. This will apply to all plots generated.'
        )
    parser.add_option( '-b', '--xmax', action = 'store', type = 'float', default = None,
        help =
            'Set a maximum value for the x-axis. If lin-x is not set, must be > 0. This will apply to all plots generated.'
        )
    parser.add_option( '-A', '--ymin', action = 'store', type = 'float', default = None,
        help =
            'Set a minimum value for the y-axis. If lin-y is not set, must be > 0. This will apply to all plots generated.'
        )
    parser.add_option( '-B', '--ymax', action = 'store', type = 'float', default = None,
        help =
            'Set a maximum value for the y-axis. If lin-y is not set, must be > 0. This will apply to all plots generated.'
        )
    parser.add_option( '', '--nbins', type = 'int', default = 100,
        help =
            'The number of bins to use for the non-cumulative histogram.'
        )
    parser.add_option( '', '--title', action = 'store_true', default = False,
        help =
            'Add a title to plots.'
        )
    parser.add_option( '', '--dpi', action = 'store', type = 'float', default = 150.,
        help =
            'Set dpi for plots; default is 150.'
        )

    if argv is None:
        (options, args) = parser.parse_args()
    else:
        (options, args) = parser.parse_args(argv)

    if options.extrapolate is not None and options.extrapolate.upper() == "GAUSSIAN" and options.x_power is None:
        raise ValueError, "If extrapolating a Gaussian, must specify an --x-power."

    return options, args, sys.argv[1:]


def fix_zarray( zarray, sigmas ):
    # NB: the original def line is missing here; the name and signature are
    # assumed. Caps each column of zarray at its corresponding sigma value.
    for n,s in enumerate(sorted(sigmas)):
        for ii in range(zarray.shape[0]):
            if zarray[ii,n] >= s:
                zarray[ii,n] = s


def createDataHolder( tableName, columns = None ):
    """
    Creates a DataHolder object. If tableName is the same as a table in lsctables, the DataHolder class will be an instance of that table's RowType.
    """
    if tableName in lsctables.TableByName:
        base = lsctables.TableByName[ tableName ].RowType
    else:
        base = object

    class DataHolder( base ):
        def __init__(self, columns = None):
            # if no columns are given, just inherit the base class's slots
            if columns is not None:
                self.__slots__ = columns

        def store( self, data ):
            """
            Takes a list of data and assigns the values to the object's variables.

            @data: a list of tuples in which the first element is the column name and the second is the value to assign.
            """
            for col, val in data:
                setattr( self, col, val )

        def get_value(self, arg):
            """
            Returns the result of some operation on the elements in self.
            'arg' can be the name of any defined function in self's base class,
            a slot in self, or a function of either or both.
            See ligolw.dataUtils.get_row_stat for more info.
            """
            return get_row_stat( self, arg )

    return DataHolder
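

# Example (a minimal sketch, not part of the original program): build a plain
# DataHolder, store a couple of values, and evaluate a derived statistic. The
# table name 'example_holder' is arbitrary and is not an lsctables table.
def _example_data_holder():
    ExampleRow = createDataHolder( 'example_holder', ['snr', 'chisq'] )
    row = ExampleRow( ['snr', 'chisq'] )
    row.store([ ('snr', 8.0), ('chisq', 16.0) ])
    # get_value accepts any python expression in the stored column names
    return row.get_value( 'snr**2. / chisq' )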


def combineRowStats( function, rows ):
    """
    Performs the desired function on the list of single statistics. Note: this can only combine one statistic from each row.

    @function: can be either a known pre-set (see below) or an arbitrary function. If an arbitrary function, it must be in terms of the ifo names.
    @rows: a dictionary of statistics keyed by the ifos
    """
    # check if the function is one of the known pre-sets
    if function == 'sum':
        return sum(rows.values())
    if function == 'quad_sum':
        return math.sqrt(sum([x**2. for x in rows.values()]))
    if function == 'min':
        return min(rows.values())
    if function == 'max':
        return max(rows.values())
    if function == 'mean':
        return numpy.mean(numpy.array(rows.values()))
    if function == 'median':
        return numpy.median(numpy.array(rows.values()))
    if function == 'alpha_min':
        return rows[min(rows.keys())]
    if function == 'sorted_keys':
        return ','.join(sorted(rows.keys()))
    if function == 'sorted_values':
        return ';'.join(sorted(map( str, rows.values() )))
    if function == 'echo':
        return rows

    # otherwise, treat the function as an expression in the ifo names, with
    # everything in the math module available
    safe_dict = dict([ [name,val] for name,val in rows.items() + math.__dict__.items() if not name.startswith('__') ])
    try:
        return eval( function, {"__builtins__":None}, safe_dict )
    except NameError:
        # an ifo used in the function is not present in rows
        return None
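

# Example (a sketch, not part of the original program): combine per-ifo snrs
# with a pre-set method and with an arbitrary function of the ifo names.
def _example_combine_row_stats():
    snrs = { 'H1': 8.0, 'L1': 6.0 }
    quad = combineRowStats( 'quad_sum', snrs )            # sqrt(8^2 + 6^2) = 10.0
    custom = combineRowStats( 'sqrt(H1**2 + (.5*L1)**2)', snrs )
    return quad, custom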


class OffsetVector( dict ):
    weak_equality = False
    def __init__(self, offset_dict):
        for ifo in offset_dict:
            self[ifo] = offset_dict[ifo]

    def __eq__(self, other):
        """
        The default equality test is to consider two vectors to be equal only if all ifos are the same and all offsets are the same. If one vector is a subset of the other vector, they will not be considered equal. However, if the class attribute weak_equality is set to True, only offsets of the ifos that are both in self and other will be checked. For example:
        >>> a = OffsetVector({'H1': 0, 'L1': 5})
        >>> b = OffsetVector({'H1': 0, 'L1': 5, 'V1': 10})
        >>> a == b
        False
        >>> OffsetVector.weak_equality = True
        >>> a == b
        True
        """
        if type(other) != type(self):
            return False
        if OffsetVector.weak_equality:
            return all( self[ifo] == other[ifo] for ifo in set(self.keys()) & set(other.keys()) )
        else:
            return self.__hash__() == other.__hash__()

    def __ne__(self, other):
        return not self == other

    def __hash__(self):
        # assumed implementation: the original lines are missing here; __eq__
        # above relies on a hash built from the sorted (ifo, offset) pairs
        return hash(tuple(sorted(self.items())))


class Category:
    """
    Class to store category information.
    """
    default_match_criteria = ['offset_vector', 'datatype', 'veto_cat', 'on_instruments', 'ifos', 'param_group']

    def __init__(self, offset_vector = {}, datatype = None, veto_cat = None, on_instruments = frozenset([u'ALL']), ifos = frozenset([u'ALL']), param_group = None):
        self.offset_vector = OffsetVector(offset_vector)
        self.datatype = unicode(datatype)
        self.veto_cat = unicode(veto_cat)
        self.on_instruments = frozenset(on_instruments)
        self.ifos = frozenset(ifos)
        self.param_group = param_group
        self.livetime = 0

    def add_livetime(self, time):
        self.livetime += time

    def selective_eq(self, other, check_me):
        """
        Only checks the values listed in check_me to figure out whether or not self is equal to other.
        """
        if type(other) != type(self):
            return False
        return all(getattr(self,x) == getattr(other,x) for x in check_me)

    def __eq__(self, other):
        # assumed implementation: the original lines are missing here; default
        # equality checks everything in default_match_criteria
        return self.selective_eq(other, Category.default_match_criteria)

    def __ne__(self, other):
        return not self == other
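

# Example (a sketch, not part of the original program): two categories that
# differ only in datatype compare as equal when datatype is left out of the
# match criteria.
def _example_selective_eq():
    a = Category( datatype = 'slide', veto_cat = 'CAT_3', ifos = ['H1', 'L1'] )
    b = Category( datatype = 'all_data', veto_cat = 'CAT_3', ifos = ['H1', 'L1'] )
    return a.selective_eq( b, ['veto_cat', 'ifos'] )  # True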


class Data( dict ):
    """
    Class to store statistics and livetime for plotting.
    """
    class DataElement:
        """
        Sub-class to store individual data elements.

        @categories: a list of instances of the Category class defining which categories this data element belongs to
        @data: an instance of the DataHolder class listing statistics and methods associated with this element
        """
        def __init__(self, thisid, categories, data):
            self._id = thisid
            self.categories = categories
            self.data = data
            self.cumrates = {}

        def update(self, _id = None, categories = [], data = None, addToExistingCat = True):
            if _id is not None:
                self._id = _id
            if data is not None:
                self.data = data
            if addToExistingCat:
                self.categories.extend(categories)
            else:
                self.categories = categories

    def __init__(self):
        """
        A list of all the data elements is kept as an index.
        """
        self.data_index = {}

    def add_data(self, _id, categories, data):
        """
        Adds a new DataElement to self.

        @id: some unique value to identify the data element
        @categories: a list of categories that this data falls in. If one or more of these categories are equal (equality determined by the default Category match_criteria) to a category already in all_categories, the category is set to that category. This results in distinct categories only being saved once in memory, with all DataElements that share that category pointing to the same memory address.
        """
        d = self.DataElement( _id, categories, data )
        self.data_index[_id] = d
        for c in categories:
            self.setdefault(c, [])
            self[c].append( d )

    def update(self, _id, categories = [], data = None, addToExistingCat = True, errOnMissing = True):
        """
        Updates all DataElements in self that have the given id. If no DataElement is found with the given id and errOnMissing is False, adds a new entry.
        """
        if _id not in self.data_index:
            if errOnMissing:
                raise ValueError, "An element with id %s could not be found." % str(_id)
            else:
                self.add_data( _id, categories, data )
        else:
            self.data_index[_id].update( categories = categories, data = data, addToExistingCat = addToExistingCat)
            self.refresh_categories( [self.data_index[_id]] )

    def refresh_categories(self, data_list):
        """
        Updates self with categories that are in data_list that are not in self.
        """
        for d in data_list:
            self.data_index[d._id] = d
            for c in d.categories:
                self.setdefault(c, [])
                if d not in self[c]:
                    self[c].append(d)

    def add_livetime(self, livetime, category, match_criteria = []):
        """
        Adds livetime to all categories in self that match the given criteria.
        """
        if match_criteria == []:
            match_criteria = Category.default_match_criteria
        for cat in [cat for cat in self if cat.selective_eq(category, match_criteria)]:
            cat.livetime += livetime

    def get_livetime(self, category, match_criteria = [], time_units = 'yr'):
        """
        Returns the sum of all the livetimes of categories that match the given category via the given match_criteria.
        """
        if match_criteria == []:
            match_criteria = Category.default_match_criteria
        return sqlutils.convert_duration(sum([cat.livetime for cat in self if cat.selective_eq(category, match_criteria)]), time_units)

    def create_background(self, match_criteria = []):
        """
        Creates background categories out of the slide categories and adds this to all slide elements' categories lists. Default action is to create a background for each veto-category, on_instruments, ifos, and param_group. However, this can be overridden with the match_criteria argument.
        """
        # NB: the def line above is assumed; the original was lost in extraction
        if match_criteria == []:
            match_criteria = ['veto_cat', 'on_instruments', 'ifos', 'param_group']
        for vals in set([ tuple(getattr(c, x) for x in match_criteria) for c in self if c.datatype == 'slide' ]):
            # create the background category and set its livetime to the total
            # livetime of the matching slide categories
            bkg_cat = Category( offset_vector = {}, datatype = 'background' )
            [setattr(bkg_cat, x, y) for x, y in zip(match_criteria, vals)]
            bkg_cat.livetime = sum([c.livetime for c in self if c.datatype == 'slide' and bkg_cat.selective_eq(c, match_criteria) ])
            # assign the matching slide elements to the background category
            self[bkg_cat] = list(set([x for c in self if c.datatype == 'slide' and bkg_cat.selective_eq(c, match_criteria) for x in self[c]]))

    def compute_cumrates(self, stat, foreground_datatype, rank_by = 'max', group_by = [], num_slides = 100.):
        """
        Computes the cumulative rates for all the distinct groups that exist in self. Distinct groups are determined by group_by.
        """
        if group_by == []:
            group_by = ['datatype', 'veto_cat', 'on_instruments', 'ifos', 'param_group']
        distinct_groups = set([ tuple(getattr(c,x) for x in group_by) for c in self])
        for group in distinct_groups:
            this_group = Category()
            [setattr(this_group, x, y) for (x,y) in zip( group_by, group )]
            this_group.livetime = self.get_livetime( this_group, group_by, time_units = 's' )
            # collect and sort all the stats that fall in this group
            this_data = []
            for c in self:
                if c.selective_eq(this_group, group_by):
                    this_data.extend( self[c] )
            this_data = sorted(set(this_data), key = lambda x: getattr(x.data, stat), reverse = rank_by == 'min')
            d = [getattr(x.data, stat) for x in this_data]
            # the number of trials is the ratio of this group's livetime to
            # the livetime of the foreground datatype
            orig_dt = this_group.datatype
            this_group.datatype = foreground_datatype
            fg_livetime = self.get_livetime( this_group, match_criteria = 'datatype' not in group_by and group_by+['datatype'] or group_by, time_units = 's' )
            nTrials = float(this_group.livetime) / fg_livetime
            this_group.datatype = orig_dt
            # compute the cumulative rates
            these_cumrates = [ (len(d) - bisect.bisect_left(d, x))/nTrials for x in d ]
            # assign the rates to the elements
            for data_elem, rate in zip( this_data, these_cumrates ):
                data_elem.cumrates[this_group] = rate

    def get_cumrates(self, group, stat, rank_by = 'max'):
        """
        Returns a sorted list (by stat) of stats, cumrates, and ids for the given group.
        """
        # NB: the def line above is assumed; the original was lost in extraction
        return sorted([(getattr(d.data, stat), d.cumrates[group], d._id) for d in self.data_index.values() if group in d.cumrates], reverse = rank_by == 'min')

    def get_data(self, _id = None, category = None, category_match_criteria = []):
        """
        Returns a list of DataElements that matches a given id, a given category, or both. If category_match_criteria is specified, will get data that matches the specified elements in category. Otherwise, will use Category.default_match_criteria for comparing category to the stored categories.
        """
        if category_match_criteria == []:
            category_match_criteria = Category.default_match_criteria
        return set([x for c in self for x in self[c] if (category is None or c.selective_eq(category, category_match_criteria)) and (_id is None or _id == x._id)])

    def get_categories(self, category, match_criteria = []):
        """
        Returns a list of categories in self that match the given category via the match_criteria.
        """
        if match_criteria == []:
            match_criteria = Category.default_match_criteria
        return [x for x in self if x.selective_eq(category, match_criteria)]

    def collapse(self, args):
        """
        Cycles over the DataElements in self, keeping only the given args.

        @args: A list of tuples. In each tuple, the first element is the name to give the new collapsed value and the second element is the argument to carry out (either a name or a function) on the uncollapsed row to get the collapsed value.
        """
        # NB: the def line above is assumed; the original was lost in extraction
        cols = [arg[0] for arg in args]
        fns = [arg[1] for arg in args]
        collapsedRow = createDataHolder( 'collapsedRow' )
        for n,origElement in enumerate(self.data_index.values()):
            d = collapsedRow( cols )
            d.store([(col, origElement.data.get_value(fn)) for col, fn in zip(cols, fns)])
            origElement.data = d
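
# Example (a sketch, not part of the original program): store two elements in
# a Data instance and pull them back out by category. The table name and
# column are illustrative.
def _example_data_index():
    holder = createDataHolder( 'example_holder', ['stat'] )
    d = Data()
    cat = Category( datatype = 'slide', veto_cat = 'CAT_3' )
    for stat in (5.5, 7.2):
        row = holder( ['stat'] )
        row.store([ ('stat', stat) ])
        d.add_data( id(row), [cat], row )
    # returns the set of both DataElements
    return d.get_data( category = cat, category_match_criteria = ['datatype'] )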

def combineData(dataObj, match_column, args, param_grouping_function, verbose = False):
    """
    Cycles over the DataElements in dataObj, combining any DataElements with the same match_column value via the given args and returns a new Data object in which the element's ids are the values of the match_column. Note: the categories of the DataElements in the new Data object are just the concatenation of the older objects' individual categories. These might need to be updated depending on the parameters of the newer category.

    @dataObj: the instance of Data to carry out the combination on
    @match_column: name of column in the DataElements to use to match rows to combine; e.g., 'coinc_event_id'
    @args: a list of tuples. In each tuple the first element is the name to give the new combined value, the second element is the column in each row to identify that row by, the third is the column or function of columns in each row to combine, and the final element is the way to combine them, which can be either a predefined method or a function in terms of values of the first element. For example, if you wanted the average chirp mass and the sum of the squares of new_snr over H1 and L1, the args should look like:
        args = [ (combined_newsnr_sq, ifo, get_new_snr, H1**2.+L1**2.), (combined_mchirp, ifo, mchirp, mean) ]
    """
    cols = [arg[0] for arg in args]
    colkeys = [arg[1] for arg in args]
    sngl_stats = [arg[2] for arg in args]
    cmb_fncs = [arg[3] for arg in args]
    newData = Data()
    combinedRow = createDataHolder( 'combinedRow' )
    # group the elements by their match_column values
    match_vals = {}
    for d in dataObj.data_index.values():
        this_id = d.data.get_value(match_column)
        match_vals.setdefault(this_id, [])
        match_vals[this_id].append(d)
    ii = 0
    for idcol, combine_data in match_vals.items():
        ii += 1
        if verbose:
            if ii != len(match_vals):
                print "%i/%i (%.2f%%)\r" % (ii, len(match_vals), 100*float(ii)/len(match_vals)),
            else:
                print ''
        newRow = combinedRow( cols )
        stats = [ dict([ [x.data.get_value(colkey), x.data.get_value(snglstat)] for x in combine_data ]) for colkey, snglstat in zip(colkeys, sngl_stats) ]
        newRow.store( [( col, combineRowStats( fnc, stat_dict )) for col, fnc, stat_dict in zip(cols, cmb_fncs, stats)] )
        orig_cats = [y for x in combine_data for y in x.categories]
        ifos_param = 'ifos' in dir(newRow) and 'ifos' or 'ifo'
        new_cats = [Category( c.offset_vector, c.datatype, c.veto_cat, c.on_instruments, getattr(newRow, ifos_param), param_grouping_function(newRow.param) ) for c in orig_cats]
        newData.add_data( id(newRow), new_cats, newRow )

    return newData

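# Example (a sketch, not part of the original program): combine per-ifo rows
# that share a coinc_event_id. The column names (ifo, mchirp, snr) and the
# trivial param grouping are illustrative; combineData expects the combined
# rows to end up with 'param' and 'ifos' (or 'ifo') attributes, hence the
# first and last tuples.
def _example_combine_data(sngl_data):
    args = [ ('param', 'ifo', 'mchirp', 'mean'),
             ('stat', 'ifo', 'snr', 'quad_sum'),
             ('ifos', 'ifo', 'ifo', 'sorted_keys') ]
    return combineData( sngl_data, 'coinc_event_id', args, lambda param: 0 )
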
def join_experiment_tables_to_sngl_table( table ):
    return """
    JOIN
        experiment, experiment_summary, experiment_map, coinc_event_map
    ON (
        experiment.experiment_id == experiment_summary.experiment_id
        AND experiment_summary.experiment_summ_id == experiment_map.experiment_summ_id
        AND experiment_map.coinc_event_id == coinc_event_map.coinc_event_id
        AND coinc_event_map.event_id == %s.event_id )""" % table
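
# Example (a sketch, not part of the original program): the clause generated
# for the sngl_inspiral table can be dropped directly after a FROM clause.
def _example_join_clause():
    return ''.join([ 'SELECT * FROM sngl_inspiral ', join_experiment_tables_to_sngl_table( 'sngl_inspiral' ) ])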


def check_instruments( on_instruments, ifos, filter, sngl_ifos = False ):
    # NB: the original def line is missing here; the name and signature are
    # assumed from the checkInstruments class below, which applies this test
    if on_instruments is None or on_instruments == 'ALL':
        on_instruments = frozenset(['ALL'])
    if not (isinstance(on_instruments, set) or isinstance(on_instruments, frozenset)):
        on_instruments = lsctables.instrument_set_from_ifos(on_instruments)
    # if no ifos given, just check the on-instruments
    if ifos is None or ifos == 'ALL':
        return frozenset(on_instruments) in filter or frozenset(['ALL']) in filter

    if not (isinstance(ifos, set) or isinstance(ifos, frozenset)):
        if sngl_ifos:
            ifos = set([ifos])
        else:
            ifos = lsctables.instrument_set_from_ifos(ifos)

    if on_instruments == frozenset(['ALL']):
        if sngl_ifos:
            # check whether the single ifo appears in any of the filter values
            return set(['ALL']) in filter.values() or any(ifos.issubset(x) for x in filter.values())
        else:
            return set(['ALL']) in filter.values() or ifos in filter.values()

    if frozenset(['ALL']) in filter:
        if sngl_ifos:
            return set(['ALL']) in filter[frozenset(['ALL'])] or ifos.issubset(filter[frozenset(['ALL'])])
        else:
            return set(['ALL']) in filter[frozenset(['ALL'])] or ifos in filter[frozenset(['ALL'])]
    elif frozenset(on_instruments) in filter:
        if sngl_ifos:
            return set(['ALL']) in filter[frozenset(on_instruments)] or ifos.issubset(filter[frozenset(on_instruments)])
        else:
            return set(['ALL']) in filter[frozenset(on_instruments)] or ifos in filter[frozenset(on_instruments)]
    else:
        return False


def poissonPDFlog10( k, lmbda ):
    # (name and signature assumed; the def line is missing) log base 10 of the
    # Poisson PDF: k*log10(lmbda) - (lmbda + ln(k!))/ln(10)
    return k*numpy.log10( lmbda ) - (lmbda + special.gammaln(k+1.))/numpy.log(10.)
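

# Sanity check (a sketch, not part of the original program): for small k the
# log10 PDF should match direct computation of log10( lmbda**k * exp(-lmbda) / k! ).
def _example_poisson_pdf_log10():
    k, lmbda = 3., 2.5
    direct = numpy.log10( lmbda**k * numpy.exp(-lmbda) / math.factorial(int(k)) )
    return poissonPDFlog10( k, lmbda ), direct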


class checkInstruments:
    def __init__(self, filter_name, filter, is_single = False):
        self.filter_name = filter_name
        self.filter = filter
        self.is_single = is_single

    def apply_test(self, on_instruments, ifos):
        # assumed implementation: the original lines are missing here; this
        # wraps the module-level filter check so it can be registered below
        return check_instruments( on_instruments, ifos, self.filter, sngl_ifos = self.is_single )

    def create_apply_test(self, sqlite_connection):
        sqlite_connection.create_function(self.filter_name, 2, self.apply_test)


def createCombineRowsMethod( tableName, columns, functionList ):
    """
    Creates a CombineRows class that can be used in a sqlite database to combine rows on the fly. Takes in a sngl_function, which is the function used to combine columns within a single row, and a combining_function, which is the function used to combine the results of the sngl_functions across rows.

    @tableName: the name of the table that rows will be read from. If it is a table in lsctables.py, all methods and columns from that table will be inherited.
    @columns: the list of columns that data will be stored to. This list must be in the same order that data will be read in from the database.
    @functionList: a list of tuples. The first item should be the combining function to use, in terms of the ifos to combine, and the second item should be the sngl function to use, in terms of columns or methods of the sngl_row.
    """
    sngl_row = createDataHolder(tableName, columns)

    class CombineRows:
        def __init__(self):
            """
            Initializes variables needed for the step process.
            """
            self.this_coinc = dict([ [x, {}] for x in functionList ])

        def step(self, *args):
            """
            Populates self.this_coinc.
            """
            this_row = sngl_row(columns)
            this_row.store(zip(columns,args))
            for combine_func, sngl_function in functionList:
                self.this_coinc[(combine_func, sngl_function)][this_row.ifo] = this_row.get_value(sngl_function)

        def finalize(self):
            """
            Once all the singles for the coinc have been gathered, applies the desired combining function(s) to them and returns the result. Results are returned as a comma separated string.
            """
            return ','.join([str(combineRowStats( cfunc, self.this_coinc[(cfunc, sfunc)] )) for cfunc, sfunc in functionList])

    return CombineRows
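

# Example (a sketch, not part of the original program): register the generated
# class as a SQLite aggregate that combines per-ifo snrs into a quadrature
# sum. The aggregate name and columns are illustrative.
def _example_register_aggregate(connection):
    columns = ['ifo', 'snr']
    CombineRows = createCombineRowsMethod( 'ifo_holder', columns, [('quad_sum', 'snr')] )
    # the aggregate takes one argument per column and returns the combined stat
    connection.create_aggregate( 'combined_snr', len(columns), CombineRows )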


class getRowStatsFromDatabase:
    """
    Helper class to get row stat directly from the database.
    """
    def __init__(self, tableName, columns, functionList):
        """
        @tableName: see createCombineRowsMethod.
        @columns: see createCombineRowsMethod.
        @functionList: a list of functions to retrieve from the columns in tableName.
        """
        self.dataRow = createDataHolder(tableName, columns)
        self.functionList = functionList
        self.columns = columns

    def get_dbrow_stat(self, *args):
        """
        Gets the stat.

        @args: data must be in the same order as columns.
        """
        this_row = self.dataRow(self.columns)
        this_row.store(zip(self.columns,args))
        return ','.join([ str(this_row.get_value(func)) for func in self.functionList ])
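

# Example (a sketch, not part of the original program): register a get_stat
# function that computes a new_snr-like statistic directly in a query. The
# column list and statistic are illustrative.
def _example_register_row_stat(connection):
    columns = ['snr', 'chisq', 'chisq_dof']
    getStatDB = getRowStatsFromDatabase( 'stat_holder', columns, ['snr / (chisq/(2.*chisq_dof-2.))**(1./4)'] )
    connection.create_function( 'get_stat', len(columns), getStatDB.get_dbrow_stat )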


def main(argv = None):
802
803 opts, filenames, input_args = parse_command_line(argv)
804 for this_cache in opts.cache_file:
805 this_cache = lal.Cache().fromfile(file(this_cache))
806 filenames.extend([x.path() for x in this_cache])
807 sqlite3.enable_callback_tracebacks(True)
808
809 statistic, stat_label = len(opts.statistic.split(':')) == 2 and opts.statistic.split(':') or [opts.statistic, opts.statistic.replace('_', ' ').title()]
810 data_table = sqlutils.validate_option(opts.data_table)
811
812
813 add_background = []
814 for addme in opts.add_background_file:
815
816 f = open(addme, 'r')
817 bkg_time = f.readline()
818 stats = sorted([ float(line) for line in f ])
819 f.close()
820 lbl = ''
821 if len(addme.split(':')) > 1:
822 lbl = addme.split(':')[1]
823 add_background.append((stats, float(bkg_time) / 3.15569e7, lbl))
824 add_background_sp = []
825 for addme in opts.add_special_background_file:
826
827 f = open(addme, 'r')
828 bkg_time = f.readline()
829 stats = sorted([ float(line) for line in f ])
830 f.close()
831 lbl = ''
832 if len(addme.split(':')) > 1:
833 lbl = addme.split(':')[1]
834 add_background_sp.append((stats, float(bkg_time)/ 3.15569e7, lbl))

    combining_function = None
    if opts.combining_method is not None:
        combining_function = opts.combining_method
    if opts.combining_function is not None:
        combining_function = opts.combining_function

    # validate the foreground datatype
    fg_datatype = sqlutils.validate_option( opts.foreground_datatype, lower = True )
    if fg_datatype not in lsctables.ExperimentSummaryTable.datatypes:
        raise ValueError, "unrecognized foreground-datatype %s" % opts.foreground_datatype
    # 'slide' means no foreground; otherwise, add the foreground datatype to
    # the queries
    if fg_datatype != 'slide':
        add_foreground = ' OR experiment_summary.datatype == "%s"' % fg_datatype
    else:
        add_foreground = ''

    # get the param name, label, and range parser if binning by a parameter
    if opts.param_name:
        param_name, param_label = len(opts.param_name.split(':')) == 2 and opts.param_name.split(':') or [opts.param_name, opts.param_name.replace('_', ' ').title()]
        param_parser = sqlutils.parse_param_ranges( data_table, param_name, opts.param_ranges, verbose = opts.verbose )

    # construct the segments to veto around the plot-special times
    if opts.plot_special_time != []:
        plot_special_times = segments.segmentlist([ segments.segment( t - opts.plot_special_window, t + opts.plot_special_window ) for t in opts.plot_special_time ])

    # parse the exclude-coincs and include-only-coincs filters
    if opts.exclude_coincs:
        exclude_filter = sqlutils.parse_coinc_options( opts.exclude_coincs ).coinc_types
        if 'ALL' in exclude_filter:
            exclude_filter[frozenset(['ALL'])] = exclude_filter['ALL']
            del exclude_filter['ALL']
        for on_inst, coinc_ifos in exclude_filter.items():
            exclude_filter[on_inst] = [ ifos == 'ALL' and set(['ALL']) or ifos for ifos in coinc_ifos ]
    if opts.include_only_coincs:
        include_filter = sqlutils.parse_coinc_options( opts.include_only_coincs ).coinc_types
        if 'ALL' in include_filter:
            include_filter[frozenset(['ALL'])] = include_filter['ALL']
            del include_filter['ALL']
        for on_inst, coinc_ifos in include_filter.items():
            include_filter[on_inst] = [ ifos == 'ALL' and set(['ALL']) or ifos for ifos in coinc_ifos ]

    # offset vectors are not used, so drop them from the match criteria
    Category.default_match_criteria = [x for x in Category.default_match_criteria if x != 'offset_vector']

    # initialize storage
    data = Data()
    rowType = createDataHolder( 'data_holder', ['end_time', stat_label] )
    last_table_type = ''
    gps_start_time = numpy.inf
    gps_end_time = -numpy.inf
    analyzed_instruments = set()
    num_slides = None

    if opts.verbose:
        print >> sys.stdout, "Analyzing file:"
    for filename in filenames:
        if opts.verbose:
            print >> sys.stdout, "\t%s" % filename
        working_filename = dbtables.get_connection_filename( filename, tmp_path = opts.tmp_space, verbose = False )
        connection = sqlite3.connect( working_filename )
        if opts.tmp_space:
            dbtables.set_temp_store_directory(connection, opts.tmp_space, verbose = False)

        # figure out whether the data table is a sngl or coinc table
        data_cols = sqlutils.get_column_names_from_table(connection, data_table)
        if 'coinc_event_id' in data_cols:
            table_type = 'coinc'
        if 'event_id' in data_cols:
            table_type = 'sngl'

        # sanity checks
        if table_type == 'coinc' and 'ifos' not in data_cols or table_type == 'sngl' and 'ifo' not in data_cols:
            raise ValueError, 'Could not find %s column in the data table (%s)' % (table_type == 'sngl' and 'ifo' or 'ifos', data_table)
        if last_table_type != '' and last_table_type != table_type:
            raise ValueError, '%s table in file %s does not match the types in the other files' % ( data_table, filename )
        if table_type == 'coinc' and opts.plot_special_time != [] and opts.single_table is None:
            raise ValueError, "Must specify a --single-table if using --plot-special-time with a coinc-ifos table."
        if table_type == 'sngl' and combining_function is None:
            raise ValueError, "Must provide a combining function or method if querying sngl-ifo tables."
        # remember the table type for consistency checks on later files
        last_table_type = table_type

        #
        #   Get the background categories
        #
        if opts.verbose:
            print "\tgetting background categories..."

        # set up the include/exclude filters on the experiment instruments
        in_desired_times = ''
        if opts.include_only_coincs:
            db_include_filter = checkInstruments( 'in_include_filter', include_filter, is_single = False )
            db_include_filter.create_apply_test(connection)
            in_desired_times = ' AND in_include_filter(experiment.instruments, "ALL")'
        if opts.exclude_coincs:
            db_exclude_filter = checkInstruments( 'in_exclude_filter', exclude_filter, is_single = False )
            db_exclude_filter.create_apply_test(connection)
            in_desired_times = ' '.join([ in_desired_times, 'AND NOT in_exclude_filter(experiment.instruments, "ALL")'])

        # cycle over the experiment rows, creating categories
        tsids = set()
        match_criteria = ['datatype', 'veto_cat', 'param_group']
        if opts.plot_by_instrument_time:
            match_criteria.append( 'on_instruments' )
        if opts.param_name:
            n_param_groups = len(param_parser.param_ranges)
        else:
            n_param_groups = 1
        sqlquery = """
            SELECT
                experiment.instruments,
                experiment.gps_start_time,
                experiment.gps_end_time,
                experiment_summary.time_slide_id,
                experiment_summary.veto_def_name,
                experiment_summary.duration,
                experiment_summary.datatype
            FROM
                experiment_summary
            JOIN
                experiment
            ON
                experiment.experiment_id == experiment_summary.experiment_id
            WHERE
                (experiment_summary.datatype == 'slide'""" + add_foreground + ')' + in_desired_times
        if opts.debug:
            print >> sys.stderr, "SQLite query used to get categories:"
            print >> sys.stderr, sqlquery
        for on_instruments, this_gps_start, this_gps_end, tsid, veto_cat, duration, datatype in connection.cursor().execute(sqlquery):
            on_instruments = frozenset(lsctables.instrument_set_from_ifos(on_instruments))
            analyzed_instruments.update( on_instruments )
            gps_start_time = min(gps_start_time, this_gps_start)
            gps_end_time = max(gps_end_time, this_gps_end)
            if datatype == "slide":
                datatypes = ["slide", "background"]
                if opts.plot_special_time != []:
                    datatypes.append("background_special")
                # track the distinct time_slide_ids to count the slides
                tsids.add(tsid)
            else:
                datatypes = [datatype]
            if opts.plot_by_ifos:
                ifo_list = [frozenset([x for x in ifos]) for ifos in CoincInspiralUtils.get_ifo_combos(list(on_instruments))]
            else:
                ifo_list = [frozenset(['ALL'])]

            for datatype in datatypes:
                for param_group in range(n_param_groups):
                    for ifos in ifo_list:
                        this_cat = Category( offset_vector = {}, datatype = datatype, veto_cat = veto_cat, ifos = ifos, param_group = param_group)
                        if opts.plot_by_instrument_time:
                            this_cat.on_instruments = on_instruments
                        # make sure the category exists even if it gets no data
                        data.setdefault( this_cat, [] )
                        # add the livetime; for background_special categories,
                        # subtract the vetoed plot-special windows (times the
                        # number of on instruments) from the duration
                        if datatype == "background_special" and opts.plot_special_time != [] and plot_special_times.intersects_segment( segments.segment(this_gps_start, this_gps_end) ):
                            data.add_livetime( duration - len(on_instruments)*sum([abs(seg) for seg in plot_special_times]), this_cat, match_criteria )
                        else:
                            data.add_livetime( duration, this_cat, match_criteria )

        # check that the number of slides is consistent across the databases
        if num_slides is None and len(tsids) != 0:
            num_slides = len(tsids)
        elif len(tsids) != num_slides and len(tsids) != 0:
            raise ValueError, "Database %s has a different number of slides (%i) than the other databases (%i)." % ( filename, len(tsids), num_slides )

        # average the slide livetime over the number of slides
        if len(tsids) != 0:
            for slide_cat in data.get_categories( Category(datatype = 'slide'), match_criteria = ['datatype'] ):
                slide_cat.livetime = float(slide_cat.livetime) / len(tsids)

        #
        #   Get the data
        #
        if opts.verbose:
            print >> sys.stdout, "\tgetting data..."

        quick_query = False
        # remove_coincs is populated below if plot-special-time is on
        remove_coincs = []
        if table_type == 'sngl':
            # create an aggregate function to combine the singles on the fly
            functionList = []
            if opts.param_name:
                functionList.append((opts.param_combining_method, param_name))
            functionList.extend([('alpha_min', 'end_time+end_time_ns*1e-9'), ('mean', 'mtotal'), (combining_function, statistic)])
            if opts.plot_special_time != []:
                functionList.append( ('sorted_values', 'end_time+end_time_ns*1e-9') )
            get_combined_data = createCombineRowsMethod( data_table, data_cols, functionList )
            connection.create_aggregate('get_combined_data', len(data_cols), get_combined_data)
            # and an aggregate to build the coinc ifos from the single ifos
            get_ifos = createCombineRowsMethod( 'ifo_holder', ['ifo'], [('sorted_keys', 'ifo')] )
            connection.create_aggregate('get_ifos', 1, get_ifos)

            # set up the sngl and coinc include/exclude filters
            add_sngl_filter = ''
            add_coinc_filter = ''
            if opts.include_only_coincs:
                include_sngl_filter = dict([ [on_inst, set([coinc == 'ALL' and 'ALL' or ifo for coinc in coincs for ifo in coinc])] for on_inst, coincs in include_filter.items() ])
                db_include_sngl_filter = checkInstruments( 'in_include_sngl_filter', include_sngl_filter, is_single = True )
                db_include_sngl_filter.create_apply_test(connection)
                add_sngl_filter = ''.join([ 'AND in_include_sngl_filter(experiment.instruments, ', data_table, '.ifo', ') '])
                add_coinc_filter = ''.join([ 'HAVING in_include_filter(experiment.instruments, ifos)' ])
            if opts.exclude_coincs:
                exclude_sngl_filter = dict([ [on_inst, set([coinc == 'ALL' and 'ALL' or ifo for coinc in coincs for ifo in coinc])] for on_inst, coincs in exclude_filter.items() ])
                db_exclude_sngl_filter = checkInstruments( 'in_exclude_sngl_filter', exclude_sngl_filter, is_single = True )
                db_exclude_sngl_filter.create_apply_test(connection)
                add_sngl_filter = ''.join([ add_sngl_filter, 'AND NOT in_exclude_sngl_filter(experiment.instruments, ', data_table, '.ifo', ') '])
                add_coinc_filter = ''.join([ add_coinc_filter, add_coinc_filter == '' and 'HAVING ' or 'AND ', 'NOT in_exclude_filter(experiment.instruments, ifos)'])
            sqlquery = ''.join([ """
                SELECT
                    experiment.instruments,
                    experiment_summary.datatype,
                    experiment_summary.veto_def_name,
                    coinc_event_map.coinc_event_id,
                    get_ifos(""", data_table, """.ifo) AS ifos,
                    get_combined_data(""", ','.join(['.'.join([data_table,col]) for col in data_cols]), """)
                FROM
                    """, data_table, """
                """, join_experiment_tables_to_sngl_table( data_table ), """
                WHERE
                    (experiment_summary.datatype == 'slide'""", add_foreground, """)
                    """, add_sngl_filter, """
                GROUP BY
                    coinc_event_map.coinc_event_id
                """, add_coinc_filter ])

        else:
            # reading from a coinc table; if plot-special-time is on, find the
            # coincs whose un-slid single end-times fall in the special windows
            add_time_filter = ''
            if opts.plot_special_time != []:
                sngl_table = sqlutils.validate_option( opts.single_table )
                # create a function to check the single end times
                def check_sngl_time( end_time, end_time_ns ):
                    return end_time+end_time_ns*1e-9 in plot_special_times
                connection.create_function('check_sngl_time', 2, check_sngl_time)

                sqlquery = ''.join([ """
                    SELECT DISTINCT
                        coinc_event_map.coinc_event_id AS ceid
                    FROM
                        coinc_event_map
                    JOIN
                        """, sngl_table, """ AS sngl_table
                    ON (
                        coinc_event_map.table_name == '""", sngl_table, """' AND
                        sngl_table.event_id == coinc_event_map.event_id )
                    JOIN
                        experiment_summary, experiment_map
                    ON (
                        experiment_summary.experiment_summ_id == experiment_map.experiment_summ_id AND
                        experiment_map.coinc_event_id == coinc_event_map.coinc_event_id )
                    WHERE
                        experiment_summary.datatype == "slide" AND
                        check_sngl_time( sngl_table.end_time, sngl_table.end_time_ns )
                    """])
                if opts.debug:
                    print >> sys.stderr, "SQLite query used to get coincs falling in plot special time:"
                    print >> sys.stderr, sqlquery
                remove_coincs = [ceid[0] for ceid in connection.cursor().execute(sqlquery)]

            # if the needed statistics already exist as columns in the table,
            # query them directly; otherwise, build a get_stat function
            if (opts.param_name is not None and param_name in data_cols) and statistic in data_cols:
                quick_query = True
                get_stats = ''.join([ data_table, ".", param_name, ", ", data_table, ".end_time+", data_table, ".end_time_ns*1e-9, ", data_table, ".mass, ", data_table, ".", statistic ])
            elif opts.param_name is None and statistic in data_cols:
                quick_query = True
                get_stats = ''.join([ data_table, ".end_time+", data_table, ".end_time_ns*1e-9, ", data_table, ".mass, ", data_table, ".", statistic ])
            else:
                # compute the stats on the fly
                functionList = []
                if opts.param_name:
                    functionList.append(param_name)
                functionList.extend(['end_time+end_time_ns*1e-9', 'mass', statistic])
                getStatDB = getRowStatsFromDatabase( data_table, data_cols, functionList )
                connection.create_function( 'get_stat', len(data_cols), getStatDB.get_dbrow_stat )
                get_stats = ''.join([ 'get_stat(', ','.join([ '.'.join([data_table,col]) for col in data_cols ]), ')' ])

            add_coinc_filter = ''
            if opts.include_only_coincs:
                add_coinc_filter = ''.join([ '\nAND in_include_filter(experiment.instruments, ', data_table, '.ifos)' ])
            if opts.exclude_coincs:
                add_coinc_filter = ''.join([ add_coinc_filter, '\nAND NOT in_exclude_filter(experiment.instruments, ', data_table, '.ifos)'])
            sqlquery = ''.join([ """
                SELECT
                    experiment.instruments,
                    experiment_summary.datatype,
                    experiment_summary.veto_def_name,
                    """, data_table, """.coinc_event_id,
                    """, data_table, """.ifos,
                    """, get_stats, """
                FROM
                    """, data_table, """
                """,
                sqlutils.join_experiment_tables_to_coinc_table( data_table ),
                """
                WHERE
                    (experiment_summary.datatype == 'slide'""", add_foreground, ')', add_coinc_filter, add_time_filter ])
        if opts.debug:
            print >> sys.stderr, "SQLite query used to get data:"
            print >> sys.stderr, sqlquery
        results = connection.cursor().execute(sqlquery).fetchall()
        for result in results:
            if opts.plot_by_instrument_time:
                on_instruments = frozenset(lsctables.instrument_set_from_ifos( result[0] ))
            else:
                on_instruments = frozenset(["ALL"])
            datatype = result[1]
            # skip slide triggers whose single end-times fall in the
            # plot-special windows
            if datatype == "slide" and opts.plot_special_time != [] and table_type == 'sngl':
                sngl_end_times = [float(t) for t in result[5].split(',')[-1].split(';')]
                if any(t in plot_special_times for t in sngl_end_times):
                    continue
            veto_cat = result[2]
            ceid = result[3]
            if opts.plot_by_ifos:
                ifos = frozenset(lsctables.instrument_set_from_ifos(result[4]))
            else:
                ifos = frozenset(["ALL"])
            if opts.param_name is not None:
                if quick_query:
                    param_group, end_time, mtotal, stat = [result[5], result[6], result[7], result[8]]
                else:
                    param_group, end_time, mtotal, stat = [float(x) for x in result[5].split(',') if ';' not in x]
                param_group = param_parser.group_by_param_range( param_group )
                # skip if the param falls outside all of the param ranges
                if param_group is None:
                    continue
            else:
                param_group = 0.
                if quick_query:
                    end_time, mtotal, stat = [result[5], result[6], result[7]]
                else:
                    end_time, mtotal, stat = [float(x) for x in result[5].split(',') if ';' not in x]

            if opts.xmin is not None:
                if stat < opts.xmin and opts.min_x_fit is not None and stat < opts.min_x_fit:
                    continue
                elif stat < opts.xmin and opts.min_x_fit is None:
                    continue

            this_row = rowType()
            this_row.store([ ('end_time', end_time), ('mtotal', mtotal), ('stat', stat), ('ifos', result[4]) ])
            # categorize: slide triggers also fall in the background and, if
            # not in a plot-special window, the background_special categories
            categories = [ Category( {}, datatype, veto_cat, on_instruments, ifos, param_group ) ]
            if datatype == "slide":
                categories.append(Category( {}, 'background', veto_cat, on_instruments, ifos, param_group ))
                if opts.plot_special_time != [] and ceid not in remove_coincs:
                    categories.append(Category( {}, 'background_special', veto_cat, on_instruments, ifos, param_group ))

            # store
            thisid = id(this_row)
            data.add_data( thisid, categories, this_row )

        # done with this database
        connection.close()
        dbtables.discard_connection_filename( filename, working_filename, verbose = False)

    # report the number of triggers in each category
    for cat in data:
        print cat.datatype, len(data[cat])

    #
    #   Compute cumulative rates
    #
    if opts.verbose:
        print "Computing cumulative rates..."

    # what to group by when computing the cumulative rates
    group_by = ['veto_cat', 'param_group']
    if opts.plot_by_instrument_time:
        group_by.append( 'on_instruments' )
    if opts.plot_by_ifos:
        group_by.append( 'ifos' )

    data.compute_cumrates( 'stat', fg_datatype, rank_by = 'max', group_by = group_by+['datatype'])

    #
    #   Plot
    #
    if opts.verbose:
        print >> sys.stdout, "Plotting..."
    opts.gps_start_time = gps_start_time
    opts.gps_end_time = gps_end_time
    opts.ifo_times = ''.join(sorted(analyzed_instruments))
    opts.ifo_tag = ''
    opts.enable_output = True
    InspiralUtilsOpts = InspiralUtils.initialise( opts, __prog__, git_version.verbose_msg )
    fnameList = []
    tagList = []
    comments = ''

    for bkg_category in data.get_categories( Category(datatype = 'background'), ['datatype'] ):
        if opts.verbose:
            print >> sys.stdout, "\tgroup: %s, %s, %s, %s" % (bkg_category.veto_cat, str(bkg_category.param_group), ''.join(bkg_category.on_instruments), ''.join(bkg_category.ifos))

        # construct the matching foreground category
        match_category = Category()
        match_criteria = group_by+['datatype']
        [setattr(match_category, x, getattr(bkg_category, x)) for x in match_criteria]

        # get the background and foreground livetimes
        T_bkg = data.get_livetime(bkg_category, time_units = opts.time_units)
        match_category.datatype = fg_datatype
        T_fg = data.get_livetime( match_category, time_units = opts.time_units )

        #
        #   Non-cumulative histogram
        #
        plot_non_cum = True
        if plot_non_cum:
            if opts.verbose:
                print >> sys.stdout, "\tcreating histogram..."
            pylab.figure()
            pylab.hold(True)
            xmin = numpy.inf
            xmax = -numpy.inf
            ymin = numpy.inf
            ymax = -numpy.inf

            # get the foreground data, if any
            if fg_datatype != 'slide':
                fg_data = data.get_data( category = match_category, category_match_criteria = match_criteria )
                fg_stats = numpy.array([x.data.stat for x in fg_data])
                plot_fg = len(fg_stats) != 0
            else:
                plot_fg = False
            # get the background data
            bkg_data = data[bkg_category]
            bkg_stats = numpy.array([x.data.stat for x in bkg_data])
            # skip empty backgrounds
            if len(bkg_stats) == 0:
                continue

1280 if plot_fg:
1281 min_val = min( bkg_stats.min(), fg_stats.min() )
1282 max_val = max( bkg_stats.max(), fg_stats.max() )
1283 else:
1284 min_val = min( bkg_stats.min(), bkg_stats.min() )
1285 max_val = max( bkg_stats.max(), bkg_stats.max() )
1286
1287
1288 nbins = opts.nbins
1289 if opts.lin_x:
1290
1291 bins = numpy.linspace(numpy.power(min_val, 1./opts.x_power), numpy.power(max_val, 1./opts.x_power), nbins+1, endpoint = True)
1292 else:
1293 bins = numpy.logspace( numpy.log10(min_val), numpy.log10(max_val), nbins+1, endpoint = True )
1294 bins = numpy.power(bins, opts.x_power)
1295
1296 bins[-1] += .0001
1297 ds = numpy.array([bins[i+1] - bins[i] for i in range(len(bins)-1)])/2.
1298 xvals = bins[:-1] + ds

            # histogram the background and rescale to counts per foreground experiment
            bkg_count, _ = numpy.histogram( bkg_stats, bins )
            bkg_count = bkg_count * ( T_fg / T_bkg )
            # drop empty bins so they don't plot as zeros on a log axis
            remove_me = pylab.find( bkg_count == 0. )
            bkg_count = numpy.array([bkg_count[i] for i in range(len(bkg_count)) if i not in remove_me])
            bkg_hist_stats = numpy.array([xvals[i] for i in range(len(xvals)) if i not in remove_me])
            bkg_ds = numpy.array([ds[i] for i in range(len(xvals)) if i not in remove_me])

            if bkg_count.size:
                ymin = min(min(bkg_count),ymin)
                ymax = max(max(bkg_count),ymax)

            # Poisson error bars on the rescaled background counts
            bkg_error = numpy.sqrt( bkg_count / (T_bkg/T_fg) )
            pylab.errorbar( bkg_hist_stats, bkg_count, xerr = bkg_ds, yerr = bkg_error, color = 'k', ecolor = 'gray', marker = 'o', markersize = 4, linestyle = 'None', zorder = 2, label = 'Background' )
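            # Error-bar sketch: bkg_count was rescaled by T_fg/T_bkg above, so
            # the Poisson error on the rescaled count is sqrt(N_raw)*(T_fg/T_bkg)
            # = sqrt( bkg_count / (T_bkg/T_fg) ), which is the expression used here.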

            # histogram the foreground
            loudest_events = []
            if plot_fg:
                fg_count, _ = numpy.histogram( fg_stats, bins )
                remove_me = pylab.find( fg_count == 0. )
                fg_count = numpy.array([fg_count[i] for i in range(len(fg_count)) if i not in remove_me])
                fg_hist_stats = numpy.array([xvals[i] for i in range(len(xvals)) if i not in remove_me])

                if fg_count.size:
                    ymin = min(min(fg_count),ymin)
                    ymax = max(max(fg_count),ymax)

                fclr = 'b'
                lbl = fg_datatype.replace( '_', '-' ).title()
                if fg_datatype == 'simulation':
                    edgclr = 'r'
                elif opts.plot_poisson_pdf:
                    edgclr = 'white'
                else:
                    edgclr = 'none'
                pylab.scatter( fg_hist_stats, fg_count, marker = 'o', edgecolor = edgclr, facecolor = fclr, label = lbl, s = 20, linewidth = .5, zorder = 4 )

                # mark any requested "special" times with a star
                if opts.plot_special_time != []:
                    plot_sp_vals = [x.data.stat for x in fg_data if x.data.end_time in plot_special_times]
                    spx = []
                    spy = []
                    if plot_sp_vals != []:
                        for x in plot_sp_vals:
                            if x > fg_hist_stats.max():
                                spx.append( fg_hist_stats.max() )
                                spy.append( fg_count[-1] )
                            else:
                                # pick out the plotted bin(s) whose center matches this stat
                                idxs = set(pylab.find( x >= fg_hist_stats )) & set(pylab.find( x <= fg_hist_stats ))
                                spx.extend([ fg_hist_stats[i] for i in idxs ])
                                spy.extend([ fg_count[i] for i in idxs ])
                    pylab.scatter( spx, spy, c = 'yellow', marker = (5,1,0), s = 50, linewidth = .5, label = '_no_legend', zorder = 5 )

            # if requested, fit the background histogram and plot the fit
            if plot_fg and opts.extrapolate is not None:
                if opts.verbose:
                    print >> sys.stdout, "\textrapolating background..."

                # fit only stats above min-x-fit
                if opts.min_x_fit is None:
                    min_x_fit = bkg_hist_stats.min()
                else:
                    min_x_fit = opts.min_x_fit
                start_i = pylab.find( bkg_hist_stats >= min_x_fit )[0]
                n = len(bkg_count) - start_i
                X = numpy.matrix(numpy.zeros((n,2), dtype = float))
                Y = numpy.matrix(numpy.zeros((n,1), dtype = float))
                W = numpy.matrix(numpy.zeros((n,n), dtype = float))

                # build the design matrix, log-count observations, and weights
                for i in range(n):
                    if bkg_count[start_i + i] == 0:
                        continue
                    Y[i,0] = numpy.log( bkg_count[start_i + i] )
                    W[i,i] = 1./numpy.power( bkg_error[start_i + i]/bkg_count[start_i + i], 2. )
                    X[i,0] = 1.
                    if opts.extrapolate.upper() == 'POWER':
                        X[i,1] = numpy.log( bkg_hist_stats[start_i + i] )
                    else:
                        X[i,1] = bkg_hist_stats[start_i + i]**(2./opts.x_power)

                # weighted least-squares solution
                Beta_hat = (X.T * W * X).I * X.T * W * Y

                # goodness of fit
                noncum_chisq = ( W * numpy.power((Y - X * Beta_hat), 2.) ).sum()
                noncum_Ndof = n - 2 - 1
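                # Weighted least-squares sketch: with design matrix X, diagonal
                # weight matrix W (inverse variances of the log-counts) and
                # observations Y = log(count), the estimator above is
                #     Beta_hat = (X^T W X)^{-1} X^T W Y,
                # i.e. a fit of log y = b0 + b1*log(x) for the power-law model,
                # or log y = b0 + b1*x**(2./x_power) for the Gaussian model.
                # noncum_chisq is the weighted residual sum of squares and
                # noncum_Ndof the degrees of freedom, reported on the html page.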

                # evaluate the fit across the plotted range
                n_fit = 50
                if opts.lin_x:
                    x_fit = numpy.linspace( bkg_hist_stats[start_i], bkg_hist_stats.max(), num = n_fit )
                else:
                    x_fit = numpy.logspace( numpy.log10( bkg_hist_stats[start_i] ), numpy.log10( bkg_hist_stats.max() ), num = n_fit )
                Xfit = numpy.matrix(numpy.zeros((n_fit,2), dtype = float))
                for i,x in enumerate(x_fit):
                    Xfit[i,0] = 1.
                    if opts.extrapolate.upper() == 'POWER':
                        Xfit[i,1] = numpy.log(x)
                    else:
                        Xfit[i,1] = x**(2./opts.x_power)
                y_fit = numpy.exp( numpy.array(Xfit * Beta_hat) )

                pylab.plot( x_fit, y_fit, 'g-', linewidth = 2, zorder = 5, label = 'Fitted Bkg.' )

                if y_fit.any():
                    ymin = min(y_fit.min(),ymin)
                    ymax = max(y_fit.max(),ymax)

            pylab.grid()
            pylab.legend( loc = 'lower left', scatterpoints = 1, numpoints = 1 )
            pylab.xlabel( stat_label )
            pylab.ylabel( "Count per Foreground Experiment" )
            if opts.title:
                if opts.param_name is not None:
                    t = 'Histogram %s %s' % ( param_label.title(), param_parser.param_range_by_group(bkg_category.param_group) )
                else:
                    t = 'Histogram'
                pylab.title(t)

            # set plot limits: pad the auto-limits, then apply any user overrides
            xmin = min(xvals)
            xmax = max(xvals)
            if not opts.lin_x:
                pylab.gca().semilogx()
                xmax = xmax*10**(0.3)
            else:
                xmax = xmax+.1*xmax
            if not opts.lin_y:
                pylab.gca().semilogy()
                ymin, ymax = ymin*10**(-0.5), ymax*10**(0.5)
            else:
                ymin, ymax = ymin - .1*ymin, ymax + .1*ymax

            if opts.xmin is not None:
                xmin = opts.xmin
            if opts.xmax is not None:
                xmax = opts.xmax
            if opts.ymin is not None:
                ymin = opts.ymin
            if opts.ymax is not None:
                ymax = opts.ymax

            pylab.xlim( xmin, xmax )
            pylab.ylim( ymin, ymax )

            # save the figure and register it for the html page
            plot_description = 'F%i' % ( len(fnameList) )
            name = InspiralUtils.set_figure_tag( plot_description, datatype_plotted = fg_datatype.upper(), open_box = 'all_data' in fg_datatype or 'exclude_play' in fg_datatype )
            fname = InspiralUtils.set_figure_name( InspiralUtilsOpts, name )
            fname_thumb = InspiralUtils.savefig_pylal( filename = fname, dpi = opts.dpi )
            fnameList.append(fname)
            tagList.append(name)

        # create the cumulative rate plot
        if opts.verbose:
            print >> sys.stdout, "\tcreating cumulative plot..."
        pylab.figure()
        pylab.hold(True)
        xmin = numpy.inf
        xmax = -numpy.inf
        ymin = numpy.inf
        ymax = -numpy.inf

        # header for the text files of plotted data
        txthead = """#
# Data used to generate the %s
# in the cumulative rate plot (Figure 3) in LIGO Document P1100034.
#
# For questions regarding the use of this material, please contact the LSC
# spokesperson Gaby Gonzalez (gonzalez@lsu.edu) or corresponding authors
# Collin Capano (collin.capano@ligo.org) and Stephen Privitera (sprivite@caltech.edu)
#
"""

        # plot the 100-slide background computed with the candidate event
        # removed, if --plot-special-time was given
        if opts.plot_special_time != []:
            match_category.datatype = 'background_special'
            plotvals = data.get_cumrates( match_category, 'stat', rank_by = 'max' )
            bkgsp_stats = numpy.array([x[0] for x in plotvals])
            bkgsp_cumnum = numpy.array([y[1] for y in plotvals])

            if bkgsp_stats.size:
                xmin = min(min(bkgsp_stats),xmin)
                xmax = max(max(bkgsp_stats),xmax)
            if bkgsp_cumnum.size:
                ymin = min(min(bkgsp_cumnum / T_fg),ymin)
                ymax = max(max(bkgsp_cumnum / T_fg),ymax)

            bkgsp_error = numpy.sqrt( bkgsp_cumnum / (T_fg * T_bkg) )
            plot_errs = numpy.array([ bkgsp_error, bkgsp_error ])
            for n, (y, err) in enumerate(zip( bkgsp_cumnum.tolist(), bkgsp_error.tolist() )):
                if y/T_fg - err <= 0.0:
                    plot_errs[0,n] = y/T_fg - 1e-15
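            # The lower error bars are clamped so that rate - err never goes
            # nonpositive (the y axis is typically logarithmic); the clamp
            # leaves an essentially-zero lower bound of 1e-15.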
            pylab.errorbar( bkgsp_stats, bkgsp_cumnum / T_fg, yerr = plot_errs, color = '#777777', ecolor = '#777777', marker = 'o', markersize = 4, markeredgecolor = '#777777', linestyle = 'None', zorder = -1, label = '_no_legend' )

            # write out the plotted data
            f = open('PlotRatesData-HundredSlideBkg-NoEvent.txt', 'w')
            print >> f, txthead % 'background estimate from 100 slides with the candidate event removed (gray dots)'
            print >> f, '#Combined NewSNR\tCumulative Rate (yr^-1)\ty-error (+/- yr^-1)'
            for stat, cumrate, err in zip( bkgsp_stats.tolist(), (bkgsp_cumnum / T_fg).tolist(), bkgsp_error.tolist() ):
                print >> f, '%f\t%f\t%f' % (stat, cumrate, err)
            f.close()

            # collect the loudest background-special events (ties in stat count once)
            loudest_bkgsp_events = []
            ii = 0
            last_stat = None
            for x in plotvals[::-1]:
                if ii > 10:
                    break
                if x[0] != last_stat:
                    ii += 1
                    last_stat = x[0]
                loudest_bkgsp_events.append(x[2])

            # overlay the extended (added) background estimates with the
            # candidate event removed; add_background_sp is assumed to have
            # been read from the extended-background text files earlier
            added_bkgsp = []
            for newsnrs, bkg_time, lbl in add_background_sp:
                nTrials = bkg_time / T_fg
                these_cumnums = numpy.array([ (len(newsnrs) - bisect.bisect_left(newsnrs, x))/nTrials for x in newsnrs ])
                this_error = numpy.sqrt( these_cumnums / (T_fg * bkg_time) )
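                # Counting sketch: newsnrs is assumed sorted ascending (as read
                # in earlier), so len(newsnrs) - bisect_left(newsnrs, x) is the
                # number of extended-background events with stat >= x; dividing
                # by nTrials = bkg_time/T_fg converts that to a count per
                # foreground experiment.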
                plot_errs = numpy.array([ this_error, this_error ])
                for n, (y, err) in enumerate(zip( these_cumnums.tolist(), this_error.tolist() )):
                    if y/T_fg - err <= 0.0:
                        plot_errs[0,n] = y/T_fg - 1e-15
                T_add_bkgsp = bkg_time
                pylab.errorbar( newsnrs, these_cumnums / T_fg, yerr = plot_errs, color = '#777777', ecolor = '#777777', marker = 'x', markersize = 6, mew = 1, linestyle = 'None', zorder = -1, label = '_no_legend' )

                # write out the plotted data
                f = open('PlotRatesData-ExtendedBkg-NoEvent.txt', 'w')
                print >> f, txthead % 'extended background estimate with the candidate event removed (gray crosses)'
                print >> f, '#Combined NewSNR\tCumulative Rate (yr^-1)\ty-error (+/- yr^-1)'
                for stat, cumrate, err in zip( newsnrs, (these_cumnums / T_fg).tolist(), this_error.tolist() ):
                    print >> f, '%f\t%f\t%f' % (stat, cumrate, err)
                f.close()

                xmin = min(min(newsnrs), xmin)
                xmax = max(max(newsnrs), xmax)
                ymin = min(these_cumnums.min(), ymin)
                ymax = max(these_cumnums.max(), ymax)

                # keep the extended-background points that are louder than
                # anything in the 100-slide estimate
                added_bkgsp = [x for x in zip(newsnrs, these_cumnums) if x[0] > bkgsp_stats.max()]

        # plot the 100-slide background estimate
        match_category.datatype = 'background'
        plotvals = data.get_cumrates( match_category, 'stat', rank_by = 'max' )
        bkg_stats = numpy.array([x[0] for x in plotvals])
        bkg_cumnum = numpy.array([y[1] for y in plotvals])

        if bkg_stats.size:
            xmin = min(min(bkg_stats),xmin)
            xmax = max(max(bkg_stats),xmax)
        if bkg_cumnum.size:
            ymin = min(min(bkg_cumnum / T_fg),ymin)
            ymax = max(max(bkg_cumnum / T_fg),ymax)

        # clamp lower error bars as above
        bkg_error = numpy.sqrt( bkg_cumnum / (T_fg * T_bkg) )
        plot_errs = numpy.array([ bkg_error, bkg_error ])
        for n, (y, err) in enumerate(zip( bkg_cumnum.tolist(), bkg_error.tolist() )):
            if y/T_fg - err <= 0.0:
                plot_errs[0,n] = y/T_fg - 1e-15
        pylab.errorbar( bkg_stats, bkg_cumnum / T_fg, yerr = plot_errs, color = 'k', ecolor = 'k', marker = 'o', markersize = 4, markeredgecolor = 'k', linestyle = 'None', zorder = 2, label = 'Background' )

        # write out the plotted data
        f = open('PlotRatesData-HundredSlideBkg.txt', 'w')
        print >> f, txthead % 'background estimate from 100 slides (black dots)'
        print >> f, '#Combined NewSNR\tCumulative Rate (yr^-1)\ty-error (+/- yr^-1)'
        for stat, cumrate, err in zip( bkg_stats.tolist(), (bkg_cumnum / T_fg).tolist(), bkg_error.tolist() ):
            print >> f, '%f\t%f\t%f' % (stat, cumrate, err)
        f.close()

        # collect the loudest background events (ties in stat count once)
        loudest_bkg_events = []
        ii = 0
        last_stat = None
        for x in plotvals[::-1]:
            if ii > 10:
                break
            if x[0] != last_stat:
                ii += 1
                last_stat = x[0]
            loudest_bkg_events.append(x[2])

        # overlay the extended (added) background estimates
        added_bkg = []
        for newsnrs, bkg_time, lbl in add_background:
            nTrials = bkg_time / T_fg
            # counts per foreground experiment of events with stat >= each newsnr
            these_cumnums = numpy.array([ (len(newsnrs) - bisect.bisect_left(newsnrs, x))/nTrials for x in newsnrs ])
            this_error = numpy.sqrt( these_cumnums / (T_fg * bkg_time) )
            plot_errs = numpy.array([ this_error, this_error ])
            for n, (y, err) in enumerate(zip( these_cumnums.tolist(), this_error.tolist() )):
                if y/T_fg - err <= 0.0:
                    plot_errs[0,n] = y/T_fg - 1e-15
            T_add_bkg = bkg_time
            pylab.errorbar( newsnrs, these_cumnums / T_fg, yerr = plot_errs, color = 'k', ecolor = 'k', marker = 'x', markersize = 6, mew = 1, linestyle = 'None', zorder = 2, label = lbl )

            # write out the plotted data
            f = open('PlotRatesData-ExtendedBkg.txt', 'w')
            print >> f, txthead % 'extended background estimate (black crosses)'
            print >> f, '#Combined NewSNR\tCumulative Rate (yr^-1)\ty-error (+/- yr^-1)'
            for stat, cumrate, err in zip( newsnrs, (these_cumnums / T_fg).tolist(), this_error.tolist() ):
                print >> f, '%f\t%f\t%f' % (stat, cumrate, err)
            f.close()

            xmin = min(min(newsnrs), xmin)
            xmax = max(max(newsnrs), xmax)
            ymin = min(these_cumnums.min(), ymin)
            ymax = max(these_cumnums.max(), ymax)

            # keep the extended-background points that are louder than anything
            # in the 100-slide estimate
            added_bkg = [x for x in zip(newsnrs, these_cumnums) if x[0] > bkg_stats.max()]

        # plot the foreground cumulative rate
        loudest_events = []
        if plot_fg:
            match_category.datatype = fg_datatype
            plotvals = data.get_cumrates( match_category, 'stat', rank_by = 'max' )
            fg_stats = numpy.array([x[0] for x in plotvals])
            fg_cumrate = numpy.array([y[1] for y in plotvals]) / T_fg

            fclr = 'b'
            if fg_datatype == 'simulation':
                edgclr = 'r'
                edgwth = .5
            elif opts.plot_poisson_pdf:
                edgclr = 'white'
                edgwth = .5
            else:
                edgclr = 'b'
                edgwth = 0.5
            lbl = 'Foreground'
            pylab.plot( fg_stats, fg_cumrate, marker = '^', markersize = 6.5, markerfacecolor = fclr, markeredgecolor = edgclr, markeredgewidth = edgwth, linestyle = 'None', label = lbl, zorder = 4 )

            # write out the plotted data
            f = open('PlotRatesData-Foreground.txt', 'w')
            print >> f, txthead % 'foreground coincident events (blue triangles)'
            print >> f, '#Combined NewSNR\tCumulative Rate (yr^-1)'
            for stat, cumrate in zip( fg_stats.tolist(), fg_cumrate.tolist() ):
                print >> f, '%f\t%f' % (stat, cumrate)
            f.close()

            if fg_stats.any():
                xmin = min(min(fg_stats),xmin)
                xmax = max(max(fg_stats),xmax)
            if fg_cumrate.any():
                ymin = min(min(fg_cumrate),ymin)
                ymax = max(max(fg_cumrate),ymax)

            # the loudest foreground events are all events tied at the maximum stat
            for x in plotvals[::-1]:
                if x[0] != max(fg_stats):
                    break
                loudest_events.append(x[2])

        # extrapolate the background for the cumulative plot
        x_ext = numpy.array([])
        y_ext = numpy.array([])
        if plot_fg and opts.extrapolate is not None:
            if opts.verbose:
                print >> sys.stdout, "\textrapolating background..."
            if opts.extrapolate.upper() != 'ERFC':
                # fit the cumulative rates directly, starting from min-x-fit
                min_x_fit = bkg_hist_stats.min() if opts.min_x_fit is None else opts.min_x_fit

                start_i = pylab.find( bkg_stats >= min_x_fit )[0]
                n = len(bkg_cumnum) - start_i
                X = numpy.matrix(numpy.zeros((n,2), dtype = float))
                Y = numpy.matrix(numpy.zeros((n,1), dtype = float))
                W = numpy.matrix(numpy.zeros((n,n), dtype = float))

                # same weighted least-squares construction as for the histogram,
                # but on the cumulative rates
                for i in range(n):
                    Y[i,0] = numpy.log( bkg_cumnum[start_i + i] ) - numpy.log( T_fg )
                    W[i,i] = 1./numpy.power( bkg_error[start_i + i]/bkg_cumnum[start_i + i], 2. )
                    X[i,0] = 1.
                    if opts.extrapolate.upper() == 'POWER':
                        X[i,1] = numpy.log( bkg_stats[start_i + i] )
                    else:
                        X[i,1] = bkg_stats[start_i + i]**(2./opts.x_power)

                Beta_hat = (X.T * W * X).I * X.T * W * Y

                # goodness of fit
                cum_chisq = ( W * numpy.power((Y - X * Beta_hat), 2.) ).sum()
                cum_Ndof = n - 2 - 1

                # evaluate the extrapolation out to the loudest foreground event
                n_ext = 50
                if opts.lin_x:
                    x_ext = numpy.linspace( bkg_stats[start_i], max(fg_stats.max(), bkg_stats.max()), num = n_ext )
                else:
                    x_ext = numpy.logspace( numpy.log10( bkg_stats[start_i] ), numpy.log10( max(fg_stats.max(), bkg_stats.max()) ), num = n_ext )
                Xext = numpy.matrix(numpy.zeros((n_ext,2), dtype = float))
                for i,x in enumerate(x_ext):
                    Xext[i,0] = 1.
                    if opts.extrapolate.upper() == 'POWER':
                        Xext[i,1] = numpy.log(x)
                    else:
                        Xext[i,1] = x**(2./opts.x_power)
                y_ext = numpy.exp( numpy.array(Xext * Beta_hat) )

            else:
                # ERFC: integrate the Gaussian fitted to the non-cumulative
                # histogram instead of fitting the cumulative rates
                n_ext = 50
                sigmasq = -1./(2.*Beta_hat[1,0])
                amp = numpy.exp(Beta_hat[0,0]) * numpy.sqrt(2*numpy.pi*sigmasq) / ( numpy.power(bins[1], 1./opts.x_power) - numpy.power(bins[0], 1./opts.x_power) )
                if opts.lin_x:
                    x_ext = numpy.linspace( min_x_fit, fg_stats.max(), num = n_ext )
                else:
                    x_ext = numpy.logspace( numpy.log10(min_x_fit), numpy.log10( fg_stats.max() ), num = n_ext )
                y_ext = amp * special.erfc( numpy.power(x_ext, 1./opts.x_power)/numpy.sqrt(2*sigmasq) ) / (2. * T_fg)
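                # ERFC sketch: the non-cumulative histogram was fit above as a
                # Gaussian in u = x**(1./x_power), counts ~ A*exp(-u^2/(2*sigmasq)),
                # so its cumulative tail is proportional to the integral
                #     amp * erfc( u / sqrt(2*sigmasq) ) / 2,
                # divided by T_fg to convert the expected count to a rate.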

            pylab.plot( x_ext, y_ext, 'g--', linewidth = 2, zorder = 5, label = 'Extrapolated Bkg.' )

            if x_ext.any():
                xmin = min(min(x_ext),xmin)
                xmax = max(max(x_ext),xmax)
            if y_ext.any():
                ymin = min(y_ext.min(),ymin)
                ymax = max(y_ext.max(),ymax)

        # compute the Poisson probability map / significance bands
        if opts.plot_poisson_pdf or opts.plot_significance_bands:
            if opts.verbose:
                print >> sys.stdout, "\tcomputing poisson pdf..."
            # grid dimensions of the probability map
            n_xcoords = 101
            n_ycoords = 2*n_xcoords

            # fold the extended background into the measured background
            bkg_stats = numpy.array( bkg_stats.tolist() + [x[0] for x in added_bkg] )
            bkg_cumnum = numpy.array( bkg_cumnum.tolist() + [x[1] for x in added_bkg] )

            if opts.extrapolate is not None and plot_fg and x_ext.any():
                xcoords = numpy.linspace( bkg_stats.min(), max(x_ext.max(), bkg_stats.max()), n_xcoords )
            else:
                xcoords = numpy.linspace( bkg_stats.min(), bkg_stats.max(), n_xcoords )
            ycoords = numpy.logspace( 0, numpy.log10(ymax+0.5), n_ycoords )

            xarray = numpy.zeros((n_xcoords,n_ycoords), dtype=float)
            yarray = numpy.zeros((n_xcoords,n_ycoords), dtype=float)
            zarray = numpy.zeros((n_xcoords,n_ycoords), dtype=float)

            # expected background count (lmbda) at each x: measured where slides
            # exist, extrapolated from the fit beyond
            for xi,x in enumerate(xcoords):
                if x <= bkg_stats.max():
                    lmbda = bkg_cumnum[pylab.find( bkg_stats >= x )[0]]
                elif opts.extrapolate is not None and opts.extrapolate.upper() == "POWER":
                    lmbda = numpy.exp( (Beta_hat[0,0] + Beta_hat[1,0]*numpy.log(x)) ) * T_fg
                elif opts.extrapolate is not None and opts.extrapolate.upper() == "GAUSSIAN":
                    lmbda = numpy.exp( (Beta_hat[0,0] + Beta_hat[1,0]*x**(2./opts.x_power)) ) * T_fg
                elif opts.extrapolate is not None and opts.extrapolate.upper() == "ERFC":
                    lmbda = amp * special.erfc( numpy.power(x,1./opts.x_power)/numpy.sqrt(2*sigmasq) ) / 2.
                for yi,y in enumerate(ycoords):
                    xarray[xi,yi] += x
                    yarray[xi,yi] += y
                    p = poissonPDFlog10( y, lmbda )
                    zarray[xi,yi] += p
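            # poissonPDFlog10 is assumed (it is defined earlier in this script)
            # to return log10 of the Poisson probability of observing y events
            # given a mean of lmbda; zarray therefore holds the log-probability
            # rendered below as a density map and/or significance bands.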

            # replace log10(0) = -inf cells with just-below-minimum values so
            # hexbin can color them
            minz = min([z for z in zarray.flatten() if z != -numpy.inf])
            for xi in range(len(xcoords)):
                for yi in range(len(ycoords)):
                    if zarray[xi,yi] == -numpy.inf:
                        zarray[xi,yi] = minz - 1

            # convert cumulative counts to rates
            yarray = yarray / T_fg

            if opts.plot_poisson_pdf and not numpy.isnan(zarray).any():
                hxplt = pylab.hexbin( xarray.flatten(), yarray.flatten(), C = zarray.flatten(), gridsize = n_xcoords-1, xscale = 'linear', yscale = 'log', edgecolors = 'none', vmin = -10. )
                cb = pylab.colorbar( hxplt, format = pylab.FuncFormatter(ColorFormatter) )
                cb.ax.set_ylabel( 'Probability Density' )

            # N-sigma significance contours
            if opts.plot_significance_bands and opts.max_sigma > 0.:
                if opts.verbose:
                    print >> sys.stdout, "\tplotting contours..."
                sigmas = sorted([numpy.log10(special.erfc((n+1)/numpy.sqrt(2))/2.) for n in range(opts.max_sigma)])
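                # Contour-level sketch: erfc(n/sqrt(2))/2 is the one-sided
                # Gaussian tail probability of an n-sigma fluctuation, so these
                # log10 levels mark where the Poisson probability falls to the
                # 1-sigma, 2-sigma, ..., max-sigma values.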
                if opts.plot_poisson_pdf:
                    contplt = pylab.contour( xarray, yarray, zarray, sigmas, colors = 'k', linestyles = 'dashdot', label = 'N-\sigma', zorder = 1 )
                    cb.add_lines(contplt)
                else:
                    fix_zarray( zarray, sigmas )
                    pylab.contourf( xarray, yarray, zarray, sigmas+[0.0], cmap = pylab.cm.gray_r, alpha = 0.7 )

        # mark the boundary between measured and extrapolated background
        if plot_fg and opts.extrapolate is not None:
            pylab.plot( [bkg_stats.max(), bkg_stats.max()], [fg_cumrate.min(), ymax*10**0.5], 'k-', linewidth = 2, zorder = 3 )

        pylab.grid()
        pylab.legend( loc = 'lower left', scatterpoints = 1, numpoints = 3 )
        pylab.xlabel( stat_label, fontsize=16 )
        for xl in pylab.gca().get_xticklabels():
            xl.set_size(16)
        pylab.ylabel( "Cumulative Rate (%s$^{-1}$)" % opts.time_units, fontsize=16 )
        for yl in pylab.gca().get_yticklabels():
            yl.set_size(16)
        if opts.title:
            if opts.param_name is not None:
                t = 'Cumulative Histogram %s %s' % ( param_label.title(), param_parser.param_range_by_group(bkg_category.param_group) )
            else:
                t = 'Cumulative Histogram'
            # hard-coded title used for the P1100034 version of this plot
            # (overrides the defaults constructed above)
            t = 'Cumulative Histogram of H1L1 triggers with $3.48\mathrm{M_\odot} \le \mathcal{M} < 7.40\mathrm{M_\odot}$'
            pylab.title(t)

        # set plot limits: pad the auto-limits, then apply any user overrides
        if plot_fg:
            xmin = min(fg_stats.min(), bkg_stats.min())
        else:
            xmin = bkg_stats.min()
        if not opts.lin_x:
            pylab.gca().semilogx()
            xmax = xmax*10**(0.3)
        else:
            xmax = xmax+.1*xmax
        if not opts.lin_y:
            pylab.gca().semilogy()
            ymin, ymax = ymin*10**(-0.5), ymax*10**(0.5)
        else:
            ymin, ymax = ymin - .1*ymin, ymax + .1*ymax

        if opts.xmin is not None:
            xmin = opts.xmin
        if opts.xmax is not None:
            xmax = opts.xmax
        if opts.ymin is not None:
            ymin = opts.ymin
        if opts.ymax is not None:
            ymax = opts.ymax

        pylab.xlim( xmin, xmax )
        pylab.ylim( ymin, ymax )

        # save the figure and register it for the html page
        plot_description = 'cumulative_F%i' % ( len(fnameList) )
        name = InspiralUtils.set_figure_tag( plot_description, datatype_plotted = fg_datatype.upper(), open_box = 'all_data' in fg_datatype or 'exclude_play' in fg_datatype )
        fname = InspiralUtils.set_figure_name( InspiralUtilsOpts, name )
        fname_thumb = InspiralUtils.savefig_pylal( filename = fname, dpi = opts.dpi )
        fnameList.append(fname)
        tagList.append(name)

        # add summary information to the html page
        comments += 'On instruments: <b>%s</b> Coinc. ifos: <b>%s</b> Veto Category: <b>%s</b>' % ( ''.join(sorted(match_category.on_instruments)), ''.join(sorted(match_category.ifos)), match_category.veto_cat )
        if opts.param_name is not None:
            comments += ' Param-group: <b>%s</b>' % ( param_parser.param_range_by_group(match_category.param_group) )
        comments += '<br />\n'
        comments += "Background livetime: %.2f %s<br />\n" % ( T_bkg, opts.time_units )
        if opts.add_background_file:
            comments += "Added background livetime: %.2f %s<br />\n" % ( T_add_bkg, opts.time_units )
        comments += "<b>Loudest background event%s:</b><br />\n" % ( len(loudest_bkg_events) > 1 and 's' or '' )
        comments += '<table cellpadding="5" border="1">\n'
        comments += '<tr><th>End Time</th><th>UTC</th><th>ifos</th><th>Total Mass</th><th>%s</th></tr>\n' % stat_label.replace(r'$', '')
        for le_id in loudest_bkg_events:
            le = data.data_index[le_id]
            comments += '<tr><td>%.2f</td><td>%s</td><td>%s</td><td>%.2f</td><td>%.2f</td></tr>' % ( le.data.get_value('end_time'), printutils.format_end_time_in_utc(int(le.data.get_value('end_time'))), le.data.get_value('ifos'), le.data.get_value('mtotal'), le.data.get_value('stat') )
        comments += '</table>\n'
        if plot_fg:
            comments += "<br />%s livetime: %.3f %s<br />\n" % ( fg_datatype.replace('_', '-').title(), T_fg, opts.time_units )
            comments += "<b>Loudest %s event%s:</b><br />\n" % ( fg_datatype, len(loudest_events) > 1 and 's' or '' )
            comments += '<table cellpadding="5" border="1">\n'
            comments += '<tr><th>End Time</th><th>UTC</th><th>ifos</th><th>Total Mass</th><th>%s</th><th>Measured FAR</th>%s<th>Probability given background</th></tr>\n' % ( stat_label.replace(r'$', ''), opts.extrapolate is not None and '<th>Estimated FAR</th>' or '' )
            for le_id in loudest_events:
                le = data.data_index[le_id]
                # use the extended-background livetime if the event is louder
                # than the start of the added background
                if opts.add_background_file and le.data.stat >= added_bkg[0][0]:
                    T_far = T_add_bkg
                else:
                    T_far = T_bkg
                far = ( len(bkg_stats) - bisect.bisect_left(bkg_stats, le.data.stat) ) / T_far
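                # Measured-FAR sketch: bkg_stats is assumed sorted ascending, so
                # len(bkg_stats) - bisect_left(bkg_stats, stat) counts the
                # background events at least as loud as this one; dividing by the
                # background livetime gives the false alarm rate, reported as an
                # upper bound when the count is zero.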
                if far == 0:
                    far = '&lt; 1/%.2f %s<sup>-1</sup>' % ( T_far, opts.time_units )
                else:
                    far = '1/%.2e %s<sup>-1</sup>' % ( 1./far, opts.time_units )
                comments += '<tr><td>%.2f</td><td>%s</td><td>%s</td><td>%.2f</td><td>%.2f</td><td>%s</td>' % ( le.data.get_value('end_time'), printutils.format_end_time_in_utc(int(le.data.get_value('end_time'))), le.data.get_value('ifos'), le.data.get_value('mtotal'), le.data.get_value('stat'), far )
                if opts.extrapolate is not None:
                    # estimate the FAR from the extrapolated fit
                    if opts.extrapolate.upper() == "POWER":
                        far_est = numpy.exp( Beta_hat[0,0] + Beta_hat[1,0]*numpy.log(le.data.stat) )
                    elif opts.extrapolate.upper() == "GAUSSIAN":
                        far_est = numpy.exp( Beta_hat[0,0] + Beta_hat[1,0]*le.data.stat**(2./opts.x_power) )
                    else:
                        far_est = amp * special.erfc( numpy.power(le.data.stat, 1./opts.x_power)/numpy.sqrt(2*sigmasq) ) / (2. * T_fg)
                    comments += '<td>1/%.2e %s<sup>-1</sup></td>' % ( 1./far_est, opts.time_units )
                # Poisson probability of the observation given the expected
                # background count lmbda
                x = le.data.stat
                if x <= bkg_stats.max():
                    lmbda = bkg_cumnum[pylab.find( bkg_stats >= x )[0]]
                    comments += "<td>%.2e</td>" % numpy.power(10, poissonPDFlog10( 1, lmbda ))
                else:
                    lmbda = bkg_cumnum[-1]
                    comments += "<td>&lt; %.2e</td>" % numpy.power(10, poissonPDFlog10( 1, lmbda ))
                comments += '</tr>'
            comments += '</table>\n'
        if opts.extrapolate is not None:
            # report the fit parameters on the html page
            if opts.extrapolate.upper() == 'POWER':
                comments += "<br />Fitted <i>y = Ax<sup>B</sup></i><br />Results:<br />"
                comments += "A = %.2e B = %.2f<br />" % ( numpy.exp(Beta_hat[0,0]), Beta_hat[1,0] )
            elif opts.extrapolate.upper() == 'GAUSSIAN':
                comments += "<br />Fitted <i>y = Aexp(-x<sup>2</sup>/2&sigma;<sup>2</sup>)/sqrt(2&pi;&sigma;<sup>2</sup>)</i><br />Results:<br />"
                sigmasq = -1./(2.*Beta_hat[1,0])
                amp = numpy.exp( Beta_hat[0,0] + numpy.log(2*numpy.pi*sigmasq)/2. )
                comments += "&sigma;<sup>2</sup> = %.2f A = %.2f<br />" % ( sigmasq, amp )
                comments += "Extrapolated from:<br />&beta;<sub>0</sub> = %.2f &beta;<sub>1</sub> = %.2f<br />" % ( Beta_hat[0,0], Beta_hat[1,0] )
            else:
                comments += "<br />Fitted <i>y = Aexp(-x<sup>2</sup>/2&sigma;<sup>2</sup>)/sqrt(2&pi;&sigma;<sup>2</sup>)</i> to the non-cumulative histogram.<br />Results:<br />"
                comments += "&sigma;<sup>2</sup> = %.2f A = %.2f<br />" % ( sigmasq, amp )
            comments += "<br />Goodness of fit:<br />"
            comments += "&chi;<sup>2</sup> = %.2f Reduced &chi;<sup>2</sup> = %.2f" % ( noncum_chisq, noncum_chisq / noncum_Ndof )

        comments += '<hr />\n'

    if opts.verbose:
        print >> sys.stdout, "Writing html file and cache."

    # write the html page and the cache file listing all generated figures
    plothtml = InspiralUtils.write_html_output( InspiralUtilsOpts, input_args, fnameList, tagList, comment = comments, add_box_flag = True )
    InspiralUtils.write_cache_output( InspiralUtilsOpts, plothtml, fnameList )


if __name__ == "__main__":
    main()