pylal.lalapps_cbc

1 #!/usr/bin/env python 2 3 # 4 # ============================================================================= 5 # 6 # Preamble 7 # 8 # ============================================================================= 9 # 10 11 __prog__ = 'lalapps_cbc_plotrates' 12 description = 'Creates plots of histogram counts and cumulative rates vs. a given statistic. Statistics can be either single or coincident, and can be queried or computed on the fly from either a sngl table or a coinc table. NOTE: This is a modified version of that in lalsuite. This also plots extended background triggers, which must be stored in text files. It also writes out text files of the data that was plotted in the cumulative histogram.' 13 usage = '%s [options] file1.sqlite file2.sqlite ...' 14 15 from optparse import OptionParser 16 import sqlite3 17 import sys 18 import os 19 import re 20 import math 21 import numpy 22 from scipy import special 23 import bisect 24 import matplotlib 25 matplotlib.use('Agg') 26 import pylab 27 pylab.rc('font',**{'family':'serif','serif':['Computer Modern Roman']}) 28 pylab.rc('text', usetex=True) 29 30 from glue import lal 31 from glue import segments 32 from glue.ligolw import dbtables 33 from glue.ligolw import lsctables 34 from glue import git_version 35 36 from pylal import ligolw_sqlutils as sqlutils 37 from pylal import CoincInspiralUtils 38 from pylal import InspiralUtils 39 from pylal import printutils 40 from pylal.ligolw_dataUtils import get_row_stat 41 42 # ============================================================================= 43 # 44 # Set Options 45 # 46 # ============================================================================= 47 48

49 -def parse_command_line(argv = None):

50 """ 51 Parse the command line, return options and check for consistency among the 52 options. 53 """ 54 parser = OptionParser( version = git_version.verbose_msg, usage = usage, description = description ) 55 56 # following are related to file input and output naming 57 parser.add_option( "-o", "--output-path", action = "store", type = "string", default = './', 58 help = 59 "Output path to save figures to." 60 ) 61 parser.add_option( "-u", "--user-tag", action = "store", type = "string", default = '', 62 help = 63 "User-tag for plot file names" 64 ) 65 parser.add_option( "-t", "--tmp-space", action = "store", type = "string", default = None, 66 metavar = "PATH", 67 help = 68 "Location of local disk on which to do work. " + 69 "This is used to enhance performance in a networked " + 70 "environment." 71 ) 72 parser.add_option( "-c", "--cache-file", action = "append", default = [], 73 help = 74 "Read files from the given cache file. File must be lal formated. To specify multiple, give the argument multiple times." 75 ) 76 parser.add_option( "-v", "--verbose", action = "store_true", default = False, 77 help = 78 "Print progress information" 79 ) 80 parser.add_option( "", "--debug", action = "store_true", default = False, 81 help = 82 "Print SQLite queries used to obtain data." 83 ) 84 # following are specific to this program 85 parser.add_option("-s", "--statistic", action = "store", type="string", 86 default=None, metavar="STATTYPE(:label)", 87 help = 88 "Stat to plot. Can be any column or any function of columns in the given table. Syntax for functions is python; functions available are anything from python's math module. To specify a label for the stat when plotting, append a colon and the label." 89 ) 90 parser.add_option("-d", "--data-table", action = "store", type = "string", default = None, 91 help = 92 "Table to get triggers from. Can be any table with either a coinc_event_id or an event_id. If the table has a coinc_event_id, it will be assumed to be a coincident table and so must have an 'ifos' column. If the table has an event_id, it will be assumed to be a single event table, and so must have an 'ifo' column. Additionally, if a combining-method or function is specified, single events will be groupped together via their event_ids in the coinc_event_map table. Note: Plotting using single event tables can cause the program to take siginificantly longer to execute than coincident event tables. If a coincident statistic has already been computed and exists in the coinc_event table, always use that instead." 93 ) 94 parser.add_option("-C", "--combining-method", action = "store", type="string", metavar = 'sum | quad_sum | mean | median | min | max', default=None, 95 help = 96 "How to combine the given statistic for each ifo. This is only needed if trying to plot coincident statistics from a single event table. Supported methods are sum, quad_sum (the square root of the sum of the squares), mean, median, min, or max values." 97 ) 98 parser.add_option("-F", "--combining-function", action = "store", type="string", default=None, 99 help = 100 "A more explicit alternative to combining-method, this allows you to specify exactly how to combine values from each ifo. The expression should be a function of the ifos to be combined; if an ifo isn't present in a given coinc, it will be given a 0 value. For instance, if --statistic is set to snr, and the combining-method is set to sqrt(H1**2+L1**2+(.5*V1)**2), then the sum of the squares of H1, L1, and V1 will be plotted, with a weight of .5 given to V1. If doubles are present, they will also be plotted. Only ifos that are specified will be included in coincs. In other words, if only H1 and L1 are listed in the equation only H1 and L1 will be included, even if H2 was listed as being coincident with them in the database. If combining-method is also specified, combining-function takes precedence." 101 ) 102 # parser.add_option("-S", "--reference-snr", action = "store", type = "float", 103 # default = 8., 104 # help = 105 # "Reference snr to plot. Default is 8.0." 106 # ) 107 parser.add_option("-f", "--foreground-datatype", action = "store", type = "string", default = "slide", 108 help = 109 "Plot the given foreground type. Options are all_data, exclude_play, playground, or simulation. Default is to plot no foreground. Background expected rates are determined by the livetime of the specified datatype. If not foreground datatype specified, expected rates are based on the average background time." 110 ) 111 parser.add_option( "-p", "--param-name", metavar = "PARAMETER[:label]", 112 action = "store", default = None, 113 help = 114 "Specifying this and param-ranges will only select triggers that fall within the given range for each plot. " + 115 "This will apply to all plots. Any column(s) in the simulation or recovery table may be used. " + 116 "To specify a parameter in the simulation table, prefix with 'injected_'; " + 117 "for recovery table, 'recovered_'. As with variables, math operations are permitted between columns, e.g.," + 118 "injected_mass1+injected_mass2. Any function in the python math module may be used; syntax is python. The parameter name " + 119 "will be in the title of each plot; to give a label for the name, add a colon and the label. If no label is specified, the " + 120 "the parameter name as given will be used. " + 121 "WARNING: if you use any recovered parameters, missed injections will not be included in plots " + 122 "as recovered parameters are undefined for them. For example, if you chose to bin by recovered_mchirp " + 123 "missed injections will not be plotted on any plot. If you chose to bin by injected_mchirp, however, missed injections " + 124 "will be plotted. " 125 ) 126 parser.add_option( "-q", "--param-ranges", action = "store", default = None, 127 metavar = " [ LOW1, HIGH1 ); ( LOW2, HIGH2]; !VAL3; etc.", 128 help = 129 "Requires --param-name. Specify the parameter ranges " + 130 "to select triggers in. A '(' or ')' implies an open " + 131 "boundary, a '[' or ']' a closed boundary. To specify " + 132 "multiple ranges, separate each range by a ';'. To " + 133 "specify a single value, just type that value with no " + 134 "parentheses or brackets. To specify not equal to a single " + 135 "value, put a '!' before the value. If " + 136 "multiple ranges are specified, a separate plot for each range will be generated." 137 ) 138 parser.add_option("", "--param-combining-method", default = "mean", 139 help = 140 "For sngl-ifo tables, how to combine the --param-name. Either a combining-function or method may be used. Default is mean." 141 ) 142 parser.add_option( "", "--exclude-coincs", action = "store", type = "string", default = None, 143 metavar = " [COINC_INSTRUMENTS1 + COINC_INSTRUMENTS2 in INSTRUMENTS_ON1];" 144 "[ALL in INSTRUMENTS_ON2]; etc.", 145 help = 146 "Exclude coincident types in specified detector times, " + 147 "e.g., '[H2,L1 in H1,H2,L1]'. Some rules: " + 148 "* Coinc-types and detector time must be separated by " + 149 "an ' in '. When specifying a coinc_type or detector " + 150 "time, detectors and/or ifos must be separated by " + 151 "commas, e.g. 'H1,L1' not 'H1L1'. " + 152 "* To specify multiple coinc-types in one type of time, " + 153 "separate each coinc-type by a '+', e.g., " + 154 "'[H1,H2 + H2,L1 in H1,H2,L1]'. " + 155 "* To exclude all coincs in a specified detector time " + 156 "or specific coinc-type in all times, use 'ALL'. E.g., " + 157 "to exclude all H1,H2 triggers, use '[H1,H2 in ALL]' " + 158 "or to exclude all H2,L1 time use '[ALL in H2,L1]'. " + 159 "* To specify multiple exclusions, separate each " + 160 "bracket by a ';'. " + 161 "* Order of the instruments nor case of the letters " + 162 "matter. So if your pinky is broken and you're " + 163 "dyslexic you can type '[h2,h1 in all]' without a " + 164 "problem." 165 ) 166 parser.add_option( "", "--include-only-coincs", action = "store", type = "string", default = None, 167 metavar = " [COINC_INSTRUMENTS1 + COINC_INSTRUMENTS2 in INSTRUMENTS_ON1];" + 168 "[ALL in INSTRUMENTS_ON2]; etc.", 169 help = 170 "Opposite of --exclude-coincs: only plot the specified coinc types. WARNING: If you type 'ALL' for on-instruments, e.g., '[H1,L1 in ALL]', and plot-by-instrument-time is not on, livetime from *all* of the instrument times will be included in the background, even if it is from an instrument time that could not produce H1,L1 triggers. If this isn't desired, either be explicit about what instrument times you really want -- i.e., don't use 'ALL' -- or use --exclude-coincs to exclude the instrument times you don't want. In the example, adding --exclude-coincs [ALL in L1,V1];[ALL in H1,V1] will prevent any H1,V1 or L1,V1 time from being added." 171 ) 172 parser.add_option( "", "--plot-special-time", action = "append", type = "float", default = [], 173 help = 174 "Exclude all single triggers that are within N seconds of the given end-time from the background, where N is set by --plot-special-window. All foreground triggers that fall within the range will be plotted as stars. Note: this can only be done when reading from sngl-ifo tables." 175 ) 176 parser.add_option( "", "--plot-special-window", action = "store", type = "float", default = 8.0, 177 help = 178 "Specify the time window, in seconds, to use around the plot-special-time. Default is 8.0 seconds." 179 ) 180 parser.add_option( "", "--add-background-file", action = "append", default = [], metavar = " FILE,BACKGROUND_LIVETIME(:LABEL)", 181 help = 182 "Add a point to the background. Any point added will be plotted, but will not be added to the background distribution obtained from input files for cumulative rate computation nor for extrapolation. If the point added is louder than the loudest background trigger, this point will be used for extending the probability contours (if plotted). Points should be specified by first providing the x-value of the point, then the total background livetime used to acquire that point, in the same units as --time-units. To specify multiple points, give the argument multiple times." 183 ) 184 parser.add_option( "", "--add-special-background-file", action = "append", default = [], metavar = " FILE,BACKGROUND_LIVETIME(:LABEL)", 185 help = 186 "Add a point to the background. Any point added will be plotted, but will not be added to the background distribution obtained from input files for cumulative rate computation nor for extrapolation. If the point added is louder than the loudest background trigger, this point will be used for extending the probability contours (if plotted). Points should be specified by first providing the x-value of the point, then the total background livetime used to acquire that point, in the same units as --time-units. To specify multiple points, give the argument multiple times." 187 ) 188 parser.add_option( "", "--single-table", action = "store", default = None, 189 help = 190 "If using plot-special-time with a coinc table, a --single-table must be specified to look in for the un-slid single event end-times." 191 ) 192 parser.add_option( "-I", "--plot-by-ifos", action = "store_true", default = False, 193 help = 194 "Create a separate plot for each coincidence type (if plotting a combined statistic) or single ifo (if plotting single ifo statistics). Default is to plot all ifo(s) together." 195 ) 196 parser.add_option( "-T", "--plot-by-instrument-time", action = "store_true", default = False, 197 help = 198 "Create a separate plot for each instrument time. Default is to plot all instrument times together." 199 ) 200 parser.add_option( "-M", "--plot-by-multiplicity", action = "store_true", default = False, 201 help = 202 "Create a separate plot based on the number of coincident instruments. (For single ifo plotting, this is a no-op.) In other words, doubles will be plotted with doubles, triples with triples, etc. Default is to plot all coincident ifos together. If this and plot-by-ifos specified, plot-by-ifos takes precedence." 203 ) 204 parser.add_option( "-U", "--time-units", type = "string", default = "yr", 205 help = 206 "What unit of time to plot the rates in. Can be either 'yr', 'days', 'hr', 'min', or 's'. Default is yr." 207 ) 208 parser.add_option( "-R", "--use-ifo-colors", action = "store_true", default = False, 209 help = 210 "Color foreground points by ifo color. Default is to plot no color (all triggers have a blue outline)." 211 ) 212 parser.add_option( "-H", "--plot-poisson-pdf", action = "store_true", default = False, 213 help = 214 "Plot the Poisson probability density of the background." 215 ) 216 parser.add_option( "-K", "--plot-significance-bands", action = "store_true", default = False, 217 help = 218 "Plot the Poisson probability bands in terms of the number of sigma. The values of the n sigma are the points in the poisson PDF that equal n*erf(y)." 219 ) 220 parser.add_option( "-S", "--max-sigma", type = 'int', default = 5, 221 help = 222 "Maximum number of significance bands to plot. Default=5" 223 ) 224 parser.add_option( "-E", "--extrapolate", default = None, metavar = "Gaussian|Power|erfc", 225 help = 226 "Extrapolate the background using a the given model out to the largest foreground statistic. If this is beyond the measured background, the probability density beyond the loudest background point will be calculated using this extrapolation. Options for models are 'Gaussian', 'Power', or 'erfc'. If 'Gaussian' or 'Power' a least-squares fit is done to both the non-cumulative and the cumulative histograms: for 'Gaussian' y = A*exp(-B*x**(2./x-power)) is fit; if 'Power', y = A*x**B is fit. If 'erfc', a Gaussian is fitted to the non-cumulative histogram to get sigmasq and the scalling factor A. These values are then used in the complitmentary error function y = A*\int{dx*exp(-x^2/2*sigmasq)} to extrapolate the cumulative distribution." 227 ) 228 parser.add_option( "-P", "--x-power", type = "float", default = 1., 229 help = 230 "Power of the combined statistic to use when fitting a gaussian to the data and binning for the non-cumulative plot. The natural log of the Gaussian is taken so that the actual fitted equation is log(y) = Beta[1] + Beta[2]*x**(2/x-power). For example, if plotting linearly in your statistic, x-power should be 1. If plotting against the statistic squared, then x-power would be 2, thus the fit would be log(y) = Beta[1] + Beta[2]*x. Default=1." 231 ) 232 parser.add_option( "", "--min-x-fit", type = 'float', default = None, 233 help = 234 "Background stat at which to begin fitting to for extrapolation. Default is the first background point." 235 ) 236 parser.add_option( "", "--lin-x", action = "store_true", default = False, 237 help = 238 "Plot x-axis on a linear scale. Default is to plot on a log scale." 239 ) 240 parser.add_option( "", "--lin-y", action = "store_true", default = False, 241 help = 242 "Plot y-axis on a linear scale. Default is to plot on a log scale." 243 ) 244 parser.add_option('-a', '--xmin', action = 'store', type = 'float', default = None, 245 help = 246 'Set a minimum value for the x-axis. If lin-x not set, must be > 0. This will apply to all plots generated.' 247 ) 248 parser.add_option('-b', '--xmax', action = 'store', type = 'float', default = None, 249 help = 250 'Set a maximum value for the x-axis. If lin-x not set, must be > 0. This will apply to all plots generated.' 251 ) 252 parser.add_option('-A', '--ymin', action = 'store', type = 'float', default = None, 253 help = 254 'Set a minimum value for the y-axis. If lin-y not set, must be > 0. This will apply to all plots generated.' 255 ) 256 parser.add_option('-B', '--ymax', action = 'store', type = 'float', default = None, 257 help = 258 'Set a maximum value for the y-axis. If lin-y not set, must be > 0. This will apply to all plots generated.' 259 ) 260 parser.add_option( '', '--nbins', type = 'int', default = 100, 261 help = 262 'The number of bins to use for the non-cumulative histogram.' 263 ) 264 parser.add_option( '', '--title', action = 'store_true', default = False, 265 help = 266 'Add a title to plots.' 267 ) 268 parser.add_option( '', '--dpi', action = 'store', type = 'float', default = 150., 269 help = 270 'Set dpi for plots; default is 150.' 271 ) 272 273 if argv is None: 274 (options, args) = parser.parse_args() 275 else: 276 (options, args) = parser.parse_args(argv) 277 278 if options.extrapolate is not None and options.extrapolate.upper() == "GAUSSIAN" and options.x_power is None: 279 raise ValueError, "If extrapolating a Gaussian, must specify a fit-power." 280 281 return options, args, sys.argv[1:]

282 283 284 # ============================================================================= 285 # 286 # Function Definitions 287 # 288 # ============================================================================= 289

290 -def fix_zarray(zarray,sigmas):

291 for n,s in enumerate(sorted(sigmas)): 292 for ii in range(zarray.shape[0]): 293 if zarray[ii,n] >= s: 294 zarray[ii,n] = s

295 #for jj in range(zarray.shape[1]): 296 # if zarray[n,jj] >= s: 297 # zarray[n,jj] = s 298 #for ii in range(zarray.shape[0]): 299 # if zarray[ii,-(n+1)] >= s: 300 # zarray[ii,-(n+1)] = s 301 302

303 -def createDataHolder( tableName, columns = None ):

304 """ 305 Creates a DataHolder object. If tableName is the same as a table in lsctables, the DataHolder class will be an instance of that table's RowType. 306 """ 307 if tableName in lsctables.TableByName: 308 base = lsctables.TableByName[ tableName ].RowType 309 else: 310 base = object 311 # define the class 312 class DataHolder( base ): 313 def __init__(self, columns = None): 314 # override the __slots__ class (if it exists) with columns, if they are specified 315 if columns is not None: 316 self.__slots__ = columns

317 318 def store( self, data ): 319 """ 320 Takes a list of data and assigns the values to the object's variables. 321 322 @data: a list of tuples in which the first element is the column name and the second is the value to assign. 323 """ 324 for col, val in data: 325 setattr( self, col, val ) 326 327 def get_value(self, arg): 328 """ 329 Returns the result of some operation on the elements in self. 330 'arg' can be the name of any defined function in self's base class, 331 a slot in self, or a function of either or both. 332 See ligolw.dataUtils.get_row_stat for more info. 333 """ 334 return get_row_stat( self, arg ) 335 336 return DataHolder 337

338 -def combineRowStats( function, rows ):

339 """ 340 Performs the desired function on the list of single statistics. Note: this can only combine one statistic from each row. 341 @function: can be either a known pre-set (see below) or an arbitrary function. If an arbitrary function, it must be in terms of the ifo names. 342 @rows: a dictionary of statistics keyed by the ifos 343 """ 344 # check if the function is a known pre-sets 345 if function == 'sum': 346 return sum(rows.values()) 347 if function == 'quad_sum': 348 return math.sqrt(sum([x**2. for x in rows.values()])) 349 if function == 'min': 350 return min(rows.values()) 351 if function == 'max': 352 return max(rows.values()) 353 if function == 'mean': 354 return numpy.mean(numpy.array(rows.values())) 355 if function == 'median': 356 return numpy.median(numpy.array(rows.values())) 357 if function == 'alpha_min': 358 return rows[min(rows.keys())] 359 if function == 'sorted_keys': 360 return ','.join(sorted(rows.keys())) 361 if function == 'sorted_values': 362 return ';'.join(sorted(map( str, rows.values() ))) 363 if function == 'echo': 364 return rows 365 366 # otherwise, evaluate the function explicitly 367 safe_dict = dict([ [name,val] for name,val in rows.items() + math.__dict__.items() if not name.startswith('__') ]) 368 369 try: 370 return eval( function, {"__builtins__":None}, safe_dict ) 371 except NameError: 372 # this can happen if an ifo that's specified in the combining function is not in the coincident ifos; in this case, just return None 373 return None

374

375 -class OffsetVector(dict):

376 weak_equality = False

377 - def __init__(self, offset_dict):

378 for ifo in offset_dict: 379 self[ifo] = offset_dict[ifo]

380

381 - def __eq__(self, other):

382 """ 383 The default equality test is to consider two vectors to be equal only if all ifos are the same and all offsets are the same. If one vector is a subset of the other vector, they will not be considered equal. However, if the class attribute weak_equality is set to True, only offsets of the ifos that are both in self and other will be checked. For example: 384 >>> a = OffsetVector({'H1': 0, 'L1': 5}) 385 >>> b = OffsetVector({'H1': 0, 'L1': 5, 'V1': 10}) 386 >>> a == b 387 False 388 >>> OffsetVector.weak_equality = True 389 >>> a == b 390 True 391 """ 392 if type(other) != type(self): 393 return False 394 if OffsetVector.weak_equality: 395 return all( self[ifo] == other[ifo] for ifo in set(self.keys()) & set(other.keys()) ) 396 else: 397 return self.__hash__() == other.__hash__()

398

399 - def __ne__(self, other):

400 return not self == other

401

402 - def __hash__(self):

403 if OffsetVector.weak_equality: 404 return 1 405 else: 406 return hash(tuple(sorted(self.items())))

407 408

409 -class Category:

410 """ 411 Class to store category information. 412 """ 413 default_match_criteria = ['offset_vector', 'datatype', 'veto_cat', 'on_instruments', 'ifos', 'param_group'] 414

415 - def __init__(self, offset_vector = {}, datatype = None, veto_cat = None, on_instruments = frozenset([u'ALL']), ifos = frozenset([u'ALL']), param_group = None):

416 self.offset_vector = OffsetVector(offset_vector) 417 self.datatype = unicode(datatype) 418 self.veto_cat = unicode(veto_cat) 419 self.on_instruments = frozenset(on_instruments) 420 self.ifos = frozenset(ifos) 421 self.param_group = param_group 422 self.livetime = 0

423

424 - def add_livetime(self, time):

425 self.livetime += time

426

427 - def get_livetime(self, time_units = 'yr'):

428 return sqlutils.convert_duration( self.livetime, time_units )

429

430 - def selective_eq(self, other, check_me):

431 """ 432 Only checks the values listed in check_me to figure out whether or not self is equal to other. 433 """ 434 if type(other) != type(self): 435 return False 436 return all(getattr(self,x) == getattr(other,x) for x in check_me)

437

438 - def __eq__(self, other):

439 """ 440 For default equality check, uses class attribute default_match_criteria to check what parameters should be considered. 441 """ 442 b = type(self) == type(other) and self.__hash__() == other.__hash__() 443 if b and OffsetVector.weak_equality and 'offset_vector' in Category.default_match_criteria: 444 b = self.offset_vector == other.offset_vector 445 return b

446

447 - def __ne__(self, other):

448 return not self == other

449

450 - def __hash__(self):

451 return hash(tuple(getattr(self,x) for x in Category.default_match_criteria))

452 453

454 -class Data( dict ):

455 """ 456 Class to store statistics and livetime for plotting. 457 """

458 - class DataElement:

459 """ 460 Sub-class to store individual data elements. 461 462 @categories: a list of instances of the Category class defining which categories this data element belongs to 463 @data: an instance of the DataHolder class listing statistics and methods associated with this element 464 """

465 - def __init__(self, thisid, categories, data):

466 self._id = thisid 467 self.categories = categories 468 self.data = data 469 self.cumrates = {}

470

471 - def update(self, _id = None, categories = [], data = None, addToExistingCat = True):

472 # update id 473 if _id is not None: 474 self._id = _id 475 # update data 476 if data is not None: 477 self.data = data 478 # update categories 479 if addToExistingCat: 480 self.categories.extend(categories) 481 else: 482 self.categories = categories

483

484 - def __init__(self):

485 """ 486 A list of all the data elements is kept as an index. 487 """ 488 self.data_index = {}

489

490 - def add_data(self, _id, categories, data):

491 """ 492 Adds a new DataElement to self. 493 494 @id: some unique value to identify the data element 495 @categories: a list of categories that this data falls in. If one or more of these categories are equal (equality determined by the default Category match_criteria) to a category already in all_categories, the category is set to that category. This results in distinct categories only being saved once in memory, with all DataElements that share that category pointing to the same memory address. 496 """ 497 d = self.DataElement( _id, categories, data ) 498 self.data_index[_id] = d 499 for c in categories: 500 self.setdefault(c, []) 501 self[c].append( d )

502

503 - def update(self, _id, categories = [], data = None, addToExistingCat = True, errOnMissing = True):

504 """ 505 Updates all DataElements in self that have the given id. If no DataElement is found with the given id and errOnMissing is False, adds a new entry. 506 """ 507 if _id not in self.data_index: 508 if errOnMissing: 509 raise ValueError, "An element with id %s could not be found." % str(_id) 510 else: 511 self.add_data( _id, categories, data ) 512 else: 513 self.data_index[_id].update( categories = categories, data = data, addToExistingCat = addToExistingCat) 514 self.refresh_categories( [self.data_index[_id]] )

515

516 - def refresh_categories( self, data_list ):

517 """ 518 Updates self with categories that are in data_list that are not in self. 519 """ 520 for d in data_list: 521 self.data_index[d._id] = d 522 for c in d.categories: 523 self.setdefault(c, []) 524 if d not in self[c]: 525 self[c].append(d)

526

527 - def add_livetime(self, livetime, category, match_criteria = []):

528 """ 529 Adds livetime to all categories in self that match the given criteria. 530 """ 531 if match_criteria == []: 532 match_criteria = Category.default_match_criteria 533 for cat in [cat for cat in self if cat.selective_eq(category, match_criteria)]: 534 cat.livetime += livetime

535

536 - def get_livetime(self, category, match_criteria = [], time_units = 'yr'):

537 """ 538 Returns the sum of all the livetimes of categories that match the given category via the given match_criteria. 539 """ 540 if match_criteria == []: 541 match_criteria = Category.default_match_criteria 542 return sqlutils.convert_duration(sum([cat.livetime for cat in self if cat.selective_eq(category, match_criteria)]), time_units)

543

544 - def create_background(self, match_criteria = []):

545 """ 546 Creates background categories out of the slide categories and adds this to all slide elements' categories lists. Default action is to create a background for each veto-category, on_instruments, ifos, and param_group. However, this can be overridden with the match_criteria argument. 547 """ 548 if match_criteria == []: 549 match_criteria = ['veto_cat', 'on_instruments', 'ifos', 'param_group'] 550 for vals in set([ tuple(getattr(c, x) for x in match_criteria) for c in self if c.datatype == 'slide' ]): 551 # create the background category 552 bkg_cat = Category( offset_vector = {}, datatype = 'background' ) 553 [setattr(bkg_cat, x, y) for x, y in zip(match_criteria, vals)] 554 bkg_cat.livetime = sum([c.livetime for c in self if c.datatype == 'slide' and bkg_cat.selective_eq(c, match_criteria) ]) 555 # add this background category to each matching slide's categories 556 self[bkg_cat] = list(set([x for c in self if c.datatype == 'slide' and bkg_cat.selective_eq(c, match_criteria) for x in self[c]]))

557

558 - def compute_cumrates(self, stat, foreground_datatype, rank_by = 'max', group_by = [], num_slides = 100.):

559 """ 560 Computes the cumulative rates for all the distinct groups that exist in self. Distinct groups are determined by group_by. 561 """ 562 if group_by == []: 563 group_by = ['datatype', 'veto_cat', 'on_instruments', 'ifos', 'param_group'] 564 distinct_groups = set([ tuple(getattr(c,x) for x in group_by) for c in self]) 565 for group in distinct_groups: 566 this_group = Category() 567 [setattr(this_group, x, y) for (x,y) in zip( group_by, group )] 568 this_group.livetime = self.get_livetime( this_group, group_by, time_units = 's' ) 569 # get the list of all stats that fall in this category 570 this_data = [] 571 for c in self: 572 if c.selective_eq(this_group, group_by): 573 this_data.extend( self[c] ) 574 this_data = sorted(set(this_data), key = lambda x: getattr(x.data, stat), reverse = rank_by == 'min') 575 d = [getattr(x.data, stat) for x in this_data] 576 # we need to figure out the number of trials that were done in this category; we do this by taking the ratio 577 # of the this category's livetime to the foreground datatype associated with this category's livetime 578 # temporarily set this_group's datatype to foreground_datatype in order to get the right livetime 579 orig_dt = this_group.datatype 580 this_group.datatype = foreground_datatype 581 fg_livetime = self.get_livetime( this_group, match_criteria = 'datatype' not in group_by and group_by+['datatype'] or group_by, time_units = 's' ) 582 nTrials = float(this_group.livetime) / fg_livetime 583 # set datatype back to correct 584 this_group.datatype = orig_dt 585 # compute the cum-rates 586 these_cumrates = [ (len(d) - bisect.bisect_left(d, x))/nTrials for x in d ] 587 # assign to data 588 for data_elem, rate in zip( this_data, these_cumrates ): 589 data_elem.cumrates[this_group] = rate

590

591 - def get_cumrates(self, group, stat, rank_by ='max'):

592 """ 593 Returns a sorted list (by stat) of stats, cumrates, and ids for the given group. 594 """ 595 return sorted([(getattr(d.data, stat), d.cumrates[group], d._id) for d in self.data_index.values() if group in d.cumrates], reverse = rank_by == 'min')

596

597 - def get_data(self, _id = None, category = None, category_match_criteria = []):

598 """ 599 Returns a list of DataElements that matches a given id, a given category, or both. If category_match_criteria is specified, will get data that matches the specified elements in category. Otherwise, will use Category.default_match_criteria for comparing category to the stored categories. 600 """ 601 if category_match_criteria == []: 602 category_match_criteria = Category.default_match_criteria 603 return set([x for c in self for x in self[c] if (category is None or c.selective_eq(category, category_match_criteria)) and (_id is None or _id == x._id)])

604

605 - def get_categories(self, category, match_criteria = []):

606 """ 607 Returns a list of categories in self that match the given category via the match_criteria. 608 """ 609 if match_criteria == []: 610 match_criteria = Category.default_match_criteria 611 return [x for x in self if x.selective_eq(category, match_criteria)]

612

613 - def collapse(self, args):

614 """ 615 Cycles over the DataElements in self, keeping only the given args. 616 617 @args: A list of tuples. In each tuple, the first element is the name to give the new collapsed value and the second element is the argument to carry out (either a name or a function) on the uncollapsed row to get the collapsed value. 618 """ 619 cols = [arg[0] for arg in args] 620 fns = [arg[1] for arg in args] 621 collapsedRow = createDataHolder( 'collapsedRow' ) 622 for n,origElement in enumerate(self.data_index.values()): 623 d = collapsedRow( cols ) 624 d.store([(col, origElement.data.get_value(fn)) for col, fn in zip(cols, fns)]) 625 origElement.data = d

626

627 -def combineData(dataObj, match_column, args, param_grouping_function, verbose = False):

628 """ 629 Cycles over the DataElements in dataObj, combining any DataElements with the same match_column value via the given args and returns a new Data object in which the element's ids are the values of the match_column. Note: the categories of the DataElements in the new Data object are just the concatenation of the older objects individual categories. These might need to be updated depending on the paramters of the newer category. 630 631 @dataObj: the instace of Data to carry the combination on 632 @match_column: name of column in the DataElements to use to match rows to combine; e.g., 'coinc_event_id' 633 @args: a list of tuples. In each tuple the first element is the name to give the new combined value, the second element is the column in each row to identify that row by, the third is the column or function of columns in each row to combine, and the final element is the way to combine them, which can be either a predefined method or a function in terms of values of the first element. For example, if you wanted the average chirp mass and the sum of the squares of new_snr over H1 and L1, the args should look like: 634 args = [ (combined_newsnr_sq, ifo, get_new_snr, H1**2.+L1**2.), (combined_mchirp, ifo, mchirp, mean) ] 635 """ 636 cols = [arg[0] for arg in args] 637 colkeys = [arg[1] for arg in args] 638 sngl_stats = [arg[2] for arg in args] 639 cmb_fncs = [arg[3] for arg in args] 640 newData = Data() 641 combinedRow = createDataHolder( 'combinedRow' ) 642 # get the unique match values 643 match_vals = {} 644 for d in dataObj.data_index.values(): 645 this_id = d.data.get_value(match_column) 646 match_vals.setdefault(this_id, []) 647 match_vals[this_id].append(d) 648 ii = 0 649 for idcol, combine_data in match_vals.items(): 650 ii += 1 651 if verbose: 652 if ii != len(match_vals): 653 print "%i/%i (%.2f%%)\r" % (ii, len(match_vals), 100*float(ii)/len(match_vals)), 654 else: 655 print '' 656 newRow = combinedRow( cols ) 657 stats = [ dict([ [x.data.get_value(colkey), x.data.get_value(snglstat)] for x in combine_data ]) for colkey, snglstat in zip(colkeys, sngl_stats) ] 658 newRow.store( [( col, combineRowStats( fnc, stat_dict )) for col, fnc, stat_dict in zip(cols, cmb_fncs, stats)] ) 659 orig_cats = [y for x in combine_data for y in x.categories] 660 ifos_param = 'ifos' in dir(newRow) and 'ifos' or 'ifo' 661 new_cats = [Category( c.offset_vector, c.datatype, c.veto_cat, c.on_instruments, getattr(newRow, ifos_param), param_grouping_function(newRow.param) ) for c in orig_cats] 662 newData.add_data( id(newRow), new_cats, newRow ) 663 664 return newData

665

666 -def join_experiment_tables_to_sngl_table( table ):

667 return """ 668 JOIN 669 experiment, experiment_summary, experiment_map, coinc_event_map 670 ON ( 671 experiment.experiment_id == experiment_summary.experiment_id 672 AND experiment_summary.experiment_summ_id == experiment_map.experiment_summ_id 673 AND experiment_map.coinc_event_id == coinc_event_map.coinc_event_id 674 AND coinc_event_map.event_id == %s.event_id )""" % table

675

676 -def coinc_in_filter( on_instruments, ifos, filter, sngl_ifos = False ):

677 if on_instruments is None or on_instruments == 'ALL': 678 on_instruments = frozenset(['ALL']) 679 if not (isinstance(on_instruments,set) or isinstance(on_instruments, frozenset)): 680 on_instruments = lsctables.instrument_set_from_ifos(on_instruments) 681 # following means to only check the on_instruments 682 if ifos is None or ifos == 'ALL': 683 return frozenset(on_instruments) in filter or frozenset(['ALL']) in filter 684 685 if not (isinstance(ifos,set) or isinstance(ifos, frozenset)): 686 if sngl_ifos: 687 ifos = set([ifos]) 688 else: 689 ifos = lsctables.instrument_set_from_ifos(ifos) 690 # if given on_instruments is 'ALL', means to only check if ifos are in any filter 691 if on_instruments == frozenset(['ALL']): 692 if sngl_ifos: 693 return set(['ALL']) in filter.values() or any(ifos.issubset(x for x in filter.values())) 694 else: 695 return set(['ALL']) in filter.values() or ifos in filter.values() 696 # do full test 697 if frozenset(['ALL']) in filter: # on-instruments don't matter, just check ifos 698 if sngl_ifos: 699 return set(['ALL']) in filter[frozenset(['ALL'])] or ifos.issubset(filter[frozenset(['ALL'])]) 700 else: 701 return set(['ALL']) in filter[frozenset(['ALL'])] or ifos in filter[frozenset(['ALL'])] 702 elif frozenset(on_instruments) in filter: 703 if sngl_ifos: 704 return set(['ALL']) in filter[frozenset(on_instruments)] or ifos.issubset(filter[frozenset(on_instruments)]) 705 else: 706 return set(['ALL']) in filter[frozenset(on_instruments)] or ifos in filter[frozenset(on_instruments)] 707 # if get to here, fails all tests 708 else: 709 return False

710

711 -def poissonPDFlog10( k,lmbda ):

712 return k*numpy.log10( lmbda ) - (lmbda + special.gammaln(k+1.))/numpy.log(10.)

713

714 -def ColorFormatter(y, pos):

715 return "$10^{%.1f}$" % y

716 717 # 718 # Helper functions to compute stats on the fly when querying the database 719 # 720

721 -class checkInstruments:

722 - def __init__(self, filter_name, filter, is_single = False):

723 self.filter_name = filter_name 724 self.filter = filter 725 self.is_single = is_single

726 - def apply_test(self, on_instruments, ifos):

727 return coinc_in_filter( on_instruments, ifos, self.filter, sngl_ifos = self.is_single )

728 - def create_apply_test(self, sqlite_connection):

729 sqlite_connection.create_function(self.filter_name, 2, self.apply_test)

730 731

732 -def createCombineRowsMethod( tableName, columns, functionList ):

733 """ 734 Creates a CombineRows class that can be used in a sqlite database to combine rows on the fly. Takes in a sngl_function, which is the function used to combine columns within a single row, and a combining_function, which is the function used to combine the results of the sngl_functions across rows. 735 736 @tableName: the name of the table that will be reading from. If it is a table in lsctables.py, all methods and columns from that table will be inherited. 737 @columns: the list of columns that will be storing data to. This list must be in the same order that will be reading data in from the database with. 738 @functionList: a list of tuples. The first item should be the combining function to use, in terms of the ifos to combine, and the second item should be the sngl function to use, in terms of columns or methods of the sngl_row. 739 """ 740 741 sngl_row = createDataHolder(tableName, columns) 742 743 class CombineRows: 744 def __init__(self): 745 """ 746 Initializes variables needed for the step process. 747 """ 748 self.this_coinc = dict([ [x, {}] for x in functionList ])

749 750 def step(self, *args): 751 """ 752 Populates self.this_coinc 753 """ 754 this_row = sngl_row(columns) 755 this_row.store(zip(columns,args)) 756 for combine_func, sngl_function in functionList: 757 self.this_coinc[(combine_func, sngl_function)][this_row.ifo] = this_row.get_value(sngl_function) 758 759 def finalize(self): 760 """ 761 Once all the singles for the coinc have been gathered, applies the desired combining function(s) to them and returns the result. Results are returned as a comma seperated string. 762 """ 763 return ','.join([str(combineRowStats( cfunc, self.this_coinc[(cfunc, sfunc)] )) for cfunc, sfunc in functionList]) 764 765 return CombineRows 766 767

768 -class getRowStatsFromDatabase:

769 """ 770 Helper class to get row stat directly from the database. 771 """

772 - def __init__(self, tableName, columns, functionList):

773 """ 774 @tableName: see createCombineRowsMethod. 775 @columns: see createCombineRowsMethod. 776 @functionList: a list of functions to retrieve from the columns in tableName. 777 """ 778 self.dataRow = createDataHolder(tableName, columns) 779 self.functionList = functionList 780 self.columns = columns

781 - def get_dbrow_stat(self, *args ):

782 """ 783 Gets the stat. 784 785 @args: data must be in the same order as columns. 786 """ 787 this_row = self.dataRow(self.columns) 788 this_row.store(zip(self.columns,args)) 789 return ','.join([ str(this_row.get_value(func)) for func in self.functionList ])

790 791 792 # ============================================================================= 793 # 794 # Main 795 # 796 # ============================================================================= 797 798 # 799 # Generic Initilization 800 #

801 -def main(argv = None):

802 803 opts, filenames, input_args = parse_command_line(argv) 804 for this_cache in opts.cache_file: 805 this_cache = lal.Cache().fromfile(file(this_cache)) 806 filenames.extend([x.path() for x in this_cache]) 807 sqlite3.enable_callback_tracebacks(True) 808 809 statistic, stat_label = len(opts.statistic.split(':')) == 2 and opts.statistic.split(':') or [opts.statistic, opts.statistic.replace('_', ' ').title()] 810 data_table = sqlutils.validate_option(opts.data_table) 811 812 # get any added background points 813 add_background = [] 814 for addme in opts.add_background_file: 815 # compute combined_newsnr 816 f = open(addme, 'r') 817 bkg_time = f.readline() 818 stats = sorted([ float(line) for line in f ]) 819 f.close() 820 lbl = '' 821 if len(addme.split(':')) > 1: 822 lbl = addme.split(':')[1] 823 add_background.append((stats, float(bkg_time) / 3.15569e7, lbl)) 824 add_background_sp = [] 825 for addme in opts.add_special_background_file: 826 # compute combined_newsnr 827 f = open(addme, 'r') 828 bkg_time = f.readline() 829 stats = sorted([ float(line) for line in f ]) 830 f.close() 831 lbl = '' 832 if len(addme.split(':')) > 1: 833 lbl = addme.split(':')[1] 834 add_background_sp.append((stats, float(bkg_time)/ 3.15569e7, lbl)) 835 836 combining_function = None 837 if opts.combining_method is not None: 838 combining_function = opts.combining_method 839 if opts.combining_function is not None: 840 combining_function = opts.combining_function 841 842 # get foreground datatype 843 fg_datatype = sqlutils.validate_option( opts.foreground_datatype, lower = True ) 844 if fg_datatype not in lsctables.ExperimentSummaryTable.datatypes: 845 raise ValueError, "unrecognized foreground-datatype %s" % opts.foreground_datatype 846 # for the sqlqueries: 847 if fg_datatype != 'slide': 848 add_foreground = ' OR experiment_summary.datatype == "%s"' % fg_datatype 849 else: 850 add_foreground = '' 851 852 # Get param and param-ranges if specified 853 if opts.param_name: 854 param_name, param_label = len(opts.param_name.split(':')) == 2 and opts.param_name.split(':') or [opts.param_name, opts.param_name.replace('_', ' ').title()] 855 param_parser = sqlutils.parse_param_ranges( data_table, param_name, opts.param_ranges, verbose = opts.verbose ) 856 857 # Get plot-special ranges if desired 858 if opts.plot_special_time != []: 859 plot_special_times = segments.segmentlist([ segments.segment( t - opts.plot_special_window, t + opts.plot_special_window ) for t in opts.plot_special_time ]) 860 861 # Get coincs to include/exclude 862 #FIXME: parse_coinc_options should use frozenset(['ALL]), not 'ALL' 863 if opts.exclude_coincs: 864 exclude_filter = sqlutils.parse_coinc_options( opts.exclude_coincs ).coinc_types 865 if 'ALL' in exclude_filter: 866 exclude_filter[frozenset(['ALL'])] = exclude_filter['ALL'] 867 del exclude_filter['ALL'] 868 for on_inst, coinc_ifos in exclude_filter.items(): 869 exclude_filter[on_inst] = [ ifos == 'ALL' and set(['ALL']) or ifos for ifos in coinc_ifos ] 870 if opts.include_only_coincs: 871 include_filter = sqlutils.parse_coinc_options( opts.include_only_coincs ).coinc_types 872 if 'ALL' in include_filter: 873 include_filter[frozenset(['ALL'])] = include_filter['ALL'] 874 del include_filter['ALL'] 875 for on_inst, coinc_ifos in include_filter.items(): 876 include_filter[on_inst] = [ ifos == 'ALL' and set(['ALL']) or ifos for ifos in coinc_ifos ] 877 878 # since we don't care about offset vectors, remove it from Category's default_match_criteria 879 Category.default_match_criteria = [x for x in Category.default_match_criteria if x != 'offset_vector'] 880 881 # create things needed to store data 882 data = Data() 883 rowType = createDataHolder( 'data_holder', ['end_time', stat_label] ) 884 last_table_type = '' 885 gps_start_time = numpy.inf 886 gps_end_time = -numpy.inf 887 analyzed_instruments = set() 888 num_slides = None 889 890 if opts.verbose: 891 print >> sys.stdout, "Analyzing file:" 892 for filename in filenames: 893 if opts.verbose: 894 print >> sys.stdout, "\t%s" % filename 895 working_filename = dbtables.get_connection_filename( filename, tmp_path = opts.tmp_space, verbose = False ) 896 connection = sqlite3.connect( working_filename ) 897 if opts.tmp_space: 898 dbtables.set_temp_store_directory(connection, opts.tmp_space, verbose = False) 899 900 # figure out the data table type 901 data_cols = sqlutils.get_column_names_from_table(connection, data_table) 902 if 'coinc_event_id' in data_cols: 903 table_type = 'coinc' 904 if 'event_id' in data_cols: 905 table_type = 'sngl' 906 907 # sanity checks 908 if table_type == 'coinc' and 'ifos' not in data_cols or table_type == 'sngl' and 'ifo' not in data_cols: 909 raise ValueError, 'Could not find %s column in the data table (%s)' % (table_type == 'sngl' and 'ifo' or 'ifos', data_table) 910 if last_table_type != '' and last_table_type != table_type: 911 raise ValueError, '%s table in file %s does not match the types in the other files' %( data_table, filename ) 912 if table_type == 'coinc' and opts.plot_special_time != [] and opts.single_table is None: 913 raise ValueError, "Must specify a --single-table if using --plot-special-time with a coinc-ifos table." 914 if table_type == 'sngl' and combining_function is None: 915 raise ValueError, "Must provide a combining function or method if querying sngl-ifo tables." 916 917 # 918 # create/update the background categories in data 919 # 920 if opts.verbose: 921 print "\tgetting background categories..." 922 923 # if excluding instrument times or coincs, create the test for them 924 in_desired_times = '' 925 if opts.include_only_coincs: 926 db_include_filter = checkInstruments( 'in_include_filter', include_filter, is_single = False ) 927 db_include_filter.create_apply_test(connection) 928 in_desired_times = ' AND in_include_filter(experiment.instruments, "ALL")' 929 if opts.exclude_coincs: 930 db_exclude_filter = checkInstruments( 'in_exclude_filter', exclude_filter, is_single = False ) 931 db_exclude_filter.create_apply_test(connection) 932 in_desired_times = ' '.join([ in_desired_times, 'AND NOT in_exclude_filter(experiment.instruments, "ALL")']) 933 934 # establish how many independent backgrounds we'll need 935 tsids = set() 936 match_criteria = ['datatype', 'veto_cat', 'param_group'] 937 if opts.plot_by_instrument_time: 938 match_criteria.append( 'on_instruments' ) 939 if opts.param_name: 940 n_param_groups = len(param_parser.param_ranges) 941 else: 942 n_param_groups = 1 943 sqlquery = """ 944 SELECT 945 experiment.instruments, 946 experiment.gps_start_time, 947 experiment.gps_end_time, 948 experiment_summary.time_slide_id, 949 experiment_summary.veto_def_name, 950 experiment_summary.duration, 951 experiment_summary.datatype 952 FROM 953 experiment_summary 954 JOIN 955 experiment 956 ON 957 experiment.experiment_id == experiment_summary.experiment_id 958 WHERE 959 (experiment_summary.datatype == 'slide'""" + add_foreground + ')' + in_desired_times 960 if opts.debug: 961 print >> sys.stderr, "Sqlite query used to get categories:" 962 print >> sys.stderr, sqlquery 963 for on_instruments, this_gps_start, this_gps_end, tsid, veto_cat, duration, datatype in connection.cursor().execute(sqlquery): 964 on_instruments = frozenset(lsctables.instrument_set_from_ifos(on_instruments)) 965 analyzed_instruments.update( on_instruments ) 966 gps_start_time = min(gps_start_time, this_gps_start) 967 gps_end_time = max(gps_end_time, this_gps_end) 968 if datatype == "slide": 969 datatypes = ["slide", "background"] 970 if opts.plot_special_time != []: 971 datatypes.append("background_special") 972 # store the slide tsid's to keep track of the number of slides 973 tsids.add(tsid) 974 else: 975 datatypes = [datatype] 976 if opts.plot_by_ifos: 977 ifo_list = [frozenset([x for x in ifos]) for ifos in CoincInspiralUtils.get_ifo_combos(list(on_instruments))] 978 else: 979 ifo_list = [frozenset(['ALL'])] 980 # if there is more than one param-group or we are plotting by ifos, add the needed backgrounds 981 for datatype in datatypes: 982 for param_group in range(n_param_groups): 983 for ifos in ifo_list: 984 this_cat = Category( offset_vector = {}, datatype = datatype, veto_cat = veto_cat, ifos = ifos, param_group = param_group) 985 if opts.plot_by_instrument_time: 986 this_cat.on_instruments = on_instruments 987 # add the background category 988 data.setdefault( this_cat, [] ) 989 # add the livetime 990 # if doing plot special, subtract the duration of the removed time from the background livetime. 991 # Note that this isn't exactly correct: the range could overlap a slid veto, in which case only some of the time should be subtracted. 992 # However, we assume this is a small affect. 993 if datatype == "background_special" and opts.plot_special_time != [] and plot_special_times.intersects_segment( segments.segment(this_gps_start, this_gps_end) ): 994 data.add_livetime( duration - len(on_instruments)*sum([abs(seg) for seg in plot_special_times]), this_cat, match_criteria ) 995 else: 996 data.add_livetime( duration, this_cat, match_criteria ) 997 998 # add and check the slide count 999 # FIXME: We don't have to have the same number of slides in each database in order to do this. However, for that to happen, 1000 # weighted averages have to be computed. 1001 if num_slides is None and len(tsids) != 0: 1002 num_slides = len(tsids) 1003 elif len(tsids) != num_slides and len(tsids) != 0: 1004 raise ValueError, "Database %s has a different number of slides (%i) than the other databases (%i)." %( filename, len(tsids), num_slides ) 1005 1006 # set the slide's livetime to the average background time; we do this because all slides have been thrown into the same bucket 1007 if len(tsids) != 0.: 1008 for slide_cat in data.get_categories( Category(datatype = 'slide'), match_criteria = ['datatype'] ): 1009 slide_cat.livetime = float(slide_cat.livetime) / len(tsids) 1010 1011 # 1012 # retrieve the desired statistic 1013 # 1014 if opts.verbose: 1015 print >> sys.stdout, "\tgetting data..." 1016 1017 quick_query = False 1018 if table_type == 'sngl': 1019 # create a function list of functions we'll need to combine 1020 functionList = [] 1021 if opts.param_name: 1022 functionList.append((opts.param_combining_method, param_name)) 1023 functionList.extend([('alpha_min', 'end_time+end_time_ns*1e-9'), ('mean', 'mtotal'), (combining_function, statistic)]) 1024 if opts.plot_special_time != []: 1025 functionList.append( ('sorted_values', 'end_time+end_time_ns*1e-9') ) 1026 get_combined_data = createCombineRowsMethod( data_table, data_cols, functionList ) 1027 connection.create_aggregate('get_combined_data', len(data_cols), get_combined_data) 1028 # create an aggregate function to get the ifos 1029 get_ifos = createCombineRowsMethod( 'ifo_holder', ['ifo'], [('sorted_keys', 'ifo')] ) 1030 connection.create_aggregate('get_ifos', 1, get_ifos) 1031 1032 # If including or excluding coincs, need to create a function to combine ifos as well as discriminate single ifos. 1033 # By not allowing any single ifos by that are not in the desired coincident ifos, we're able to pick out doubles from triples (if desired). 1034 # We do this by taking the coinc filters and breaking up the coincident ifos that are in them, then pass them through coinc_in_filter 1035 add_sngl_filter = '' 1036 add_coinc_filter = '' 1037 if opts.include_only_coincs: 1038 include_sngl_filter = dict([ [on_inst, set([coinc == 'ALL' and 'ALL' or ifo for coinc in coincs for ifo in coinc])] for on_inst,coincs in include_filter.items() ]) 1039 db_include_sngl_filter = checkInstruments( 'in_include_sngl_filter', include_sngl_filter, is_single = True ) 1040 db_include_sngl_filter.create_apply_test(connection) 1041 add_sngl_filter = ''.join([ 'AND in_include_sngl_filter(experiment.instruments, ', data_table, '.ifo', ') ']) 1042 add_coinc_filter = ''.join([ 'HAVING in_include_filter(experiment.instruments, ifos)' ]) 1043 if opts.exclude_coincs: 1044 exclude_sngl_filter = dict([ [on_inst, set([coinc == 'ALL' and 'ALL' or ifo for coinc in coincs for ifo in coinc])] for on_inst,coincs in exclude_filter.items() ]) 1045 db_exclude_sngl_filter = checkInstruments( 'in_exclude_sngl_filter', exclude_sngl_filter, is_single = True ) 1046 db_exclude_sngl_filter.create_apply_test(connection) 1047 add_sngl_filter = ''.join([ add_sngl_filter, 'AND NOT in_exclude_sngl_filter(experiment.instruments, ', data_table, '.ifo', ') ']) 1048 add_coinc_filter = ''.join([ add_coinc_filter, add_coinc_filter == '' and 'HAVING ' or 'AND ', 'NOT in_exclude_filter(experiment.instruments, ifos)']) 1049 sqlquery = ''.join([ """ 1050 SELECT 1051 experiment.instruments, 1052 experiment_summary.datatype, 1053 experiment_summary.veto_def_name, 1054 coinc_event_map.coinc_event_id, 1055 get_ifos(""", data_table, """.ifo) AS ifos, 1056 get_combined_data(""", ','.join(['.'.join([data_table,col]) for col in data_cols]), """) 1057 FROM 1058 """, data_table, """ 1059 """, join_experiment_tables_to_sngl_table( data_table ), """ 1060 WHERE 1061 (experiment_summary.datatype == 'slide'""", add_foreground, """) 1062 """, add_sngl_filter, """ 1063 GROUP BY 1064 coinc_event_map.coinc_event_id 1065 """, add_coinc_filter ]) 1066 1067 else: 1068 # if doing plot special, create a table of the coinc events to ignore in the database 1069 add_time_filter = '' 1070 if opts.plot_special_time != []: 1071 sngl_table = sqlutils.validate_option( opts.single_table ) 1072 # create a function in the database to check whether a single time falls in the plot sepcial windows or not 1073 def check_sngl_time( end_time, end_time_ns ): 1074 return end_time+end_time_ns*1e-9 in plot_special_times

1075 connection.create_function('check_sngl_time', 2, check_sngl_time) 1076 # populate the table 1077 sqlquery = ''.join([ """ 1078 SELECT DISTINCT 1079 coinc_event_map.coinc_event_id AS ceid 1080 FROM 1081 coinc_event_map 1082 JOIN 1083 """, sngl_table, """ AS sngl_table 1084 ON ( 1085 coinc_event_map.table_name == '""", sngl_table, """' AND 1086 sngl_table.event_id == coinc_event_map.event_id ) 1087 JOIN 1088 experiment_summary, experiment_map 1089 ON ( 1090 experiment_summary.experiment_summ_id == experiment_map.experiment_summ_id AND 1091 experiment_map.coinc_event_id == coinc_event_map.coinc_event_id ) 1092 WHERE 1093 experiment_summary.datatype == "slide" AND 1094 check_sngl_time( sngl_table.end_time, sngl_table.end_time_ns ) 1095 """]) 1096 if opts.debug: 1097 print >> sys.stderr, "SQLite query used to get conics falling in plot special time:" 1098 print >> sys.stderr, sqlquery 1099 remove_coincs = [ceid[0] for ceid in connection.cursor().execute(sqlquery)] 1100 1101 #add_time_filter = '\nAND %s.coinc_event_id NOT IN (SELECT ceid FROM skip_coincs)' % data_table 1102 1103 # speed-up: if param_name and statistic are in the data_cols list, just query them directly 1104 if (opts.param_name is not None and param_name in data_cols) and statistic in data_cols: 1105 quick_query = True 1106 get_stats = ''.join([ data_table, ".", param_name, ", ", data_table, ".end_time+", data_table, ".end_time_ns*1e-9, ", data_table, ".mass, ", data_table, ".", statistic ]) 1107 elif opts.param_name is None and statistic in data_cols: 1108 quick_query = true 1109 get_stats = ''.join([ data_table, ".end_time+", data_table, ".end_time_ns*1e-9, ", data_table, ".mass, ", data_table, ".", statistic ]) 1110 else: 1111 # create method to get the stats and parameter 1112 functionList = [] 1113 if opts.param_name: 1114 functionList.append(param_name) 1115 functionList.extend(['end_time+end_time_ns*1e-9', 'mass', statistic]) 1116 getStatDB = getRowStatsFromDatabase( data_table, data_cols, functionList ) 1117 connection.create_function( 'get_stat', len(data_cols), getStatDB.get_dbrow_stat ) 1118 get_stats = ''.join([ 'get_stat(', ','.join([ '.'.join([data_table,col]) for col in data_cols ]), ')' ]) 1119 1120 add_coinc_filter = '' 1121 if opts.include_only_coincs: 1122 add_coinc_filter = ''.join([ '\nAND in_include_filter(experiment.instruments, ', data_table, '.ifos)' ]) 1123 if opts.exclude_coincs: 1124 add_coinc_filter = ''.join([ add_coinc_filter, '\nAND NOT in_exclude_filter(experiment.instruments, ', data_table, '.ifos)']) 1125 sqlquery = ''.join([ """ 1126 SELECT 1127 experiment.instruments, 1128 experiment_summary.datatype, 1129 experiment_summary.veto_def_name, 1130 """, data_table, """.coinc_event_id, 1131 """, data_table, """.ifos, 1132 """, get_stats, """ 1133 FROM 1134 """, data_table, """ 1135 """, 1136 sqlutils.join_experiment_tables_to_coinc_table( data_table ), 1137 """ 1138 WHERE 1139 (experiment_summary.datatype == 'slide'""", add_foreground, ')', add_coinc_filter, add_time_filter ]) 1140 if opts.debug: 1141 print >> sys.stderr, "Sqlite query used to get data:" 1142 print >> sys.stderr, sqlquery 1143 results = connection.cursor().execute(sqlquery).fetchall() 1144 for result in results: 1145 if opts.plot_by_instrument_time: 1146 on_instruments = frozenset(lsctables.instrument_set_from_ifos( result[0] )) 1147 else: 1148 on_instruments = frozenset(["ALL"]) 1149 datatype = result[1] 1150 # if these are slid triggers, check if any fall in the plot-special window (for sngl tables only) 1151 if datatype == "slide" and opts.plot_special_time != [] and table_type == 'sngl': 1152 sngl_end_times = [float(t) for t in result[5].split(',')[-1].split(';')] 1153 if any(t in plot_special_times for t in sngl_end_times): 1154 continue 1155 veto_cat = result[2] 1156 ceid = result[3] 1157 if opts.plot_by_ifos: 1158 ifos = frozenset(lsctables.instrument_set_from_ifos(result[4])) 1159 else: 1160 ifos = frozenset(["ALL"]) 1161 if opts.param_name is not None: 1162 if quick_query: 1163 param_group, end_time, mtotal, stat = [result[5], result[6], result[7], result[8]] 1164 else: 1165 param_group, end_time, mtotal, stat = [float(x) for x in result[5].split(',') if ';' not in x] 1166 param_group = param_parser.group_by_param_range( param_group ) 1167 # skip this if it doesn't fall in any desired param group 1168 if param_group is None: 1169 continue 1170 else: 1171 param_group = 0. 1172 if quick_query: 1173 end_time, mtotal, stat = [result[5], result[6], result[7]] 1174 else: 1175 end_time, mtotal, stat = [float(x) for x in result[5].split(',') if ';' not in x] 1176 # Memory improvement: throw out any data less than x-fit and xmin 1177 if opts.xmin is not None: 1178 if stat < opts.xmin and opts.min_x_fit is not None and stat < opts.min_x_fit: 1179 continue 1180 elif stat < opts.xmin and opts.min_x_fit is None: 1181 continue 1182 # passed any tests, store data 1183 this_row = rowType() 1184 this_row.store([ ('end_time', end_time), ('mtotal', mtotal), ('stat', stat), ('ifos', result[4]) ]) 1185 # construct categories 1186 categories = [ Category( {}, datatype, veto_cat, on_instruments, ifos, param_group ) ] 1187 if datatype == "slide": 1188 categories.append(Category( {}, 'background', veto_cat, on_instruments, ifos, param_group )) 1189 if opts.plot_special_time != [] and ceid not in remove_coincs: 1190 categories.append(Category( {}, 'background_special', veto_cat, on_instruments, ifos, param_group )) 1191 1192 # add to data 1193 thisid = id(this_row) 1194 data.add_data( thisid, categories, this_row ) 1195 1196 # go on to the next database 1197 connection.close() 1198 dbtables.discard_connection_filename( filename, working_filename, verbose = False) 1199 1200 for cat in data: 1201 print cat.datatype, len(data[cat]) 1202 1203 # 1204 # Sort and calculate cumrate 1205 # 1206 if opts.verbose: 1207 print "Computing cumulative rates..." 1208 1209 # figure out how to populate the background categories 1210 group_by = ['veto_cat', 'param_group'] 1211 if opts.plot_by_instrument_time: 1212 group_by.append( 'on_instruments' ) 1213 if opts.plot_by_ifos: 1214 group_by.append( 'ifos' ) 1215 1216 data.compute_cumrates( 'stat', fg_datatype, rank_by = 'max', group_by = group_by+['datatype']) 1217 1218 # 1219 # Plot/Compute statistics 1220 # 1221 if opts.verbose: 1222 print >> sys.stdout, "Plotting..." 1223 opts.gps_start_time = gps_start_time 1224 opts.gps_end_time = gps_end_time 1225 opts.ifo_times = ''.join(sorted(analyzed_instruments)) 1226 opts.ifo_tag = '' 1227 opts.enable_output = True 1228 InspiralUtilsOpts = InspiralUtils.initialise( opts, __prog__, git_version.verbose_msg ) 1229 fnameList = [] 1230 tagList = [] 1231 comments = '' 1232 1233 for bkg_category in data.get_categories( Category(datatype = 'background'), ['datatype'] ): 1234 if opts.verbose: 1235 print >> sys.stdout, "\tgroup: %s, %s, %s, %s" %(bkg_category.veto_cat, str(bkg_category.param_group), ''.join(bkg_category.on_instruments), ''.join(bkg_category.ifos)) 1236 # set a match category to pick out data with 1237 match_category = Category() 1238 match_criteria = group_by+['datatype'] 1239 [setattr(match_category, x, getattr(bkg_category, x)) for x in match_criteria] 1240 1241 # get foreground and background livetime 1242 T_bkg = data.get_livetime(bkg_category, time_units = opts.time_units) 1243 match_category.datatype = fg_datatype 1244 T_fg = data.get_livetime( match_category, time_units = opts.time_units ) 1245 1246 # 1247 # 1248 # Non-cumulative plot 1249 # 1250 # 1251 plot_non_cum = True 1252 if plot_non_cum: 1253 if opts.verbose: 1254 print >> sys.stdout, "\tcreating histogram..." 1255 pylab.figure() 1256 pylab.hold(True) 1257 xmin = numpy.inf 1258 xmax = -numpy.inf 1259 ymin = numpy.inf 1260 ymax = -numpy.inf 1261 1262 # 1263 # create histogram 1264 # 1265 # get foreground data 1266 if fg_datatype != 'slide': 1267 fg_data = data.get_data( category = match_category, category_match_criteria = match_criteria ) 1268 fg_stats = numpy.array([x.data.stat for x in fg_data]) 1269 plot_fg = fg_stats != numpy.array([]) 1270 else: 1271 plot_fg = False 1272 # get background data 1273 bkg_data = data[bkg_category] 1274 bkg_stats = numpy.array([x.data.stat for x in bkg_data]) 1275 # FIXME: create an empty plot 1276 if bkg_stats == numpy.array([]): 1277 continue 1278 1279 # get min/max for bins values 1280 if plot_fg: 1281 min_val = min( bkg_stats.min(), fg_stats.min() ) 1282 max_val = max( bkg_stats.max(), fg_stats.max() ) 1283 else: 1284 min_val = min( bkg_stats.min(), bkg_stats.min() ) 1285 max_val = max( bkg_stats.max(), bkg_stats.max() ) 1286 1287 # create the bins 1288 nbins = opts.nbins 1289 if opts.lin_x: 1290 #bins = numpy.linspace(min_val, max_val, nbins+1, endpoint = True) 1291 bins = numpy.linspace(numpy.power(min_val, 1./opts.x_power), numpy.power(max_val, 1./opts.x_power), nbins+1, endpoint = True) 1292 else: 1293 bins = numpy.logspace( numpy.log10(min_val), numpy.log10(max_val), nbins+1, endpoint = True ) 1294 bins = numpy.power(bins, opts.x_power) 1295 # bump up the last bin by a little bit to make sure there are no numerical errors 1296 bins[-1] += .0001 1297 ds = numpy.array([bins[i+1] - bins[i] for i in range(len(bins)-1)])/2. 1298 xvals = bins[:-1] + ds 1299 1300 # 1301 # plot background 1302 # 1303 bkg_count, _ = numpy.histogram(bkg_stats, bins) 1304 # divide the background count by the ratio of the background time to the foreground time 1305 bkg_count = bkg_count * (T_fg / T_bkg ) 1306 remove_me = pylab.find(bkg_count == 0.) 1307 bkg_count = numpy.array([bkg_count[i] for i in range(len(bkg_count)) if i not in remove_me]) 1308 bkg_hist_stats = numpy.array([xvals[i] for i in range(len(xvals)) if i not in remove_me]) 1309 bkg_ds = numpy.array([ds[i] for i in range(len(xvals)) if i not in remove_me]) 1310 1311 # reset ymin/ymax 1312 if bkg_count != []: 1313 ymin = min(min(bkg_count),ymin) 1314 ymax = max(max(bkg_count),ymax) 1315 1316 # 1317 # compute and plot background error 1318 # 1319 bkg_error = numpy.sqrt( bkg_count / (T_bkg/T_fg) ) 1320 pylab.errorbar( bkg_hist_stats, bkg_count, xerr = bkg_ds, yerr = bkg_error, color = 'k', ecolor = 'gray', marker = 'o', markersize = 4, linestyle = 'None', zorder = 2, label = 'Background' ) 1321 1322 # 1323 # plot foreground 1324 # 1325 loudest_events = [] 1326 if plot_fg: 1327 1328 fg_count, _ = numpy.histogram(fg_stats, bins) 1329 remove_me = pylab.find(fg_count == 0.) 1330 fg_count = numpy.array([fg_count[i] for i in range(len(fg_count)) if i not in remove_me]) 1331 fg_hist_stats = numpy.array([xvals[i] for i in range(len(xvals)) if i not in remove_me]) 1332 1333 # reset ymin/ymax 1334 if fg_count != []: 1335 ymin = min(min(fg_count),ymin) 1336 ymax = max(max(fg_count),ymax) 1337 1338 fclr = 'b' 1339 lbl = fg_datatype.replace( '_', '-').title() 1340 if fg_datatype == 'simulation': 1341 edgclr = 'r' 1342 elif opts.plot_poisson_pdf: 1343 edgclr = 'white' 1344 else: 1345 edgclr = 'none' 1346 pylab.scatter( fg_hist_stats, fg_count, marker = 'o', edgecolor = edgclr, facecolor = fclr, label = lbl, s = 20, linewidth = .5, zorder = 4 ) 1347 1348 # 1349 # plot foreground triggers that fall in plot-special window as stars 1350 # 1351 if opts.plot_special_time != []: 1352 plot_sp_vals = [x.data.stat for x in fg_data if x.data.end_time in plot_special_times] 1353 spx = [] 1354 spy = [] 1355 if plot_sp_vals != []: 1356 # in which bin does each plot_sp_val fall? 1357 for x in plot_sp_vals: 1358 if x > fg_hist_stats.max(): 1359 spx.append(fg_hist_stats.max()) 1360 spy.append(fg_count[-1]) 1361 else: 1362 idxs = set(pylab.find( x >= fg_hist_stats )) & set(pylab.find( x <= fg_hist_stats)) 1363 spx.extend([ fg_hist_stats[i] for i in idxs ]) 1364 spy.extend([ fg_count[i] for i in idxs ]) 1365 pylab.scatter( spx, spy, c = 'yellow', marker = (5,1,0), s = 50, linewidth = .5, label = '_no_legend', zorder = 5 ) 1366 1367 1368 # 1369 # plot extrapolation 1370 # 1371 if plot_fg and opts.extrapolate is not None: 1372 if opts.verbose: 1373 print >> sys.stdout, "\textrapolating background..." 1374 ### For Gaussian fit: 1375 # We're going to fit a Gaussian with 0 mean to the background. Since this is given by y = A*exp(-x^2./(2*sigma^2.))/sqrt(2*pi*sigma^2.), we take the natural log, and thus fit 1376 # y = beta[0] + beta[2]*x^2 where beta[0] = ln(A) - ln(2*pi*sigma^2.)/2., beta[2] = -1/(2.*sigma^2.) and y = ln(bkg_count) 1377 # To do this, we'll do a weighted linear least squares fit using Beta_hat = (transpose(X).W.X)^(-1).transpose(X).W.Y. where W is the nxn matrix of weights (which are just taken 1378 # as the error in each background point), X is the nx2 matrix of [1 x[i]^2.], Y is the nx1 matrix of ln(bkg_cound[i]), and Beta_hat is the 2x1 matrix of fitted betas. 1379 # That assumes that the plotted x is linear in whatever the desired statistic is. However, if the plotted x is actually the desired statistic squared, then we should 1380 # fit y = beta[0] + beta[2]*x. Generalizing, we actually fit y = beta[0] + beta[2]*x^((2./opts.x_power)), since only the user can know what function of their desired statistic they've 1381 # punched into the command line. 1382 ### For power-law fit: 1383 # We're going to fit bkg_count/T_fg = A*statistic^B by doing a least squares fit to y = beta[0] + beta[1]*x where y = ln(bkg_count/T_fg), x = ln(statistic), beta[0] = ln(A) 1384 # and beta[1] = B. 1385 if opts.min_x_fit is None: 1386 min_x_fit = bkg_hist_stats.min() 1387 else: 1388 min_x_fit = opts.min_x_fit 1389 start_i = pylab.find( bkg_hist_stats >= min_x_fit )[0] 1390 n = len(bkg_count) - start_i 1391 X = numpy.matrix(numpy.zeros((n,2), dtype = float)) 1392 Y = numpy.matrix(numpy.zeros((n,1), dtype = float)) 1393 W = numpy.matrix(numpy.zeros((n,n), dtype = float)) 1394 # populate the input matrices 1395 for i in range(n): 1396 if bkg_count[start_i + i] == 0: 1397 continue 1398 Y[i,0] = numpy.log( bkg_count[start_i + i] ) 1399 W[i,i] = 1./numpy.power( bkg_error[start_i + i]/bkg_count[start_i + i], 2. ) 1400 X[i,0] = 1. 1401 if opts.extrapolate.upper() == 'POWER': 1402 X[i,1] = numpy.log( bkg_hist_stats[start_i + i] ) 1403 else: 1404 X[i,1] = bkg_hist_stats[start_i + i]**(2./opts.x_power) 1405 1406 # calculate the Beta_hats 1407 Beta_hat = (X.T * W * X).I * X.T * W * Y 1408 1409 # Compute the Chisq of the fit 1410 noncum_chisq = ( W * numpy.power((Y - X * Beta_hat), 2.) ).sum() 1411 # the number of degrees of freedom = Ndof = number of points - number of fitting params - 1 1412 noncum_Ndof = n - 2 - 1 1413 1414 # now that we have the fit parameters, plot the fitted values 1415 n_fit = 50 1416 if opts.lin_x: 1417 x_fit = numpy.linspace( bkg_hist_stats[start_i], bkg_hist_stats.max(), num = n_fit ) 1418 else: 1419 x_fit = numpy.logspace( numpy.log10(bg_hist_stats[start_i]), numpy.log10( bkg_hist_stats.max() ), num = n_fit ) 1420 Xfit = numpy.matrix(numpy.zeros((n_fit,2), dtype = float)) 1421 for i,x in enumerate(x_fit): 1422 Xfit[i,0] = 1. 1423 if opts.extrapolate.upper() == 'POWER': 1424 Xfit[i,1] = numpy.log(x) 1425 else: 1426 Xfit[i,1] = x**(2./opts.x_power) 1427 y_fit = numpy.exp( numpy.array(Xfit * Beta_hat) ) 1428 1429 # plot the extrapolated line 1430 pylab.plot( x_fit, y_fit, 'g-', linewidth = 2, zorder = 5, label = 'Fitted Bkg.' ) 1431 1432 # reset ymin/ymax 1433 if y_fit.any(): 1434 ymin = min(y_fit.min(),ymin) 1435 ymax = max(y_fit.max(),ymax) 1436 1437 # 1438 # finalize plot settings 1439 # 1440 pylab.grid() 1441 pylab.legend(loc = 'lower left', scatterpoints = 1, numpoints = 1) 1442 1443 pylab.xlabel( stat_label ) 1444 pylab.ylabel( "Count per Foreground Experiment" ) 1445 if opts.title: 1446 if opts.param_name is not None: 1447 t = 'Histogram %s %s' % (param_label.title(), param_parser.param_range_by_group(bkg_category.param_group)) 1448 else: 1449 t = 'Histogram' 1450 pylab.title(t) 1451 1452 # set axes limits and scale 1453 xmin = min(xvals) 1454 xmax = max(xvals) 1455 if not opts.lin_x: 1456 pylab.gca().semilogx() 1457 xmax = xmax*10**(0.3) 1458 else: 1459 xmax = xmax+.1*xmax 1460 if not opts.lin_y: 1461 pylab.gca().semilogy() 1462 ymin, ymax = ymin*10**(-0.5), ymax*10**(0.5) 1463 else: 1464 ymin, ymax = ymin - .1*ymin, ymax + .1*ymax 1465 # overrule with input options 1466 if opts.xmin is not None: 1467 xmin = opts.xmin 1468 if opts.xmax is not None: 1469 xmax = opts.xmax 1470 if opts.ymin is not None: 1471 ymin = opts.ymin 1472 if opts.ymax is not None: 1473 ymax = opts.ymax 1474 1475 1476 pylab.xlim( xmin, xmax ) 1477 pylab.ylim( ymin, ymax ) 1478 1479 # store plot info 1480 plot_description = 'F%i' % ( len(fnameList) ) 1481 name = InspiralUtils.set_figure_tag( plot_description, datatype_plotted = fg_datatype.upper(), open_box = 'all_data' in fg_datatype or 'exclude_play' in fg_datatype) 1482 fname = InspiralUtils.set_figure_name(InspiralUtilsOpts, name) 1483 #fname = re.sub('.png', '.pdf', fname) 1484 fname_thumb = InspiralUtils.savefig_pylal( filename=fname, dpi=opts.dpi ) 1485 fnameList.append(fname) 1486 tagList.append(name) 1487 1488 1489 # 1490 # 1491 # Cumulative plot 1492 # 1493 # 1494 if opts.verbose: 1495 print >> sys.stdout, "\tcreating cumulative plot..." 1496 pylab.figure() 1497 pylab.hold(True) 1498 xmin = numpy.inf 1499 xmax = -numpy.inf 1500 ymin = numpy.inf 1501 ymax = -numpy.inf 1502 1503 # 1504 # XXX: For paper, create text files of data. One file is created 1505 # for each data type: regular background (black dots), extended background 1506 # (black crosses), foreground (blue triangles), regular background with 1507 # the loud event removed (gray dots), and extended background with 1508 # the loud event removed (gray crosses). 1509 # 1510 1511 # the basic header used in all of the text files 1512 txthead = """# 1513 # Data used to generate the %s 1514 # in the cumulative rate plot (Figure 3) in LIGO Document P1100034. 1515 # 1516 # For questions regarding the use of this material, please contact the LSC 1517 # spokesperson Gaby Gonzalez (gonzalez@lsu.edu) or corresponding authors 1518 # Collin Capano (collin.capano@ligo.org) and Stephen Privitera (sprivite@caltech.edu) 1519 # 1520 """ 1521 1522 # 1523 # plot special background 1524 # 1525 if opts.plot_special_time != []: 1526 # get background data 1527 match_category.datatype = 'background_special' 1528 plotvals = data.get_cumrates( match_category, 'stat', rank_by = 'max' ) 1529 bkgsp_stats = numpy.array([x[0] for x in plotvals]) 1530 bkgsp_cumnum = numpy.array([y[1] for y in plotvals]) 1531 1532 # rest xmmin/xmax 1533 if bkgsp_stats != []: 1534 xmin = min(min(bkgsp_stats),xmin) 1535 xmax = max(max(bkgsp_stats),xmax) 1536 # reset ymin/ymax 1537 if bkgsp_cumnum != []: 1538 ymin = min(min(bkgsp_cumnum / T_fg),ymin) 1539 ymax = max(max(bkgsp_cumnum / T_fg),ymax) 1540 1541 # 1542 # compute and plot background error 1543 # 1544 bkgsp_error = numpy.sqrt( bkgsp_cumnum / (T_fg * T_bkg) ) 1545 plot_errs = numpy.array([ bkgsp_error,bkgsp_error ]) 1546 for n, (y, err) in enumerate(zip(bkgsp_cumnum.tolist(),bkgsp_error.tolist())): 1547 if y/T_fg - err <= 0.0: 1548 plot_errs[0,n] = y/T_fg - 1e-15 1549 pylab.errorbar( bkgsp_stats, bkgsp_cumnum / T_fg, yerr = plot_errs, color = '#777777', ecolor = '#777777', marker = 'o', markersize = 4, markeredgecolor = '#777777', linestyle = 'None', zorder = -1, label = '_no_legend') 1550 # print data to text file 1551 f = open('PlotRatesData-HundredSlideBkg-NoEvent.txt', 'w') 1552 print >> f, txthead % 'background estimate from 100 slides with the candidate event removed (gray dots)' 1553 print >> f, '#Combined NewSNR\tCumulative Rate (yr^-1)\ty-error (+/- yr^-1)' 1554 for stat, cumrate, err in zip(bkgsp_stats.tolist(), (bkgsp_cumnum / T_fg).tolist(), bkgsp_error.tolist()): 1555 print >> f, '%f\t%f\t%f' % (stat, cumrate, err) 1556 f.close() 1557 1558 # store the ids of the loudest background events 1559 loudest_bkgsp_events = [] 1560 ii = 0 1561 last_stat = None 1562 for x in plotvals[::-1]: 1563 if ii > 10: 1564 break 1565 if x[0] != last_stat: 1566 ii += 1 1567 loudest_bkgsp_events.append(x[2]) 1568 1569 # 1570 # plot added background triggers 1571 # 1572 added_bkgsp = [] 1573 for newsnrs, bkg_time, lbl in add_background_sp: 1574 nTrials = bkg_time / T_fg 1575 # compute the cum-rates 1576 these_cumnums = numpy.array([ (len(newsnrs) - bisect.bisect_left(newsnrs, x))/nTrials for x in newsnrs ]) 1577 this_error = numpy.sqrt( these_cumnums / (T_fg * bkg_time) ) 1578 plot_errs = numpy.array([this_error, this_error]) 1579 for n, (y, err) in enumerate(zip(these_cumnums.tolist(),this_error.tolist())): 1580 if y/T_fg - err <= 0.0: 1581 plot_errs[0,n] = y/T_fg - 1e-15 1582 T_add_bkgsp = bkg_time 1583 pylab.errorbar( newsnrs, these_cumnums / T_fg, yerr = plot_errs, color = '#777777', ecolor = '#777777', marker = 'x', markersize = 6, mew = 1, linestyle = 'None', zorder = -1, label = '_no_legend' ) 1584 1585 # print data to text file 1586 f = open('PlotRatesData-ExtendedBkg-NoEvent.txt', 'w') 1587 print >> f, txthead % 'extended background estimate with the candidate event removed (gray crosses)' 1588 print >> f, '#Combined NewSNR\tCumulative Rate (yr^-1)\ty-error (+/- yr^-1)' 1589 for stat, cumrate, err in zip(newsnrs, (these_cumnums / T_fg).tolist(), this_error.tolist()): 1590 print >> f, '%f\t%f\t%f' % (stat, cumrate, err) 1591 f.close() 1592 1593 #if lbl != '': 1594 # if opts.lin_y: 1595 # ytext = yval+.05*(ymax-ymin) 1596 # else: 1597 # ytext = yval * 10**.25 1598 # pylab.annotate( lbl, (xval,yval), xytext=(xval,ytext) ) 1599 # reset xmin/xmax 1600 xmin = min(min(newsnrs), xmin) 1601 xmax = max(max(newsnrs), xmax) 1602 ymin = min(these_cumnums.min(), ymin) 1603 ymax = max(these_cumnums.max(), ymax) 1604 # save x, y points for poisson pdf calc. 1605 added_bkgsp = [x for x in zip(newsnrs, these_cumnums) if x[0] > bkgsp_stats.max()] #.append((xval, T_fg / pt_livetime)) 1606 1607 # 1608 # plot background 1609 # 1610 1611 # get background data 1612 match_category.datatype = 'background' 1613 plotvals = data.get_cumrates( match_category, 'stat', rank_by = 'max' ) 1614 bkg_stats = numpy.array([x[0] for x in plotvals]) 1615 bkg_cumnum = numpy.array([y[1] for y in plotvals]) 1616 1617 # rest xmmin/xmax 1618 if bkg_stats != []: 1619 xmin = min(min(bkg_stats),xmin) 1620 xmax = max(max(bkg_stats),xmax) 1621 # reset ymin/ymax 1622 if bkg_cumnum != []: 1623 ymin = min(min(bkg_cumnum / T_fg),ymin) 1624 ymax = max(max(bkg_cumnum / T_fg),ymax) 1625 1626 # 1627 # compute and plot background error 1628 # 1629 bkg_error = numpy.sqrt( bkg_cumnum / (T_fg * T_bkg) ) 1630 plot_errs = numpy.array([ bkg_error,bkg_error ]) 1631 for n, (y, err) in enumerate(zip(bkg_cumnum.tolist(),bkg_error.tolist())): 1632 if y/T_fg - err <= 0.0: 1633 plot_errs[0,n] = y/T_fg - 1e-15 1634 pylab.errorbar( bkg_stats, bkg_cumnum / T_fg, yerr = plot_errs, color = 'k', ecolor = 'k', marker = 'o', markersize = 4, markeredgecolor = 'k', linestyle = 'None', zorder = 2, label = 'Background' ) 1635 1636 # print data to text file 1637 f = open('PlotRatesData-HundredSlideBkg.txt', 'w') 1638 print >> f, txthead % 'background estimate from 100 slides (black dots)' 1639 print >> f, '#Combined NewSNR\tCumulative Rate (yr^-1)\ty-error (+/- yr^-1)' 1640 for stat, cumrate, err in zip(bkg_stats.tolist(), (bkg_cumnum / T_fg).tolist(), bkg_error.tolist()): 1641 print >> f, '%f\t%f\t%f' % (stat, cumrate, err) 1642 f.close() 1643 1644 # store the ids of the loudest background events 1645 loudest_bkg_events = [] 1646 ii = 0 1647 last_stat = None 1648 for x in plotvals[::-1]: 1649 if ii > 10: 1650 break 1651 if x[0] != last_stat: 1652 ii += 1 1653 loudest_bkg_events.append(x[2]) 1654 1655 # 1656 # plot added background triggers 1657 # 1658 added_bkg = [] 1659 for newsnrs, bkg_time, lbl in add_background: 1660 nTrials = bkg_time / T_fg 1661 # compute the cum-rates 1662 these_cumnums = numpy.array([ (len(newsnrs) - bisect.bisect_left(newsnrs, x))/nTrials for x in newsnrs ]) 1663 this_error = numpy.sqrt( these_cumnums / (T_fg * bkg_time) ) 1664 plot_errs = numpy.array([this_error, this_error]) 1665 for n, (y, err) in enumerate(zip(these_cumnums.tolist(),this_error.tolist())): 1666 if y/T_fg - err <= 0.0: 1667 plot_errs[0,n] = y/T_fg - 1e-15 1668 T_add_bkg = bkg_time 1669 pylab.errorbar( newsnrs, these_cumnums / T_fg, yerr = plot_errs, color = 'k', ecolor = 'k', marker = 'x', markersize = 6, mew = 1, linestyle = 'None', zorder = 2, label = lbl ) 1670 1671 # print data to text file 1672 f = open('PlotRatesData-ExtendedBkg.txt', 'w') 1673 print >> f, txthead % 'extended background estimate (black crosses)' 1674 print >> f, '#Combined NewSNR\tCumulative Rate (yr^-1)\ty-error (+/- yr^-1)' 1675 for stat, cumrate, err in zip(newsnrs, (these_cumnums / T_fg).tolist(), this_error.tolist()): 1676 print >> f, '%f\t%f\t%f' % (stat, cumrate, err) 1677 f.close() 1678 1679 #if lbl != '': 1680 # if opts.lin_y: 1681 # ytext = yval+.05*(ymax-ymin) 1682 # else: 1683 # ytext = yval * 10**.25 1684 # pylab.annotate( lbl, (xval,yval), xytext=(xval,ytext) ) 1685 # reset xmin/xmax 1686 xmin = min(min(newsnrs), xmin) 1687 xmax = max(max(newsnrs), xmax) 1688 ymin = min(these_cumnums.min(), ymin) 1689 ymax = max(these_cumnums.max(), ymax) 1690 # save x, y points for poisson pdf calc. 1691 added_bkg = [x for x in zip(newsnrs, these_cumnums) if x[0] > bkg_stats.max()] #.append((xval, T_fg / pt_livetime)) 1692 1693 1694 # 1695 # plot foreground 1696 # 1697 loudest_events = [] 1698 if plot_fg: 1699 match_category.datatype = fg_datatype 1700 plotvals = data.get_cumrates( match_category, 'stat', rank_by = 'max' ) 1701 fg_stats = numpy.array([x[0] for x in plotvals]) 1702 fg_cumrate = numpy.array([y[1] for y in plotvals]) / T_fg 1703 # FIXME: add ability to color by ifo colors? 1704 fclr = 'b' 1705 if fg_datatype == 'simulation': 1706 edgclr = 'r' 1707 edgwth = .5 1708 elif opts.plot_poisson_pdf: 1709 edgclr = 'white' 1710 edgwth = .5 1711 else: 1712 edgclr = 'b' 1713 edgwth = 0.5 1714 lbl = 'Foreground' #fg_datatype.replace( '_', '-').title() 1715 pylab.plot( fg_stats, fg_cumrate, marker = '^', markersize = 6.5, markerfacecolor = fclr, markeredgecolor = edgclr, markeredgewidth = edgwth, linestyle = 'None', label = lbl, zorder = 4 ) 1716 1717 # print data to text file 1718 f = open('PlotRatesData-Foreground.txt', 'w') 1719 print >> f, txthead % 'foreground coincident events (blue triangles)' 1720 print >> f, '#Combined NewSNR\tCumulative Rate (yr^-1)' 1721 for stat, cumrate in zip(fg_stats.tolist(), fg_cumrate.tolist()): 1722 print >> f, '%f\t%f' % (stat, cumrate) 1723 f.close() 1724 1725 # rest xmmin/xmax 1726 if fg_stats.any(): 1727 xmin = min(min(fg_stats),xmin) 1728 xmax = max(max(fg_stats),xmax) 1729 # reset ymin/ymax 1730 if fg_cumrate.any(): 1731 ymin = min(min(fg_cumrate),ymin) 1732 ymax = max(max(fg_cumrate),ymax) 1733 1734 # store the ids of the loudest events 1735 for x in plotvals[::-1]: 1736 if x[0] != max(fg_stats): 1737 break 1738 loudest_events.append(x[2]) 1739 1740 # 1741 # plot foreground triggers that fall in plot-special window as stars 1742 # 1743 #if opts.plot_special_time != []: 1744 # plot_sp_vals = [x for x in plotvals if data.data_index[x[2]].data.end_time in plot_special_times] 1745 # if plot_sp_vals != []: 1746 # pylab.scatter( [x[0] for x in plot_sp_vals], [y[1] / T_fg for y in plot_sp_vals], c = 'yellow', marker = (5,1,0), s = 80, linewidth = .5, label = 'Candidate', zorder = 5 ) 1747 1748 # 1749 # plot extrapolation 1750 # 1751 x_ext = numpy.array([]) 1752 y_ext = numpy.array([]) 1753 if plot_fg and opts.extrapolate is not None: 1754 if opts.verbose: 1755 print >> sys.stdout, "\textrapolating background..." 1756 if opts.extrapolate.upper() != 'ERFC': 1757 # figure out how many points to use for the fit; the default is to start at the first background point that drops below the foreground; this can be overridden with options 1758 min_x_fit = opts.min_x_fit is None and bkg_hist_stats.min() or opts.min_x_fit 1759 ### For Gaussian fit: 1760 # We're going to fit a Gaussian with 0 mean to the background. Since this is given by y = A*exp(-x^2./(2*sigma^2.))/sqrt(2*pi*sigma^2.), we take the natural log, and thus fit 1761 # y = beta[0] + beta[2]*x^2 where beta[0] = ln(A) - ln(2*pi*sigma^2.)/2., beta[2] = -1/(2.*sigma^2.) and y = ln(bkg_cumnum/T_fg) 1762 # To do this, we'll do a weighted linear least squares fit using Beta_hat = (transpose(X).W.X)^(-1).transpose(X).W.Y. where W is the nxn matrix of weights (which are just taken 1763 # as the error in each background point), X is the nx2 matrix of [1 x[i]^2.], Y is the nx1 matrix of ln(bkg_cumnum[i]/T_fg), and Beta_hat is the 2x1 matrix of fitted betas. 1764 # That assumes that the plotted x is linear in whatever the desired statistic is. However, if the plotted x is actually the desired statistic squared, then we should 1765 # fit y = beta[0] + beta[2]*x. Generalizing, we actually fit y = beta[0] + beta[2]*x^((2./opts.x_power)), since only the user can know what function of their desired statistic they've 1766 # punched into the command line. 1767 ### For power-law fit: 1768 # We're going to fit bkg_cumnum/T_fg = A*statistic^B by doing a least squares fit to y = beta[0] + beta[1]*x where y = ln(bkg_cumnum/T_fg), x = ln(statistic), beta[0] = ln(A) 1769 # and beta[1] = B. 1770 start_i = pylab.find( bkg_stats >= min_x_fit )[0] 1771 n = len(bkg_cumnum) - start_i 1772 X = numpy.matrix(numpy.zeros((n,2), dtype = float)) 1773 Y = numpy.matrix(numpy.zeros((n,1), dtype = float)) 1774 W = numpy.matrix(numpy.zeros((n,n), dtype = float)) 1775 # populate the input matrices 1776 for i in range(n): 1777 Y[i,0] = numpy.log( bkg_cumnum[start_i + i] ) - numpy.log( T_fg ) 1778 W[i,i] = 1./numpy.power( bkg_error[start_i + i]/bkg_cumnum[start_i + i], 2. ) 1779 X[i,0] = 1. 1780 if opts.extrapolate.upper() == 'POWER': 1781 X[i,1] = numpy.log( bkg_stats[start_i + i] ) 1782 else: 1783 X[i,1] = bkg_stats[start_i + i]**(2./opts.x_power) 1784 1785 # calculate the Beta_hats 1786 Beta_hat = (X.T * W * X).I * X.T * W * Y 1787 1788 # Compute the Chisq of the fit 1789 cum_chisq = ( W * numpy.power((Y - X * Beta_hat), 2.) ).sum() 1790 # the number of degrees of freedom = Ndof = number of points - number of fitting params - 1 1791 cum_Ndof = n - 2 - 1 1792 1793 # now that we have the fit parameters, extrapolate out to the maximum foreground value 1794 n_ext = 50 1795 if opts.lin_x: 1796 x_ext = numpy.linspace( bkg_stats[start_i], max(fg_stats.max(), bkg_stats.max()), num = n_ext ) 1797 else: 1798 x_ext = numpy.logspace( numpy.log10(bkg_stats[start_i]), numpy.log10( max(fg_stats.max(), bkg_stats.max()) ), num = n_ext ) 1799 Xext = numpy.matrix(numpy.zeros((n_ext,2), dtype = float)) 1800 for i,x in enumerate(x_ext): 1801 Xext[i,0] = 1. 1802 if opts.extrapolate.upper() == 'POWER': 1803 Xext[i,1] = numpy.log(x) 1804 else: 1805 Xext[i,1] = x**(2./opts.x_power) 1806 y_ext = numpy.exp( numpy.array(Xext * Beta_hat) ) 1807 1808 1809 else: 1810 # for error function, we use the results of the gaussian fit to the non-cumulative histogram 1811 n_ext = 50 1812 sigmasq = -1./(2.*Beta_hat[1,0]) 1813 1814 amp = numpy.exp(Beta_hat[0,0]) * numpy.sqrt(2*numpy.pi*sigmasq) / ( numpy.power(bins[1], 1./opts.x_power) - numpy.power(bins[0], 1./opts.x_power) ) #numpy.power(ds*2, 1./opts.x_power)[0] # (ds* 2.) 1815 if opts.lin_x: 1816 x_ext = numpy.linspace( min_x_fit, fg_stats.max(), num = n_ext ) 1817 else: 1818 x_ext = numpy.logspace( numpy.log10(min_x_fit), numpy.log10( fg_stats.max() ), num = n_ext ) 1819 1820 y_ext = amp * special.erfc( numpy.power(x_ext, 1./opts.x_power)/numpy.sqrt(2*sigmasq) ) / (2. * T_fg) 1821 1822 # plot the extrapolated line 1823 pylab.plot( x_ext, y_ext, 'g--', linewidth = 2, zorder = 5, label = 'Extrapolated Bkg.' ) 1824 1825 # rest xmmin/xmax 1826 if x_ext.any(): 1827 xmin = min(min(x_ext),xmin) 1828 xmax = max(max(x_ext),xmax) 1829 # reset ymin/ymax 1830 if y_ext.any(): 1831 ymin = min(y_ext.min(),ymin) 1832 ymax = max(y_ext.max(),ymax) 1833 1834 1835 # 1836 # plot poisson pdf 1837 # 1838 if opts.plot_poisson_pdf or opts.plot_significance_bands: 1839 if opts.verbose: 1840 print "\tcomputing poisson pdf..." 1841 xrange = 101 1842 yrange = 2*xrange 1843 # add added_bkg points to bkg stats 1844 bkg_stats = numpy.array( bkg_stats.tolist() + [x[0] for x in added_bkg] ) 1845 bkg_cumnum = numpy.array( bkg_cumnum.tolist() + [x[1] for x in added_bkg] ) 1846 1847 # if extrapolating, use the extrpolation beyond the largeest measured background point 1848 if opts.extrapolate is not None and plot_fg and x_ext.any(): 1849 xcoords = numpy.linspace(bkg_stats.min(), max(x_ext.max(), bkg_stats.max()), xrange) 1850 else: 1851 xcoords = numpy.linspace(bkg_stats.min(), bkg_stats.max(), xrange) 1852 ycoords = numpy.logspace(0, numpy.log10(ymax+0.5), yrange) 1853 1854 xarray = numpy.zeros((xrange,yrange), dtype=float) 1855 yarray = numpy.zeros((xrange,yrange), dtype=float) 1856 zarray = numpy.zeros((xrange,yrange), dtype=float) 1857 1858 for xi,x in enumerate(xcoords): 1859 if x <= bkg_stats.max(): 1860 # lmbda is the cumnum at the closest snr to (the right of) of x in the background 1861 lmbda = bkg_cumnum[pylab.find( bkg_stats >= x )[0]] 1862 # if x greater than bkg_stats.max, must mean we have extrapolated and we are in the region of extrapolation 1863 elif opts.extrapolate is not None and opts.extrapolate.upper() == "POWER": 1864 lmbda = numpy.exp( (Beta_hat[0,0] + Beta_hat[1,0]*numpy.log(x)) ) * T_fg 1865 elif opts.extrapolate is not None and opts.extrapolate.upper() == "GAUSSIAN": 1866 lmbda = numpy.exp( (Beta_hat[0,0] + Beta_hat[1,0]*x**(2./opts.x_power)) ) * T_fg 1867 elif opts.extrapolate is not None and opts.extrapolate.upper() == "ERFC": 1868 lmbda = amp * special.erfc( numpy.power(x,1./opts.x_power)/numpy.sqrt(2*sigmasq) ) / 2. 1869 for yi,y in enumerate(ycoords): 1870 xarray[xi,yi] += x 1871 yarray[xi,yi] += y 1872 p = poissonPDFlog10( y, lmbda ) 1873 zarray[xi, yi] += p 1874 1875 # replace -infs with smallest non-inf value of z - 1 1876 minz = min([z for z in zarray.flatten() if z != -numpy.inf]) 1877 for xi in range(len(xcoords)): 1878 for yi in range(len(ycoords)): 1879 if zarray[xi,yi] == -numpy.inf: 1880 zarray[xi,yi] = minz - 1 1881 1882 # rescale the y-coordinates by T_fg 1883 yarray = yarray / T_fg 1884 1885 # plot it 1886 if opts.plot_poisson_pdf and 'NaN' not in zarray: 1887 hxplt = pylab.hexbin(xarray.flatten(), yarray.flatten(), C = zarray.flatten(), gridsize=xrange-1, xscale='linear', yscale='log', edgecolors = 'none', vmin=-10.) 1888 cb = pylab.colorbar(hxplt, format = pylab.FuncFormatter(ColorFormatter)) 1889 cb.ax.set_ylabel( 'Probability Density' ) 1890 1891 # 1892 # plot contours 1893 # 1894 if opts.plot_significance_bands and opts.max_sigma > 0.: 1895 if opts.verbose: 1896 print "\tplotting contours..." 1897 sigmas = sorted([numpy.log10(special.erfc((n+1)/numpy.sqrt(2))/2.) for n in range(opts.max_sigma)]) 1898 if opts.plot_poisson_pdf: 1899 contplt = pylab.contour(xarray, yarray, zarray, sigmas, colors = 'k', linestyles='dashdot', label='N-\sigma', zorder = 1) 1900 cb.add_lines(contplt) 1901 else: 1902 fix_zarray(zarray,sigmas) 1903 pylab.contourf(xarray, yarray, zarray, sigmas+[0.0], cmap = pylab.cm.gray_r, alpha = 0.7) 1904 #for n in range(len(sigmas)-1): 1905 # pylab.contourf(xarray, yarray, zarray, [sorted(sigmas)[n], sorted(sigmas)[n+1]], cmap = pylab.cm.gray_r, label = 'N-\sigma', alpha = .2*n+.1) 1906 1907 # plot a line to show where the extrapolated data begins to be used 1908 if plot_fg and opts.extrapolate is not None: 1909 pylab.plot( [bkg_stats.max(), bkg_stats.max()], [fg_cumrate.min(), ymax*10**0.5], 'k-', linewidth = 2, zorder = 3 ) 1910 1911 1912 # 1913 # finalize plot settings 1914 # 1915 pylab.grid() 1916 pylab.legend(loc = 'lower left', scatterpoints = 1, numpoints = 3) 1917 pylab.xlabel( stat_label, fontsize=16 ) 1918 # reset xlabel ticks 1919 for xl in pylab.gca().get_xticklabels(): 1920 xl.set_size(16) 1921 pylab.ylabel( "Cumulative Rate (%s$^{-1}$)" % opts.time_units, fontsize=16 ) 1922 # reset ylabel ticks 1923 for yl in pylab.gca().get_yticklabels(): 1924 yl.set_size(16) 1925 if opts.title: 1926 if opts.param_name is not None: 1927 t = 'Cumulative Histogram %s %s' % (param_label.title(), param_parser.param_range_by_group(bkg_category.param_group))#, match_category.veto_cat.replace('_',' ').title()) 1928 else: 1929 t = 'Cumulative Histogram' 1930 t = 'Cumulative Histogram of H1L1 triggers with $3.48\mathrm{M_\odot} \le \mathcal{M} < 7.40\mathrm{M_\odot}$' 1931 pylab.title(t) 1932 1933 # set axes limits and scale 1934 if plot_fg: 1935 xmin = min(fg_stats.min(), bkg_stats.min()) 1936 else: 1937 xmin = bkg_stats.min() 1938 if not opts.lin_x: 1939 pylab.gca().semilogx() 1940 xmax = xmax*10**(0.3) 1941 else: 1942 xmax = xmax+.1*xmax 1943 if not opts.lin_y: 1944 pylab.gca().semilogy() 1945 ymin, ymax = ymin*10**(-0.5), ymax*10**(0.5) 1946 else: 1947 ymin, ymax = ymin - .1*ymin, ymax + .1*ymax 1948 # overrule with input options 1949 if opts.xmin is not None: 1950 xmin = opts.xmin 1951 if opts.xmax is not None: 1952 xmax = opts.xmax 1953 if opts.ymin is not None: 1954 ymin = opts.ymin 1955 if opts.ymax is not None: 1956 ymax = opts.ymax 1957 1958 pylab.xlim( xmin, xmax ) 1959 pylab.ylim( ymin, ymax ) 1960 1961 # store plot info 1962 plot_description = 'cumulative_F%i' % ( len(fnameList) ) 1963 name = InspiralUtils.set_figure_tag( plot_description, datatype_plotted = fg_datatype.upper(), open_box = 'all_data' in fg_datatype or 'exclude_play' in fg_datatype) 1964 fname = InspiralUtils.set_figure_name(InspiralUtilsOpts, name) 1965 #fname = re.sub('.png', '.pdf', fname) 1966 fname_thumb = InspiralUtils.savefig_pylal( filename=fname, dpi=opts.dpi ) 1967 fnameList.append(fname) 1968 tagList.append(name) 1969 1970 # get loudest event info for this plot group and add to comments 1971 comments += 'On instruments: %s Coinc. ifos: %s Veto Category: %s' %(''.join(sorted(match_category.on_instruments)), ''.join(sorted(match_category.ifos)), match_category.veto_cat) 1972 if opts.param_name is not None: 1973 comments += ' Param-group: %s' %(param_parser.param_range_by_group(match_category.param_group)) 1974 comments += ' \n' 1975 comments += "Background livetime: %.2f %s \n" % (T_bkg, opts.time_units) 1976 if opts.add_background_file: 1977 comments+= "Added background livetime: %.2f %s \n" % (T_add_bkg, opts.time_units) 1978 comments += "Loudest background event%s: \n" % (len(loudest_bkg_events) > 1 and 's' or '') 1979 comments += '<table cellpadding="5", border="1">\n' 1980 comments += '<tr><th>End Time</th><th>UTC</th><th>ifos</th><th>Total Mass</th><th>%s</th></tr>\n' % stat_label.replace(r'$', '') 1981 for le_id in loudest_bkg_events: 1982 le = data.data_index[le_id] 1983 comments += '<tr><td>%.2f</td><td>%s</td><td>%s</td><td>%.2f</td><td>%.2f</td></tr>' %( le.data.get_value('end_time'), printutils.format_end_time_in_utc(int(le.data.get_value('end_time'))), le.data.get_value('ifos'), le.data.get_value('mtotal'), le.data.get_value('stat') ) 1984 comments += '</table>\n' 1985 if plot_fg: 1986 comments += " %s livetime: %.3f %s \n" % (fg_datatype.replace('_', '-').title(), T_fg, opts.time_units) 1987 comments += "Loudest %s event%s: \n" % (fg_datatype, len(loudest_events) > 1 and 's' or '') 1988 comments += '<table cellpadding="5", border="1">\n' 1989 comments += '<tr><th>End Time</th><th>UTC</th><th>ifos</th><th>Total Mass</th><th>%s</th><th>Measured FAR</th>%s<th>Probability given background</tr>\n' % (stat_label.replace(r'$', ''), opts.extrapolate is not None and '<th>Estimated FAR</th>' or '') 1990 for le_id in loudest_events: 1991 le = data.data_index[le_id] 1992 if opts.add_background_file and le.data.stat >= added_bkg[0][0]: 1993 T_far = T_add_bkg 1994 else: 1995 T_far = T_bkg 1996 far = (len(bkg_stats)-bisect.bisect_left(bkg_stats, le.data.stat))/T_far 1997 if far == 0: 1998 far = '< 1/%.2f %s-1' % ( T_far, opts.time_units ) 1999 else: 2000 far = '1/%.2e %s-1' % (1./far, opts.time_units) 2001 comments += '<tr><td>%.2f</td><td>%s</td><td>%s</td><td>%.2f</td><td>%.2f</td><td>%s</td>' %( le.data.get_value('end_time'), printutils.format_end_time_in_utc(int(le.data.get_value('end_time'))), le.data.get_value('ifos'), le.data.get_value('mtotal'), le.data.get_value('stat'), far ) 2002 if opts.extrapolate is not None: 2003 if opts.extrapolate.upper() == "POWER": 2004 far_est = numpy.exp( Beta_hat[0,0] + Beta_hat[1,0]*numpy.log(le.data.stat) ) 2005 elif opts.extrapolate.upper() == "GAUSSIAN": 2006 far_est = numpy.exp( Beta_hat[0,0] + Beta_hat[1,0]*le.data.stat**(2./opts.x_power) ) 2007 else: 2008 far_est = amp * special.erfc( numpy.power(le.data.stat, 1./opts.x_power)/numpy.sqrt(2*sigmasq) ) / (2. * T_fg) 2009 comments += '<td>1/%.2e %s-1</td>' % ( 1./far_est, opts.time_units ) 2010 x = le.data.stat 2011 if x <= bkg_stats.max(): 2012 lmbda = bkg_cumnum[pylab.find( bkg_stats >= x )[0]] 2013 comments += "<td>%.2e</td>" % numpy.power(10, poissonPDFlog10( 1, lmbda )) 2014 else: 2015 lmbda = bkg_cumnum[-1] 2016 comments += "<td>< %.2e</td>" % numpy.power(10, poissonPDFlog10( 1, lmbda )) 2017 comments += '</tr>' 2018 comments += '</table>\n' 2019 if opts.extrapolate is not None: 2020 if opts.extrapolate.upper() == 'POWER': 2021 comments += " Fitted y = AxB Results: " 2022 comments += "A = %.2e B = %.2f " % (numpy.exp(Beta_hat[0,0]), Beta_hat[1,0]) 2023 elif opts.extrapolate.upper() == 'GAUSSIAN': 2024 comments += " Fitted y = Aexp(-x2/2σ2)/sqrt(2πσ2) Results: " 2025 sigmasq = -1./(2.*Beta_hat[1,0]) 2026 amp = numpy.exp(Beta_hat[0,0] + numpy.log(2*numpy.pi*sigmasq)/2.) 2027 comments += "σ2 = %.2f A = %.2f " %( sigmasq, amp ) 2028 comments += "Extrapolated from: β0 = %.2f β1 = %.2f " %( Beta_hat[0,0], Beta_hat[1,0] ) 2029 else: 2030 comments += " Fitted y = Aexp(-x2/2σ2)/sqrt(2πσ2) to non-cumulative plot. Results: " 2031 comments += "σ2 = %.2f A = %.2f " %( sigmasq, amp ) 2032 comments += " Goodness of fit: " 2033 comments += "χ2 = %.2f Reduced χ2 = %.2f" %( noncum_chisq, noncum_chisq / noncum_Ndof ) 2034 2035 comments += '<hr />\n' 2036 2037 if opts.verbose: 2038 print >> sys.stdout, "Writing html file and cache." 2039 2040 # create html of closed box plots 2041 comment = comments 2042 plothtml = InspiralUtils.write_html_output( InspiralUtilsOpts, input_args, fnameList, tagList, comment = comments, add_box_flag = True ) 2043 InspiralUtils.write_cache_output( InspiralUtilsOpts, plothtml, fnameList ) 2044 2045 2046 if __name__ == "__main__": 2047 main() 2048

Source Code for Module pylal.lalapps_cbc_plotrates