from __future__ import division
import numpy
import bisect
import sys

from glue.ligolw import lsctables
from glue.ligolw import dbtables
from lal import rate

def margLikelihoodMonteCarlo(VTs, lambs, mu, mcerrs=None):
    '''
    This function marginalizes the loudest event likelihood over unknown
    Monte Carlo errors, assumed to be independent between each experiment.
    '''
    if mcerrs is None:
        mcerrs = [0]*len(VTs)

    # combine the independent experiments multiplicatively
    likely = 1
    for vA, dvA, mc in zip(VTs, lambs, mcerrs):
        if mc == 0:
            # no Monte Carlo uncertainty on this experiment's VT:
            # standard loudest event likelihood
            likely *= (1 + mu*vA*dvA)*numpy.exp(-mu*vA)
        else:
            # marginalize over the Monte Carlo uncertainty on this
            # experiment's VT (k plays the role of an effective number
            # of injections)
            k = (vA/mc)**2
            likely *= (1 + mu*vA*(1/k + dvA))*(1 + mu*vA/k)**(-(k+1))

    return likely
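
# Usage sketch (illustrative numbers, not executed at import): evaluate the
# marginalized likelihood over a grid of rate values for a single experiment
# with VT = 2.0e6, loudest-event lambda = 0.1 and a 10% Monte Carlo error on VT.
#
#     mu = numpy.linspace(0, 5e-6, 200)
#     like = margLikelihoodMonteCarlo(numpy.array([2.0e6]), [0.1], mu, mcerrs=[0.2e6])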


def margLikelihood(VTs, lambs, mu, calerr=0, mcerrs=None):
    '''
    This function marginalizes the loudest event likelihood over unknown
    Monte Carlo and calibration errors. The vector VTs contains the sensitive
    volumes of the independent searches and lambs is the vector of loudest
    event likelihood parameters. The statistical errors are assumed to be
    independent between experiments, while the calibration error is applied
    identically to every experiment.
    '''
    if calerr == 0:
        return margLikelihoodMonteCarlo(VTs, lambs, mu, mcerrs)

    # model the fractional calibration error as log-normally distributed
    std = calerr
    mean = 0

    # sample the fractional error distribution and normalize the weights
    fracerrs = numpy.linspace(0.33, 3, 100)
    errdist = numpy.exp(-(numpy.log(fracerrs) - mean)**2/(2*std**2))/(fracerrs*std)
    errdist /= errdist.sum()

    # marginalize: weighted sum of likelihoods evaluated at rescaled volumes
    likely = sum([pd*margLikelihoodMonteCarlo(delta*VTs, lambs, mu, mcerrs) for delta, pd in zip(fracerrs, errdist)])

    return likely
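
# Usage sketch (illustrative numbers, not executed at import): marginalize over
# a 15% calibration uncertainty shared by two experiments. VTs should be a numpy
# array here so that the internal rescaling delta*VTs broadcasts correctly.
#
#     mu = numpy.linspace(0, 5e-6, 200)
#     post = margLikelihood(numpy.array([2.0e6, 1.5e6]), [0.1, 0.3], mu, calerr=0.15)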


def integral_element(mu, pdf):
    '''
    Returns an array of elements of the integrand dP = p(mu) dmu
    for a density p(mu) defined at sample values mu; samples need
    not be equally spaced.  Uses a simple trapezium rule.
    The number of dP elements is (number of mu samples) - 1.
    '''
    dmu = mu[1:] - mu[:-1]
    bin_mean = (pdf[1:] + pdf[:-1])/2
    return dmu * bin_mean


def normalize_pdf(mu, pofmu):
    """
    Takes a function pofmu defined at rate sample values mu and
    normalizes it to be a suitable pdf. Both mu and pofmu must be
    arrays or lists of the same length.
    """
    if min(pofmu) < 0:
        raise ValueError, "Probabilities cannot be negative, don't ask me to normalize a function with negative values!"
    if min(mu) < 0:
        raise ValueError, "Rates cannot be negative, don't ask me to normalize a function over a negative domain!"

    dp = integral_element(mu, pofmu)
    return mu, pofmu/sum(dp)
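
# Usage sketch (illustrative, not executed at import):
#
#     mu = numpy.linspace(0, 10, 101)       # hypothetical rate samples
#     post = numpy.exp(-mu)                 # un-normalized posterior
#     mu, post = normalize_pdf(mu, post)    # now integral_element(mu, post).sum() is ~1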


def compute_upper_limit(mu_in, post, alpha=0.9):
    """
    Returns the upper limit mu_high of confidence level alpha for a
    posterior distribution post on the given parameter mu.
    The posterior need not be normalized.
    """
    if 0 < alpha < 1:
        dp = integral_element(mu_in, post)
        high_idx = bisect.bisect_left(dp.cumsum()/dp.sum(), alpha)
        # if alpha is in (0,1) and post is non-negative, bisect_left always
        # returns an index within the range of mu_in, since the normalized
        # cumulative sum ends at one
        mu_high = mu_in[high_idx]
    elif alpha == 1:
        mu_high = numpy.max(mu_in[post > 0])
    else:
        raise ValueError, "Confidence level must be in (0,1]."

    return mu_high
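
# Usage sketch (illustrative, not executed at import): 90% upper limit from an
# un-normalized exponential posterior; the exact answer is -log(0.1), about 2.3.
#
#     mu = numpy.linspace(0, 20, 2001)
#     post = numpy.exp(-mu)
#     mu90 = compute_upper_limit(mu, post, alpha=0.9)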


def compute_lower_limit(mu_in, post, alpha=0.9):
    """
    Returns the lower limit mu_low of confidence level alpha for a
    posterior distribution post on the given parameter mu.
    The posterior need not be normalized.
    """
    if 0 < alpha < 1:
        dp = integral_element(mu_in, post)
        low_idx = bisect.bisect_right(dp.cumsum()/dp.sum(), 1 - alpha)
        # if alpha is in (0,1) and post is non-negative, bisect_right always
        # returns an index within the range of mu_in, since the normalized
        # cumulative sum ends at one
        mu_low = mu_in[low_idx]
    elif alpha == 1:
        mu_low = numpy.min(mu_in[post > 0])
    else:
        raise ValueError, "Confidence level must be in (0,1]."

    return mu_low


def confidence_interval_min_width(mu, post, alpha=0.9):
    '''
    Returns the minimal-width confidence interval [mu_low, mu_high] of
    confidence level alpha for a posterior distribution post on the parameter mu.
    '''
    if not 0 < alpha < 1:
        raise ValueError, "Confidence level must be in (0,1)."

    # step size for scanning candidate intervals of total coverage alpha
    alpha_step = 0.01

    # initialize to the full range of the posterior samples
    mu_low = numpy.min(mu)
    mu_high = numpy.max(mu)

    # scan candidate intervals and keep the narrowest one
    for ai in numpy.arange(0, 1 - alpha, alpha_step):
        ml = compute_lower_limit(mu, post, 1 - ai)
        mh = compute_upper_limit(mu, post, alpha + ai)
        if mh - ml < mu_high - mu_low:
            mu_low = ml
            mu_high = mh

    return mu_low, mu_high
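
# Usage sketch (illustrative, not executed at import): for a skewed unimodal
# posterior the minimal-width 90% interval is at least as narrow as the central
# interval built from compute_lower_limit and compute_upper_limit separately.
#
#     mu = numpy.linspace(0, 10, 1001)
#     post = mu * numpy.exp(-mu)            # peaked near mu = 1
#     lo, hi = confidence_interval_min_width(mu, post, alpha=0.9)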


def hpd_coverage(mu, pdf, thresh):
    '''
    Integrates a pdf over mu, taking only bins where
    the mean over the bin is above a given threshold.
    This gives the coverage of the HPD interval for
    the given threshold.
    '''
    dp = integral_element(mu, pdf)
    bin_mean = (pdf[1:] + pdf[:-1])/2

    return dp[bin_mean > thresh].sum()


def hpd_threshold(mu_in, post, alpha, tol):
    '''
    For a PDF post over samples mu_in, find a density
    threshold such that the region having higher density
    has coverage of at least alpha, and less than alpha
    plus a given tolerance.
    '''
    norm_post = normalize_pdf(mu_in, post)

    # bisect on the density threshold
    p_minus = 0.0
    p_plus = max(post)
    while abs(hpd_coverage(mu_in, post, p_minus) - hpd_coverage(mu_in, post, p_plus)) >= tol:
        p_test = (p_minus + p_plus)/2
        if hpd_coverage(mu_in, post, p_test) >= alpha:
            # coverage at p_test is still sufficient, so the threshold can be raised
            p_minus = p_test
        else:
            # coverage at p_test is too small, so the threshold must be lowered
            p_plus = p_test

    # on exit, the coverage at p_minus is at least alpha and differs from the
    # coverage at p_plus by less than tol

    return p_minus


def hpd_credible_interval(mu_in, post, alpha=0.9, tolerance=1e-3):
    '''
    Returns the minimum and maximum rate values of the HPD
    (Highest Posterior Density) credible interval for a posterior
    post defined at the sample values mu_in.  Samples need not be
    uniformly spaced and the posterior need not be normalized.

    Will not return a correct credible interval if the posterior
    is multimodal and the correct interval is not contiguous;
    in this case it will over-cover by including the whole range from
    the minimum to the maximum mu.
    '''
    if alpha == 1:
        nonzero_samples = mu_in[post > 0]
        mu_low = numpy.min(nonzero_samples)
        mu_high = numpy.max(nonzero_samples)
    elif 0 < alpha < 1:
        # determine the density threshold for which the region of higher
        # posterior density has coverage of approximately alpha
        pthresh = hpd_threshold(mu_in, post, alpha, tol=tolerance)
        samples_over_threshold = mu_in[post > pthresh]
        mu_low = numpy.min(samples_over_threshold)
        mu_high = numpy.max(samples_over_threshold)

    return mu_low, mu_high
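
# Usage sketch (illustrative, not executed at import): HPD interval for a
# unimodal posterior. Normalizing the posterior first keeps hpd_threshold's
# coverage comparison on the same scale as alpha.
#
#     mu = numpy.linspace(0, 10, 1001)
#     post = mu * numpy.exp(-mu)
#     mu, post = normalize_pdf(mu, post)
#     lo, hi = hpd_credible_interval(mu, post, alpha=0.9, tolerance=1e-3)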


def integrate_efficiency(dbins, eff, err=0, logbins=False):

    if logbins:
        logd = numpy.log(dbins)
        dlogd = logd[1:] - logd[:-1]
        # geometric mean as the representative distance of each log-spaced bin
        dreps = numpy.exp((numpy.log(dbins[1:]) + numpy.log(dbins[:-1]))/2.)
        vol = numpy.sum(4. * numpy.pi * (dreps ** 3.) * eff * dlogd)
        # propagate the per-bin efficiency errors in quadrature
        verr = numpy.sqrt(numpy.sum((4. * numpy.pi * (dreps ** 3.) * err * dlogd) ** 2.))
    else:
        dd = dbins[1:] - dbins[:-1]
        # arithmetic mean as the representative distance of each linear bin
        dreps = (dbins[1:] + dbins[:-1])/2.
        vol = numpy.sum(4. * numpy.pi * (dreps ** 2.) * eff * dd)
        # propagate the per-bin efficiency errors in quadrature
        verr = numpy.sqrt(numpy.sum((4. * numpy.pi * (dreps ** 2.) * err * dd) ** 2.))

    return vol, verr


def compute_efficiency(f_dist, m_dist, dbins):
    '''
    Compute the efficiency as a function of distance for the given sets of found
    and missed injection distances.
    Note that injections that do not fit into any dbin get lost :(.
    '''
    efficiency = numpy.zeros(len(dbins) - 1)
    error = numpy.zeros(len(dbins) - 1)
    for j, dlow in enumerate(dbins[:-1]):
        dhigh = dbins[j + 1]
        found = numpy.sum((dlow <= f_dist) * (f_dist < dhigh))
        missed = numpy.sum((dlow <= m_dist) * (m_dist < dhigh))
        if found + missed == 0: missed = 1.0  # avoid division by zero in empty bins
        efficiency[j] = found / (found + missed)
        error[j] = numpy.sqrt(efficiency[j] * (1 - efficiency[j]) / (found + missed))

    return efficiency, error
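
# Usage sketch (illustrative, not executed at import): bin found/missed
# injection distances into an efficiency curve, then integrate it to a volume.
#
#     dbins = numpy.linspace(0., 100., 11)              # hypothetical distance bins
#     f_dist = numpy.array([5., 20., 35., 50., 65.])    # found injections
#     m_dist = numpy.array([45., 60., 75., 90., 95.])   # missed injections
#     eff, err = compute_efficiency(f_dist, m_dist, dbins)
#     vol, verr = integrate_efficiency(dbins, eff, err)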


def mean_efficiency_volume(found, missed, dbins):

    if len(found) == 0:
        # no efficiency without found injections
        return numpy.zeros(len(dbins) - 1), numpy.zeros(len(dbins) - 1), 0, 0

    # distances of found and missed injections
    f_dist = numpy.array([l.distance for l in found])
    m_dist = numpy.array([l.distance for l in missed])

    # binned efficiency and the volume obtained by integrating it
    eff, err = compute_efficiency(f_dist, m_dist, dbins)
    vol, verr = integrate_efficiency(dbins, eff, err)

    return eff, err, vol, verr


def volume_montecarlo(found, missed, distribution_param, distribution, limits_param, max_param=None, min_param=None):
    '''
    Compute the sensitive volume and standard error using a direct Monte Carlo integral.

    * distribution_param, D: parameter of the injections used to generate a distribution over distance
      - may be 'distance' or 'chirp_distance'
    * distribution: form of the distribution over the parameter
      - 'log' (uniform in log D), 'uniform' (uniform in D), 'distancesquared' (uniform in D**2),
        'volume' (uniform in D**3)
      - It is assumed that injections were carried out over a range of D such that the sensitive
        volume due to signals at distances < D_min is negligible and the efficiency at distances
        > D_max is negligibly small
    * limits_param, Dlim: parameter specifying the limits within which injections were made
      - may be 'distance' or 'chirp_distance'
    * max_param: maximum value of Dlim out to which injections were made; if None, the maximum
      value among all found and missed injections will be used
    * min_param: minimum value of Dlim at which injections were made, needed to normalize
      the log distance integral correctly.  If None, for the log distribution the minimum
      value among all found and missed injections will be used
    '''
    # power of distance by which injections must be weighted to mimic a
    # uniform-in-volume distribution of sources
    d_power = {
        'log'             : 3.,
        'uniform'         : 2.,
        'distancesquared' : 1.,
        'volume'          : 0.
    }[distribution]
    # additional chirp-mass-dependent weight needed when the injection
    # distribution is over chirp distance rather than physical distance
    mchirp_power = {
        'log'             : 0.,
        'uniform'         : 5. / 6.,
        'distancesquared' : 5. / 3.,
        'volume'          : 5. / 2.
    }[distribution]

    found_d = numpy.array([inj.distance for inj in found])
    missed_d = numpy.array([inj.distance for inj in missed])

    # determine the maximum physical distance out to which injections were made
    if limits_param == 'chirp_distance':
        mchirp_standard_bns = 1.4 * (2. ** (-1. / 5.))
        found_mchirp = numpy.array([inj.mchirp for inj in found])
        missed_mchirp = numpy.array([inj.mchirp for inj in missed])
        all_mchirp = numpy.concatenate((found_mchirp, missed_mchirp))
        max_mchirp = numpy.max(all_mchirp)
        if max_param is not None:
            # convert the chirp distance limit to a physical distance for the
            # heaviest binary in the injection set
            max_distance = max_param * (max_mchirp / mchirp_standard_bns) ** (5. / 6.)
    elif limits_param == 'distance':
        max_distance = max_param
    else: raise NotImplementedError("%s is not a recognized parameter" % limits_param)

    # if no maximum was supplied, use the largest distance actually injected
    if max_param is None:
        max_distance = max(numpy.max(found_d), numpy.max(missed_d))

    # total volume of the sphere out to max_distance
    montecarlo_vtot = (4. / 3.) * numpy.pi * max_distance ** 3.

    # set up the Monte Carlo weights for found and missed injections
    if distribution_param == 'distance':
        found_weights = found_d ** d_power
        missed_weights = missed_d ** d_power
    elif distribution_param == 'chirp_distance':
        # for a distribution over chirp distance, the weights also depend on
        # chirp mass through the chirp-distance-to-distance conversion
        found_weights = found_d ** d_power * \
                        found_mchirp ** mchirp_power
        missed_weights = missed_d ** d_power * \
                         missed_mchirp ** mchirp_power
    else: raise NotImplementedError("%s is not a recognized distance parameter" % distribution_param)

    all_weights = numpy.concatenate((found_weights, missed_weights))

    # the Monte Carlo integrand is the weight for a found injection and zero
    # for a missed injection
    mc_weight_samples = numpy.concatenate((found_weights, 0 * missed_weights))
    mc_sum = sum(mc_weight_samples)

    if limits_param == 'distance':
        mc_norm = sum(all_weights)
    elif limits_param == 'chirp_distance':
        # if injections were made up to a fixed chirp distance, the injected
        # volume depends on chirp mass; rescale the normalization to the
        # maximum chirp mass in the injection set
        mc_norm = sum(all_weights * (max_mchirp / all_mchirp) ** (5. / 2.))

    # prefactor relating the weighted Monte Carlo sum to a volume
    mc_prefactor = montecarlo_vtot / mc_norm

    # effective number of injections contributing to the Monte Carlo sum
    if limits_param == 'distance':
        Ninj = len(mc_weight_samples)
    elif limits_param == 'chirp_distance':
        # account for the chirp-mass dependence of the injected volume when
        # counting the effective number of injections
        if distribution == 'log':
            # for a log distribution the effective count also depends on the
            # injected distance range
            if min_param is not None:
                min_distance = min_param * (numpy.min(all_mchirp) / mchirp_standard_bns) ** (5. / 6.)
            else:
                min_distance = min(numpy.min(found_d), numpy.min(missed_d))
            logrange = numpy.log(max_distance / min_distance)
            Ninj = len(mc_weight_samples) + (5. / 6.) * sum(numpy.log(max_mchirp / all_mchirp) / logrange)
        else:
            Ninj = sum((max_mchirp / all_mchirp) ** mchirp_power)

    # sample variance of the Monte Carlo integrand
    mc_sample_variance = sum(mc_weight_samples ** 2.) / Ninj - (mc_sum / Ninj) ** 2.

    # return the volume estimate and its standard error
    return mc_prefactor * mc_sum, mc_prefactor * (Ninj * mc_sample_variance) ** 0.5
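
# Usage sketch (illustrative, not executed at import): unbinned Monte Carlo
# volume for injections distributed uniformly in distance out to a fixed
# physical distance. found and missed are assumed to be sim_inspiral-like rows
# with a .distance attribute (and .mchirp if the chirp-distance options are used).
#
#     vol, verr = volume_montecarlo(found, missed,
#                                   distribution_param='distance',
#                                   distribution='uniform',
#                                   limits_param='distance',
#                                   max_param=300.)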


def filter_injections_by_mass(injs, mbins, bin_num, bin_type, bin_num2=None):
    '''
    For a given set of injections (sim_inspiral rows), return the subset
    of injections that fall within the given mass range.
    '''
    if bin_type == "Mass1_Mass2":
        m1bins = numpy.concatenate((mbins.lower()[0], numpy.array([mbins.upper()[0][-1]])))
        m1lo = m1bins[bin_num]
        m1hi = m1bins[bin_num + 1]
        m2bins = numpy.concatenate((mbins.lower()[1], numpy.array([mbins.upper()[1][-1]])))
        m2lo = m2bins[bin_num2]
        m2hi = m2bins[bin_num2 + 1]
        newinjs = [l for l in injs if ((m1lo <= l.mass1 < m1hi and m2lo <= l.mass2 < m2hi) or (m1lo <= l.mass2 < m1hi and m2lo <= l.mass1 < m2hi))]
        return newinjs

    mbins = numpy.concatenate((mbins.lower()[0], numpy.array([mbins.upper()[0][-1]])))
    mlow = mbins[bin_num]
    mhigh = mbins[bin_num + 1]
    if bin_type == "Chirp_Mass":
        newinjs = [l for l in injs if (mlow <= l.mchirp < mhigh)]
    elif bin_type == "Total_Mass":
        newinjs = [l for l in injs if (mlow <= l.mass1 + l.mass2 < mhigh)]
    elif bin_type == "Component_Mass":
        # selects on mass1 only
        newinjs = [l for l in injs if (mlow <= l.mass1 < mhigh)]
    elif bin_type == "BNS_BBH":
        if bin_num in (0, 2):
            # BNS or BBH bin: both components lie in the same mass range
            newinjs = [l for l in injs if (mlow <= l.mass1 < mhigh and mlow <= l.mass2 < mhigh)]
        else:
            # NSBH bin: one component in the NS range, the other in the BH range
            newinjs = [l for l in injs if (mbins[0] <= l.mass1 < mbins[1] and mbins[2] <= l.mass2 < mbins[3])]
            newinjs += [l for l in injs if (mbins[0] <= l.mass2 < mbins[1] and mbins[2] <= l.mass1 < mbins[3])]

    return newinjs
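
# Usage sketch (illustrative, not executed at import): select injections whose
# mass1 falls in the first of four hypothetical component-mass bins. The mbins
# argument is a rate.NDBins instance, as used by compute_volume_vs_mass below;
# all_injs stands for any list of sim_inspiral rows.
#
#     mass_bins = rate.NDBins((rate.LinearBins(1., 25., 4),))
#     bin0_injs = filter_injections_by_mass(all_injs, mass_bins, 0, "Component_Mass")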


def compute_volume_vs_mass(found, missed, mass_bins, bin_type, dbins=None,
                           distribution_param=None, distribution=None, limits_param=None,
                           max_param=None, min_param=None):
    """
    Compute the average luminosity an experiment was sensitive to, given the
    sets of found and missed injections and assuming luminosity is uniformly
    distributed in space.

    If distribution_param, distribution and limits_param are all not None, use
    an unbinned Monte Carlo integral (which optionally takes max_param and
    min_param) for the volume and its error.
    Otherwise use a simple efficiency * distance**2 binned integral.
    In either case use the distance bins (dbins) to return the efficiency in each mass bin.
    """
    # decide whether the unbinned Monte Carlo volume integral can be used
    if distribution_param is not None and distribution is not None and limits_param is not None:
        mc_unbinned = True
    else:
        mc_unbinned = False

    # binned arrays for the sensitive volume and its error in each mass bin
    volArray = rate.BinnedArray(mass_bins)
    vol2Array = rate.BinnedArray(mass_bins)

    # binned arrays for the numbers of found and missed injections
    foundArray = rate.BinnedArray(mass_bins)
    missedArray = rate.BinnedArray(mass_bins)

    # efficiency and efficiency error versus distance, one entry per mass bin
    effvmass = []
    errvmass = []

    if bin_type == "Mass1_Mass2":
        for j, mc1 in enumerate(mass_bins.centres()[0]):
            for k, mc2 in enumerate(mass_bins.centres()[1]):

                # filter out injections that do not fall in this mass bin
                newfound = filter_injections_by_mass(found, mass_bins, j, bin_type, k)
                newmissed = filter_injections_by_mass(missed, mass_bins, j, bin_type, k)

                foundArray[(mc1, mc2)] = len(newfound)
                missedArray[(mc1, mc2)] = len(newmissed)

                # compute the binned efficiency and volume; optionally replace
                # the volume with the unbinned Monte Carlo estimate
                meaneff, efferr, meanvol, volerr = mean_efficiency_volume(newfound, newmissed, dbins)
                effvmass.append(meaneff)
                errvmass.append(efferr)
                if mc_unbinned:
                    meanvol, volerr = volume_montecarlo(newfound, newmissed, distribution_param,
                                            distribution, limits_param, max_param, min_param)
                volArray[(mc1, mc2)] = meanvol
                vol2Array[(mc1, mc2)] = volerr

        return volArray, vol2Array, foundArray, missedArray, effvmass, errvmass

    # all other bin types are one-dimensional in mass
    for j, mc in enumerate(mass_bins.centres()[0]):

        # filter out injections that do not fall in this mass bin
        newfound = filter_injections_by_mass(found, mass_bins, j, bin_type)
        newmissed = filter_injections_by_mass(missed, mass_bins, j, bin_type)

        foundArray[(mc,)] = len(newfound)
        missedArray[(mc,)] = len(newmissed)

        # compute the binned efficiency and volume
        meaneff, efferr, meanvol, volerr = mean_efficiency_volume(newfound, newmissed, dbins)
        effvmass.append(meaneff)
        errvmass.append(efferr)
        # optionally replace the volume with the unbinned Monte Carlo estimate
        if mc_unbinned:
            meanvol, volerr = volume_montecarlo(newfound, newmissed, distribution_param,
                                    distribution, limits_param, max_param, min_param)
        volArray[(mc,)] = meanvol
        vol2Array[(mc,)] = volerr

    return volArray, vol2Array, foundArray, missedArray, effvmass, errvmass
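
# Usage sketch (illustrative, not executed at import): binned efficiency plus an
# unbinned Monte Carlo volume in each chirp mass bin; found and missed are lists
# of sim_inspiral-like rows.
#
#     mass_bins = rate.NDBins((rate.LinearBins(0.8, 8.0, 6),))
#     dbins = numpy.linspace(0., 300., 51)
#     vols, volerrs, nfound, nmissed, effs, efferrs = compute_volume_vs_mass(
#         found, missed, mass_bins, "Chirp_Mass", dbins=dbins,
#         distribution_param='distance', distribution='uniform',
#         limits_param='distance', max_param=300.)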


def log_volume_derivative_fit(x, vols):
    '''
    Performs a linear least squares fit of log(vols) as a function of x.
    '''
    if numpy.min(vols) == 0:
        print >> sys.stderr, "Warning: cannot fit log volume derivative, one or more volumes are zero!"
        print >> sys.stderr, vols
        return (0, 0)

    coeffs, resids, rank, svs, rcond = numpy.polyfit(x, numpy.log(vols), 1, full=True)
    if coeffs[0] < 0:
        # a negative fitted slope (Lambda) is not allowed; substitute zero
        print >> sys.stderr, "Warning: Derivative fit resulted in Lambda =", coeffs[0]
        coeffs[0] = 0
        print >> sys.stderr, "The value Lambda = 0 was substituted"

    return coeffs
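
# Usage sketch (illustrative numbers, not executed at import): fit the slope of
# log(volume) against a threshold-like parameter x; the first fit coefficient is
# the Lambda referred to in the warnings above.
#
#     x = numpy.array([0.0, 0.5, 1.0, 1.5])
#     vols = numpy.array([2.0e6, 2.5e6, 3.1e6, 4.0e6])
#     coeffs = log_volume_derivative_fit(x, vols)
#     Lambda = coeffs[0]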


def get_loudest_event(connection, coinc_table="coinc_inspiral", datatype="exclude_play"):

    # find the smallest combined FAR for each instrument combination
    far_threshold_query = """
SELECT coinc_event.instruments, MIN(combined_far) FROM %s
    JOIN coinc_event ON (%s.coinc_event_id == coinc_event.coinc_event_id)
    JOIN experiment_map ON (coinc_event.coinc_event_id == experiment_map.coinc_event_id)
    JOIN experiment_summary ON (experiment_summary.experiment_summ_id == experiment_map.experiment_summ_id)
WHERE experiment_summary.datatype == "%s"
GROUP BY coinc_event.instruments;
""" % (coinc_table, coinc_table, datatype)

    for inst, far in connection.cursor().execute(far_threshold_query):
        inst = frozenset(lsctables.instrument_set_from_ifos(inst))
        yield inst, far

    return
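
# Usage sketch (illustrative, not executed at import): iterate over the loudest
# (smallest combined FAR) event per instrument combination in a ligolw sqlite
# database; the file name below is hypothetical.
#
#     import sqlite3
#     connection = sqlite3.connect("FULL_DATA.sqlite")
#     for instruments, far in get_loudest_event(connection, datatype="exclude_play"):
#         print instruments, far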