Package pylal :: Module trigger_fits
[hide private]
[frames] | no frames]

Source Code for Module pylal.trigger_fits

  1  from __future__ import division 
  2   
  3  # Copyright T. Dent 2015 (thomas.dent@aei.mpg.de) 
  4  # 
  5  # This program is free software; you can redistribute it and/or modify it 
  6  # under the terms of the GNU General Public License as published by the 
  7  # Free Software Foundation; either version 2 of the License, or (at your 
  8  # option) any later version. 
  9  # 
 10  # This program is distributed in the hope that it will be useful, but 
 11  # WITHOUT ANY WARRANTY; without even the implied warranty of 
 12  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General 
 13  # Public License for more details. 
 14   
 15  """ 
 16  For some set of values above a threshold, e.g. trigger SNRs, the functions 
 17  in this module perform maximum likelihood fits with 1-sigma uncertainties 
 18  to various simple functional forms of PDF, all normalized to 1. 
 19  You can also obtain the fitted function and its (inverse) CDF and perform 
 20  a Kolmogorov-Smirnov test. 
 21   
 22  Usage: 
 23  # call the fit function directly if the threshold is known 
 24  alpha, sigma_alpha = fit_exponential(snrs, 5.5) 
 25   
 26  # apply a threshold explicitly 
 27  alpha, sigma_alpha = fit_above_thresh('exponential', snrs, thresh=6.25) 
 28   
 29  # let the code work out the threshold from the smallest value via the default thresh=None 
 30  alpha, sigma_alpha = fit_above_thresh('exponential', snrs) 
 31   
 32  # or only fit the largest N values, i.e. tail fitting 
 33  thresh = tail_threshold(snrs, N=500) 
 34  alpha, sigma_alpha = fit_above_thresh('exponential', snrs, thresh) 
 35   
 36  # obtain the fitted function directly 
 37  xvals = numpy.xrange(5.5, 10.5, 20) 
 38  exponential_fit = expfit(xvals, alpha, thresh) 
 39   
 40  # or access function by name 
 41  exponential_fit_1 = fit_fn('exponential', xvals, alpha, thresh) 
 42   
 43  # get the KS test statistic and p-value - see scipy.stats.kstest 
 44  ks_stat, ks_pval = KS_test('exponential', snrs, alpha, thresh) 
 45  """ 
 46   
 47  import numpy 
 48  from scipy.stats import kstest 
 49   
50 -def fit_exponential(vals, thresh):
51 ''' 52 Maximum likelihood fit for the coefficient alpha for a distribution of 53 discrete values p(x) = alpha exp(-alpha (x-x_t)) above a threshold x_t. 54 55 vals: sequence of real numbers none of which lies below thresh 56 thresh: threshold used in the fitting formula 57 ''' 58 vals = numpy.array(vals) 59 if min(vals) < thresh: 60 raise RuntimeError("Values to be fit must all be above threshold!") 61 alpha = 1. / (numpy.mean(vals) - thresh) 62 # sigma is measurement standard deviation = (-d^2 log L/d alpha^2)^(-1/2) 63 sigma_alpha = alpha / len(vals)**0.5 64 return alpha, sigma_alpha
65
66 -def fit_rayleigh(vals, thresh):
67 ''' 68 Maximum likelihood fit for the coefficient alpha for a distribution of 69 discrete values p(x) = alpha x exp(-alpha (x**2-x_t**2)/2) above a 70 threshold x_t. 71 72 vals: sequence of real numbers none of which lies below thresh 73 thresh: threshold used in the fitting formula 74 ''' 75 vals = numpy.array(vals) 76 if min(vals) < thresh: 77 raise RuntimeError("Values to be fit must all be above threshold!") 78 alpha = 2. / (numpy.mean(vals**2.) - thresh**2.) 79 # sigma is measurement standard deviation = (-d^2 log L/d alpha^2)^(-1/2) 80 sigma_alpha = alpha / len(vals)**0.5 81 return alpha, sigma_alpha
82
83 -def fit_power(vals, thresh):
84 ''' 85 Maximum likelihood fit for the coefficient alpha for a distribution of 86 discrete values p(x) = ((alpha-1)/x_t) (x/x_t)**-alpha above a threshold 87 x_t. 88 89 vals: sequence of real numbers none of which lies below thresh 90 thresh: threshold used in the fitting formula 91 ''' 92 vals = numpy.array(vals) 93 if min(vals) < thresh: 94 raise RuntimeError("Values to be fit must all be above threshold!") 95 alpha = numpy.mean(numpy.log(vals/thresh))**-1. + 1. 96 # sigma is measurement standard deviation = (-d^2 log L/d alpha^2)^(-1/2) 97 sigma_alpha = (alpha - 1.) / len(vals)**0.5 98 return alpha, sigma_alpha
99
100 -def expfit(xvals, alpha, thresh):
101 ''' 102 The fitted exponential function normalized to 1 above threshold 103 104 xvals: the values at which the fit PDF is to be evaluated 105 alpha: the fitted exponent factor 106 thresh: threshold value for the fitting process 107 108 To normalize to a given total count multiply by the count. 109 ''' 110 xvals = numpy.array(xvals) 111 fit = alpha * numpy.exp(-alpha * (xvals - thresh)) 112 # set fitted values below threshold to 0 113 numpy.putmask(fit, xvals < thresh, 0.) 114 return fit
115
116 -def expfit_cum(xvals, alpha, thresh):
117 ''' 118 The integral of the exponential fit above a given value (reverse CDF) 119 normalized to 1 above threshold 120 121 To normalize to a given total count multiply by the count. 122 ''' 123 xvals = numpy.array(xvals) 124 cum_fit = numpy.exp(-alpha * (xvals - thresh)) 125 # set fitted values below threshold to 0 126 numpy.putmask(cum_fit, xvals < thresh, 0.) 127 return cum_fit
128
129 -def rayleighfit(xvals, alpha, thresh):
130 ''' 131 The fitted Rayleigh function normalized to 1 above threshold 132 133 xvals: the values at which the fit PDF is to be evaluated 134 alpha: the fitted exponent factor 135 thresh: threshold value for the fitting process 136 137 To normalize to a given total count multiply by the count. 138 ''' 139 if thresh < 0: 140 raise RuntimeError('Threshold for Rayleigh distribution cannot be negative!') 141 xvals = numpy.array(xvals) 142 fit = alpha * xvals * numpy.exp(-alpha * (xvals**2. - thresh**2.) / 2.) 143 # set fitted values below threshold to 0 144 numpy.putmask(fit, xvals < thresh, 0.) 145 return fit
146
147 -def rayleighfit_cum(xvals, alpha, thresh):
148 ''' 149 The integral of the Rayleigh fit above the x-values given (reverse CDF) 150 normalized to 1 above threshold 151 152 To normalize to a given total count multiply by the count. 153 ''' 154 if thresh < 0: 155 raise RuntimeError('Threshold for Rayleigh distribution cannot be negative!') 156 xvals = numpy.array(xvals) 157 cum_fit = numpy.exp(-alpha * (xvals**2. - thresh**2.) / 2.) 158 # set fitted values below threshold to 0 159 numpy.putmask(cum_fit, xvals < thresh, 0.) 160 return cum_fit
161
162 -def powerfit(xvals, alpha, thresh):
163 ''' 164 The fitted power-law function normalized to 1 above threshold 165 166 xvals: the values at which the fit PDF is to be evaluated 167 alpha: the fitted power 168 thresh: threshold value for the fitting process 169 170 To normalize to a given total count multiply by the count. 171 ''' 172 xvals = numpy.array(xvals) 173 fit = (alpha - 1.) * xvals**(-alpha) * thresh**(alpha - 1.) 174 # set fitted values below threshold to 0 175 numpy.putmask(fit, xvals < thresh, 0.) 176 return fit
177
178 -def powerfit_cum(xvals, alpha, thresh):
179 ''' 180 The integral of the power-law fit above the x-values given (reverse CDF) 181 normalized to 1 above threshold 182 183 To normalize to a given total count multiply by the count. 184 ''' 185 xvals = numpy.array(xvals) 186 cum_fit = xvals**(1. - alpha) * thresh**(alpha - 1.) 187 # set fitted values below threshold to 0 188 numpy.putmask(cum_fit, xvals < thresh, 0.) 189 return cum_fit
190 191 fitdict = { 192 'exponential' : fit_exponential, 193 'rayleigh' : fit_rayleigh, 194 'power' : fit_power 195 } 196 197 fndict = { 198 'exponential' : expfit, 199 'rayleigh' : rayleighfit, 200 'power' : powerfit 201 } 202 203 cum_fndict = { 204 'exponential' : expfit_cum, 205 'rayleigh' : rayleighfit_cum, 206 'power' : powerfit_cum, 207 } 208
209 -def fit_above_thresh(distr, vals, thresh=None):
210 ''' 211 Maximum likelihood fit for the coefficient alpha for a distribution of 212 discrete values p(x) = alpha exp(-alpha*x) above a given threshold. 213 Values below threshold will be discarded. If no threshold is specified, 214 the minimum sample value will be used. 215 216 distr: name of distribution, either 'exponential' or 'rayleigh' or 'power' 217 218 vals: sequence of real numbers 219 220 thresh: threshold to apply before fitting - if thresh=None, use the lowest 221 value 222 ''' 223 vals = numpy.array(vals) 224 if thresh == None: 225 thresh = min(vals) 226 else: 227 vals = vals[vals >= thresh] 228 return fitdict[distr](vals, thresh)
229
230 -def tail_threshold(vals, N=1000):
231 ''' 232 Determine a threshold above which there are N louder values 233 ''' 234 vals = numpy.array(vals) 235 if len(vals) < N: 236 raise RuntimeError('Not enough input values to determine threshold') 237 vals.sort() 238 return min(vals[-N:])
239
240 -def fit_fn(distr, xvals, alpha, thresh):
241 ''' 242 The fitted function normalized to 1 above threshold 243 ''' 244 return fndict[distr](xvals, alpha, thresh)
245
246 -def cum_fit(distr, xvals, alpha, thresh):
247 ''' 248 The integral of the fitted function above a given value (reverse CDF) 249 normalized to 1 above threshold 250 ''' 251 return cum_fndict[distr](xvals, alpha, thresh)
252
253 -def KS_test(distr, vals, alpha, thresh=None):
254 ''' 255 Perform Kolmogorov-Smirnov test of the given set of discrete values 256 above a given threshold for the fitted distribution function 257 ex.: KS_test('exponential', vals, alpha, thresh) 258 If no threshold is specified, the minimum sample value will be used. 259 260 Returns the KS test statistic and its p-value: lower p means less 261 probable under the hypothesis of a perfect fit 262 ''' 263 vals = numpy.array(vals) 264 if thresh == None: 265 thresh = min(vals) 266 else: 267 vals = vals[vals >= thresh] 268 cdf_fn = lambda x: 1 - cum_fndict[distr](x, alpha, thresh) 269 return kstest(vals, cdf_fn)
270