Package pylal :: Module trigger_fits
Source Code for Module pylal.trigger_fits

  1  from __future__ import division 
  2   
  3  # Copyright T. Dent 2015 (thomas.dent@aei.mpg.de) 
  4  # 
  5  # This program is free software; you can redistribute it and/or modify it 
  6  # under the terms of the GNU General Public License as published by the 
  7  # Free Software Foundation; either version 2 of the License, or (at your 
  8  # option) any later version. 
  9  # 
 10  # This program is distributed in the hope that it will be useful, but 
 11  # WITHOUT ANY WARRANTY; without even the implied warranty of 
 12  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General 
 13  # Public License for more details. 
 14   
 15  """ 
 16  For some set of values above a threshold, e.g. trigger SNRs, the functions 
 17  in this module perform maximum likelihood fits with 1-sigma uncertainties 
 18  to various simple functional forms of PDF, all normalized to 1. 
 19  You can also obtain the fitted function and its (inverse) CDF and perform 
 20  a Kolmogorov-Smirnov test. 
 21   
 22  Usage: 
 23  # call the fit function directly if the threshold is known 
 24  alpha, sigma_alpha = fit_exponential(snrs, 5.5) 
 25   
 26  # apply a threshold explicitly 
 27  alpha, sigma_alpha = fit_above_thresh('exponential', snrs, thresh=6.25) 
 28   
 29  # let the code work out the threshold from the smallest value via the default thresh=None 
 30  alpha, sigma_alpha = fit_above_thresh('exponential', snrs) 
 31   
 32  # or only fit the largest N values, i.e. tail fitting 
 33  thresh = tail_threshold(snrs, N=500) 
 34  alpha, sigma_alpha = fit_above_thresh('exponential', snrs, thresh) 
 35   
 36  # obtain the fitted function directly 
 37  xvals = numpy.xrange(5.5, 10.5, 20) 
 38  exponential_fit = expfit(xvals, alpha, thresh) 
 39   
 40  # or access function by name 
 41  exponential_fit_1 = fit_fn('exponential', xvals, alpha, thresh) 
 42   
 43  # get the KS test statistic and p-value - see scipy.stats.kstest 
 44  ks_stat, ks_pval = KS_test('exponential', snrs, alpha, thresh) 
 45  """ 
 46   
 47  import numpy 
 48  from scipy.stats import kstest 
 49   
 50 -def fit_exponential(vals, thresh): 
 51      ''' 
 52      Maximum likelihood fit for the coefficient alpha for a distribution of 
 53      discrete values p(x) = alpha exp(-alpha (x-x_t)) above a threshold x_t. 
 54   
 55      vals: sequence of real numbers none of which lies below thresh 
 56      thresh: threshold used in the fitting formula  
 57      ''' 
 58      vals = numpy.array(vals) 
 59      if min(vals) < thresh: 
 60          raise RuntimeError("Values to be fit must all be above threshold!") 
 61      alpha = 1. / (numpy.mean(vals) - thresh) 
 62      # sigma is measurement standard deviation = (-d^2 log L/d alpha^2)^(-1/2) 
 63      sigma_alpha = alpha / len(vals)**0.5 
 64      return alpha, sigma_alpha 
 65   
 66 -def fit_rayleigh(vals, thresh): 
 67      ''' 
 68      Maximum likelihood fit for the coefficient alpha for a distribution of 
 69      discrete values p(x) = alpha x exp(-alpha (x**2-x_t**2)/2) above a  
 70      threshold x_t. 
 71   
 72      vals: sequence of real numbers none of which lies below thresh 
 73      thresh: threshold used in the fitting formula 
 74      ''' 
 75      vals = numpy.array(vals) 
 76      if min(vals) < thresh: 
 77          raise RuntimeError("Values to be fit must all be above threshold!") 
 78      alpha = 2. / (numpy.mean(vals**2.) - thresh**2.) 
 79      # sigma is measurement standard deviation = (-d^2 log L/d alpha^2)^(-1/2) 
 80      sigma_alpha = alpha / len(vals)**0.5 
 81      return alpha, sigma_alpha 
 82   
 83 -def fit_power(vals, thresh): 
 84      ''' 
 85      Maximum likelihood fit for the coefficient alpha for a distribution of 
 86      discrete values p(x) = ((alpha-1)/x_t) (x/x_t)**-alpha above a threshold  
 87      x_t. 
 88   
 89      vals: sequence of real numbers none of which lies below thresh 
 90      thresh: threshold used in the fitting formula 
 91      ''' 
 92      vals = numpy.array(vals) 
 93      if min(vals) < thresh: 
 94          raise RuntimeError("Values to be fit must all be above threshold!") 
 95      alpha = numpy.mean(numpy.log(vals/thresh))**-1. + 1. 
 96      # sigma is measurement standard deviation = (-d^2 log L/d alpha^2)^(-1/2) 
 97      sigma_alpha = (alpha - 1.) / len(vals)**0.5 
 98      return alpha, sigma_alpha 
 99   
100 -def expfit(xvals, alpha, thresh): 
101      ''' 
102      The fitted exponential function normalized to 1 above threshold 
103   
104      xvals: the values at which the fit PDF is to be evaluated 
105      alpha: the fitted exponent factor 
106      thresh: threshold value for the fitting process  
107   
108      To normalize to a given total count multiply by the count. 
109      ''' 
110      xvals = numpy.array(xvals) 
111      fit = alpha * numpy.exp(-alpha * (xvals - thresh)) 
112      # set fitted values below threshold to 0 
113      numpy.putmask(fit, xvals < thresh, 0.) 
114      return fit 
115   
116 -def expfit_cum(xvals, alpha, thresh): 
117      ''' 
118      The integral of the exponential fit above a given value (reverse CDF) 
119      normalized to 1 above threshold 
120   
121      To normalize to a given total count multiply by the count. 
122      ''' 
123      xvals = numpy.array(xvals) 
124      cum_fit = numpy.exp(-alpha * (xvals - thresh)) 
125      # set fitted values below threshold to 0 
126      numpy.putmask(cum_fit, xvals < thresh, 0.) 
127      return cum_fit 
128   
129 -def rayleighfit(xvals, alpha, thresh): 
130      ''' 
131      The fitted Rayleigh function normalized to 1 above threshold 
132   
133      xvals: the values at which the fit PDF is to be evaluated 
134      alpha: the fitted exponent factor 
135      thresh: threshold value for the fitting process  
136   
137      To normalize to a given total count multiply by the count. 
138      ''' 
139      if thresh < 0: 
140          raise RuntimeError('Threshold for Rayleigh distribution cannot be negative!') 
141      xvals = numpy.array(xvals) 
142      fit = alpha * xvals * numpy.exp(-alpha * (xvals**2. - thresh**2.) / 2.) 
143      # set fitted values below threshold to 0 
144      numpy.putmask(fit, xvals < thresh, 0.) 
145      return fit 
146   
147 -def rayleighfit_cum(xvals, alpha, thresh): 
148      ''' 
149      The integral of the Rayleigh fit above the x-values given (reverse CDF) 
150      normalized to 1 above threshold 
151   
152      To normalize to a given total count multiply by the count. 
153      ''' 
154      if thresh < 0: 
155          raise RuntimeError('Threshold for Rayleigh distribution cannot be negative!') 
156      xvals = numpy.array(xvals) 
157      cum_fit = numpy.exp(-alpha * (xvals**2. - thresh**2.) / 2.) 
158      # set fitted values below threshold to 0 
159      numpy.putmask(cum_fit, xvals < thresh, 0.) 
160      return cum_fit 
161   
162 -def powerfit(xvals, alpha, thresh): 
163      ''' 
164      The fitted power-law function normalized to 1 above threshold 
165   
166      xvals: the values at which the fit PDF is to be evaluated 
167      alpha: the fitted power 
168      thresh: threshold value for the fitting process  
169   
170      To normalize to a given total count multiply by the count. 
171      ''' 
172      xvals = numpy.array(xvals) 
173      fit = (alpha - 1.) * xvals**(-alpha) * thresh**(alpha - 1.) 
174      # set fitted values below threshold to 0 
175      numpy.putmask(fit, xvals < thresh, 0.) 
176      return fit 
177   
178 -def powerfit_cum(xvals, alpha, thresh): 
179      ''' 
180      The integral of the power-law fit above the x-values given (reverse CDF) 
181      normalized to 1 above threshold 
182   
183      To normalize to a given total count multiply by the count. 
184      ''' 
185      xvals = numpy.array(xvals) 
186      cum_fit = xvals**(1. - alpha) * thresh**(alpha - 1.) 
187      # set fitted values below threshold to 0 
188      numpy.putmask(cum_fit, xvals < thresh, 0.) 
189      return cum_fit 
190   
191  fitdict = { 
192      'exponential' : fit_exponential, 
193      'rayleigh'    : fit_rayleigh, 
194      'power'       : fit_power 
195  } 
196   
197  fndict = { 
198      'exponential' : expfit, 
199      'rayleigh'    : rayleighfit, 
200      'power'       : powerfit 
201  } 
202   
203  cum_fndict = { 
204      'exponential' : expfit_cum, 
205      'rayleigh'    : rayleighfit_cum, 
206      'power'       : powerfit_cum, 
207  } 
208   
209 -def fit_above_thresh(distr, vals, thresh=None): 
210      ''' 
211      Maximum likelihood fit for the coefficient alpha for a distribution of 
212      discrete values p(x) = alpha exp(-alpha*x) above a given threshold. 
213      Values below threshold will be discarded.  If no threshold is specified,  
214      the minimum sample value will be used. 
215   
216      distr: name of distribution, either 'exponential' or 'rayleigh' or 'power' 
217   
218      vals: sequence of real numbers 
219   
220      thresh: threshold to apply before fitting - if thresh=None, use the lowest 
221      value 
222      ''' 
223      vals = numpy.array(vals) 
224      if thresh == None: 
225          thresh = min(vals) 
226      else: 
227          vals = vals[vals >= thresh] 
228      return fitdict[distr](vals, thresh) 
229   
230 -def tail_threshold(vals, N=1000): 
231      ''' 
232      Determine a threshold above which there are N louder values 
233      ''' 
234      vals = numpy.array(vals) 
235      if len(vals) < N: 
236          raise RuntimeError('Not enough input values to determine threshold') 
237      vals.sort() 
238      return min(vals[-N:]) 
239   
240 -def fit_fn(distr, xvals, alpha, thresh): 
241      ''' 
242      The fitted function normalized to 1 above threshold 
243      ''' 
244      return fndict[distr](xvals, alpha, thresh) 
245   
246 -def cum_fit(distr, xvals, alpha, thresh): 
247      ''' 
248      The integral of the fitted function above a given value (reverse CDF) 
249      normalized to 1 above threshold 
250      ''' 
251      return cum_fndict[distr](xvals, alpha, thresh) 
252   
253 -def KS_test(distr, vals, alpha, thresh=None): 
254      ''' 
255      Perform Kolmogorov-Smirnov test of the given set of discrete values  
256      above a given threshold for the fitted distribution function 
257      ex.: KS_test('exponential', vals, alpha, thresh) 
258      If no threshold is specified, the minimum sample value will be used. 
259   
260      Returns the KS test statistic and its p-value: lower p means less 
261      probable under the hypothesis of a perfect fit 
262      ''' 
263      vals = numpy.array(vals) 
264      if thresh == None: 
265          thresh = min(vals) 
266      else: 
267          vals = vals[vals >= thresh] 
268      cdf_fn = lambda x: 1 - cum_fndict[distr](x, alpha, thresh) 
269      return kstest(vals, cdf_fn) 
270