1 from __future__ import division
2
3
4
5
6
7
8
9
10
11
12
13
14
15 """
16 For some set of values above a threshold, e.g. trigger SNRs, the functions
17 in this module perform maximum likelihood fits with 1-sigma uncertainties
18 to various simple functional forms of PDF, all normalized to 1.
19 You can also obtain the fitted function and its (inverse) CDF and perform
20 a Kolmogorov-Smirnov test.
21
22 Usage:
23 # call the fit function directly if the threshold is known
24 alpha, sigma_alpha = fit_exponential(snrs, 5.5)
25
26 # apply a threshold explicitly
27 alpha, sigma_alpha = fit_above_thresh('exponential', snrs, thresh=6.25)
28
29 # let the code work out the threshold from the smallest value via the default thresh=None
30 alpha, sigma_alpha = fit_above_thresh('exponential', snrs)
31
32 # or only fit the largest N values, i.e. tail fitting
33 thresh = tail_threshold(snrs, N=500)
34 alpha, sigma_alpha = fit_above_thresh('exponential', snrs, thresh)
35
36 # obtain the fitted function directly
37 xvals = numpy.xrange(5.5, 10.5, 20)
38 exponential_fit = expfit(xvals, alpha, thresh)
39
40 # or access function by name
41 exponential_fit_1 = fit_fn('exponential', xvals, alpha, thresh)
42
43 # get the KS test statistic and p-value - see scipy.stats.kstest
44 ks_stat, ks_pval = KS_test('exponential', snrs, alpha, thresh)
45 """
46
47 import numpy
48 from scipy.stats import kstest
49
51 '''
52 Maximum likelihood fit for the coefficient alpha for a distribution of
53 discrete values p(x) = alpha exp(-alpha (x-x_t)) above a threshold x_t.
54
55 vals: sequence of real numbers none of which lies below thresh
56 thresh: threshold used in the fitting formula
57 '''
58 vals = numpy.array(vals)
59 if min(vals) < thresh:
60 raise RuntimeError("Values to be fit must all be above threshold!")
61 alpha = 1. / (numpy.mean(vals) - thresh)
62
63 sigma_alpha = alpha / len(vals)**0.5
64 return alpha, sigma_alpha
65
67 '''
68 Maximum likelihood fit for the coefficient alpha for a distribution of
69 discrete values p(x) = alpha x exp(-alpha (x**2-x_t**2)/2) above a
70 threshold x_t.
71
72 vals: sequence of real numbers none of which lies below thresh
73 thresh: threshold used in the fitting formula
74 '''
75 vals = numpy.array(vals)
76 if min(vals) < thresh:
77 raise RuntimeError("Values to be fit must all be above threshold!")
78 alpha = 2. / (numpy.mean(vals**2.) - thresh**2.)
79
80 sigma_alpha = alpha / len(vals)**0.5
81 return alpha, sigma_alpha
82
84 '''
85 Maximum likelihood fit for the coefficient alpha for a distribution of
86 discrete values p(x) = ((alpha-1)/x_t) (x/x_t)**-alpha above a threshold
87 x_t.
88
89 vals: sequence of real numbers none of which lies below thresh
90 thresh: threshold used in the fitting formula
91 '''
92 vals = numpy.array(vals)
93 if min(vals) < thresh:
94 raise RuntimeError("Values to be fit must all be above threshold!")
95 alpha = numpy.mean(numpy.log(vals/thresh))**-1. + 1.
96
97 sigma_alpha = (alpha - 1.) / len(vals)**0.5
98 return alpha, sigma_alpha
99
100 -def expfit(xvals, alpha, thresh):
101 '''
102 The fitted exponential function normalized to 1 above threshold
103
104 xvals: the values at which the fit PDF is to be evaluated
105 alpha: the fitted exponent factor
106 thresh: threshold value for the fitting process
107
108 To normalize to a given total count multiply by the count.
109 '''
110 xvals = numpy.array(xvals)
111 fit = alpha * numpy.exp(-alpha * (xvals - thresh))
112
113 numpy.putmask(fit, xvals < thresh, 0.)
114 return fit
115
117 '''
118 The integral of the exponential fit above a given value (reverse CDF)
119 normalized to 1 above threshold
120
121 To normalize to a given total count multiply by the count.
122 '''
123 xvals = numpy.array(xvals)
124 cum_fit = numpy.exp(-alpha * (xvals - thresh))
125
126 numpy.putmask(cum_fit, xvals < thresh, 0.)
127 return cum_fit
128
130 '''
131 The fitted Rayleigh function normalized to 1 above threshold
132
133 xvals: the values at which the fit PDF is to be evaluated
134 alpha: the fitted exponent factor
135 thresh: threshold value for the fitting process
136
137 To normalize to a given total count multiply by the count.
138 '''
139 if thresh < 0:
140 raise RuntimeError('Threshold for Rayleigh distribution cannot be negative!')
141 xvals = numpy.array(xvals)
142 fit = alpha * xvals * numpy.exp(-alpha * (xvals**2. - thresh**2.) / 2.)
143
144 numpy.putmask(fit, xvals < thresh, 0.)
145 return fit
146
148 '''
149 The integral of the Rayleigh fit above the x-values given (reverse CDF)
150 normalized to 1 above threshold
151
152 To normalize to a given total count multiply by the count.
153 '''
154 if thresh < 0:
155 raise RuntimeError('Threshold for Rayleigh distribution cannot be negative!')
156 xvals = numpy.array(xvals)
157 cum_fit = numpy.exp(-alpha * (xvals**2. - thresh**2.) / 2.)
158
159 numpy.putmask(cum_fit, xvals < thresh, 0.)
160 return cum_fit
161
163 '''
164 The fitted power-law function normalized to 1 above threshold
165
166 xvals: the values at which the fit PDF is to be evaluated
167 alpha: the fitted power
168 thresh: threshold value for the fitting process
169
170 To normalize to a given total count multiply by the count.
171 '''
172 xvals = numpy.array(xvals)
173 fit = (alpha - 1.) * xvals**(-alpha) * thresh**(alpha - 1.)
174
175 numpy.putmask(fit, xvals < thresh, 0.)
176 return fit
177
179 '''
180 The integral of the power-law fit above the x-values given (reverse CDF)
181 normalized to 1 above threshold
182
183 To normalize to a given total count multiply by the count.
184 '''
185 xvals = numpy.array(xvals)
186 cum_fit = xvals**(1. - alpha) * thresh**(alpha - 1.)
187
188 numpy.putmask(cum_fit, xvals < thresh, 0.)
189 return cum_fit
190
191 fitdict = {
192 'exponential' : fit_exponential,
193 'rayleigh' : fit_rayleigh,
194 'power' : fit_power
195 }
196
197 fndict = {
198 'exponential' : expfit,
199 'rayleigh' : rayleighfit,
200 'power' : powerfit
201 }
202
203 cum_fndict = {
204 'exponential' : expfit_cum,
205 'rayleigh' : rayleighfit_cum,
206 'power' : powerfit_cum,
207 }
208
210 '''
211 Maximum likelihood fit for the coefficient alpha for a distribution of
212 discrete values p(x) = alpha exp(-alpha*x) above a given threshold.
213 Values below threshold will be discarded. If no threshold is specified,
214 the minimum sample value will be used.
215
216 distr: name of distribution, either 'exponential' or 'rayleigh' or 'power'
217
218 vals: sequence of real numbers
219
220 thresh: threshold to apply before fitting - if thresh=None, use the lowest
221 value
222 '''
223 vals = numpy.array(vals)
224 if thresh == None:
225 thresh = min(vals)
226 else:
227 vals = vals[vals >= thresh]
228 return fitdict[distr](vals, thresh)
229
231 '''
232 Determine a threshold above which there are N louder values
233 '''
234 vals = numpy.array(vals)
235 if len(vals) < N:
236 raise RuntimeError('Not enough input values to determine threshold')
237 vals.sort()
238 return min(vals[-N:])
239
240 -def fit_fn(distr, xvals, alpha, thresh):
241 '''
242 The fitted function normalized to 1 above threshold
243 '''
244 return fndict[distr](xvals, alpha, thresh)
245
246 -def cum_fit(distr, xvals, alpha, thresh):
247 '''
248 The integral of the fitted function above a given value (reverse CDF)
249 normalized to 1 above threshold
250 '''
251 return cum_fndict[distr](xvals, alpha, thresh)
252
253 -def KS_test(distr, vals, alpha, thresh=None):
254 '''
255 Perform Kolmogorov-Smirnov test of the given set of discrete values
256 above a given threshold for the fitted distribution function
257 ex.: KS_test('exponential', vals, alpha, thresh)
258 If no threshold is specified, the minimum sample value will be used.
259
260 Returns the KS test statistic and its p-value: lower p means less
261 probable under the hypothesis of a perfect fit
262 '''
263 vals = numpy.array(vals)
264 if thresh == None:
265 thresh = min(vals)
266 else:
267 vals = vals[vals >= thresh]
268 cdf_fn = lambda x: 1 - cum_fndict[distr](x, alpha, thresh)
269 return kstest(vals, cdf_fn)
270