1 import os
2 import sys
3 import numpy
4 import copy
5 import pickle
6 import tempfile
7
8 -def ROC(clean_ranks, glitch_ranks):
9 """
10 Calculates ROC curves based on the ranks assigned by a classifier to clean and glitch aux triggers.
11 """
12 clean_ranks_sorted = numpy.sort(clean_ranks)
13 glitch_ranks_sorted = numpy.sort(glitch_ranks)
14 number_of_false_alarms=[]
15 number_of_true_alarms=[]
16 FAP = []
17 Eff = []
18 for i,rank in enumerate(clean_ranks_sorted):
19 FAP.append(compute_FAP(clean_ranks_sorted, glitch_ranks_sorted, rank))
20 Eff.append(compute_Eff(clean_ranks_sorted, glitch_ranks_sorted, rank))
21
22 return numpy.asarray(FAP), numpy.asarray(Eff)
23
24
26 """
27 Compute false alarm probability for a given rank using array of ranks for clean and glitch samples.
28 Input arrays clean_ranks and glitch_ranks mst be sorted in ascending order.
29 """
30 number_of_false_alarms = len(clean_ranks) - numpy.searchsorted(clean_ranks,rank)
31 FAP = number_of_false_alarms / float(len(clean_ranks))
32 return FAP
33
35 """
36 Compute detection probability for a given rank using array of ranks for clean and glitch samples.
37 Input arrays clean_ranks and glitch_ranks mst be sorted in ascending order.
38 """
39 number_of_true_alarms = len(glitch_ranks) - numpy.searchsorted(glitch_ranks,rank)
40 Eff = number_of_true_alarms / float(len(glitch_ranks))
41 return Eff
42
43
45 """
46 Splits 2-d record array in N equal parts.
47 If the the number of elements in array is not divisible by Nparts, all but last sub arrays are equal.
48 It returns list of sub-arrays.
49 """
50
51 subarrays = []
52 n = int(len(array) / float(Nparts))
53
54 for i in range(Nparts):
55 if i == Nparts - 1:
56 subarrays.append(array[:][i*n:])
57 else:
58 subarrays.append(array[:][i*n:(i+1)*n])
59
60 return subarrays
61
63 """
64 Returns only clean samples from the set of random samples. By definition, a sample in unclean if there is a KW trigger with 0.1 seconds time window.
65 """
66
67 return Triggers[numpy.nonzero(Triggers['unclean'] == 0.0)[0],:]
68
70 """
71 Returns new recarray containing only those samples (triggers) that are in segments.
72 segments is a list of tuples of the form (t1,t2).
73 This function is not as efficient as it could be. It can be speed up if Triggers and segments are sorted.
74 But sorting would mix glitches and clean samples. We sacrifice performance for clarity ( but should keep in mind optional speed up).
75 """
76
77 out_of_segments_indices = []
78
79 for i in range(len(Triggers)):
80 time = Triggers[i]['GPS_s'] + Triggers[i]['GPS_ms']*10**(-3)
81 insegment = False
82 for seg in segments:
83 if (time >= seg[0]) and (time <= seg[1]):
84 insegment = True
85 if not insegment:
86 out_of_segments_indices.append(i)
87
88 return numpy.delete(Triggers,out_of_segments_indices,0)
89
90
92
93 if DQ_category == 'DQ2':
94 return Triggers[numpy.nonzero(Triggers['DQ2'] == 1.0)[0],:]
95 elif DQ_category == 'DQ3':
96 return Triggers[numpy.nonzero((Triggers['DQ2'] == 0.0) * (Triggers['DQ3'] == 1.0))[0],:]
97 elif DQ_category == 'DQ4':
98 return Triggers[numpy.nonzero((Triggers['DQ2'] == 0.0) * (Triggers['DQ3'] == 0.0) * (Triggers['DQ4'] == 1.0))[0],:]
99 elif DQ_category == 'DQ23':
100 return Triggers[numpy.nonzero((Triggers['DQ2'] == 1.0) + (Triggers['DQ3'] == 1.0))[0],:]
101 elif DQ_category == 'DQ234':
102 return Triggers[numpy.nonzero((Triggers['DQ2'] == 1.0) + (Triggers['DQ3'] == 1.0) + (Triggers['DQ4'] == 1.0))[0],:]
103 elif DQ_category == 'aDQ2':
104 return Triggers[numpy.nonzero(Triggers['DQ2'] == 0.0)[0],:]
105 elif DQ_category == 'aDQ23':
106 return Triggers[numpy.nonzero((Triggers['DQ2'] == 0.0) * (Triggers['DQ3'] == 0.0))[0],:]
107 elif DQ_category == 'aDQ234':
108 return Triggers[numpy.nonzero((Triggers['DQ2'] == 0.0) * (Triggers['DQ3'] == 0.0) * (Triggers['DQ4'] == 0.0))[0],:]
109 elif DQ_category == 'ALL':
110 return Triggers
111 else:
112 raise ValueError("Unknown DQ category")
113
115
116 """
117 Reads in KW auxiliary triggers from files. Triggers are storead in the 2-D array.
118 The rows of the array are labelled by the names of the variables, which are read off of the first line of the input file.
119 The columns are populated by the values of the corresponding variables.
120 Every line (except the first) of the input file(s) corresponds to a column (or a KW trigger) in the array.
121 """
122 for (i,f) in enumerate(files):
123 flines = open(f).readlines()
124 variables = flines[0].split()
125 formats = ['g8' for a in range(len(variables))]
126 if i > 0:
127 KWAuxTriggers = numpy.concatenate((KWAuxTriggers ,numpy.loadtxt(f,skiprows=1, dtype={'names': variables,'formats':formats})),axis=0)
128 else:
129 KWAuxTriggers = numpy.loadtxt(f,skiprows=1, dtype={'names': variables,'formats':formats})
130
131 return KWAuxTriggers
132
134 """
135 Shuffle segmented trigger packets. The trigger packets are segmented with each dT seconds usig G
136 PS time.
137 """
138 gps = KWAuxTriggers['GPS']
139 gps_start_time = int(numpy.min(gps))
140 gps_end_time = int(numpy.max(gps))+1
141 duration = (gps_end_time - gps_start_time)
142 n_minutes = int(duration / dT) + 1
143 start_times = gps_start_time + dT * numpy.arange(n_minutes)
144 numpy.random.shuffle(start_times)
145 end_times = start_times + dT
146
147 start_indexes = numpy.searchsorted(gps, start_times)
148 end_indexes = numpy.searchsorted(gps, end_times)
149 n_triggers = len(KWAuxTriggers)
150 ShuffledKWAuxTriggers = numpy.empty((n_triggers,), dtype=KWAuxTriggers.dtype)
151 current_start_index = 0
152 current_end_index = 0
153 for (start_index, end_index) in zip(start_indexes, end_indexes):
154 if start_index < end_index:
155 current_end_index = current_start_index + end_index - start_index
156 ShuffledKWAuxTriggers[:][current_start_index:current_end_index] = KWAuxTriggers[:][start_index:end_index]
157 current_start_index = current_end_index
158
159 return ShuffledKWAuxTriggers
160
162
163 """
164 Irrelevant channels and trigger parameters are excluded.
165 excludechannels is a list of irrelevant channel names to be excluded.
166 excludeparameters is a list of comma separated parameters. i.e. dur,freq,npts
167 """
168
169 KWAuxvariables = list(KWAuxTriggers.dtype.names)
170 filteredvariables = copy.copy(KWAuxvariables)
171
172
173 if not excludeparameters and excludechannels:
174 return KWAuxTriggers
175
176 if excludeparameters:
177 ExParams = excludeparameters.split(",")
178 for exp in ExParams:
179 for pvar in KWAuxvariables:
180 if exp.strip() in pvar.strip():
181 if pvar in filteredvariables:
182 filteredvariables.remove(pvar)
183
184 if excludechannels:
185 for exc in excludechannels:
186 for cvar in KWAuxvariables:
187 if cvar in filteredvariables:
188 if exc.strip() in cvar.strip():
189 filteredvariables.remove(cvar)
190
191
192
193
194
195 n_triggers = len(KWAuxTriggers)
196 formats = ['g8' for a in range(len(filteredvariables))]
197 FilteredKWAuxTriggers = numpy.empty((n_triggers,),dtype={'names':filteredvariables,'formats':formats})
198
199 for fvariable in filteredvariables:
200 FilteredKWAuxTriggers[fvariable] = KWAuxTriggers[fvariable]
201
202 return FilteredKWAuxTriggers
203
204
205
206
208
209 """
210 Converts KW auxiliary triggers into MVSC triggers.
211 KWAuxGlitchTriggers - KW triggers corresponding to glitches in DARM
212 KWAuxCleanTriggers - KW triggers correspondingto clean DARM data.
213 """
214 KWvariables = list(KWAuxGlitchTriggers.dtype.names)
215 if ExcludeVariables:
216 for variable in ExcludeVariables:
217 KWvariables.remove(variable)
218
219
220 if 'GPS' in KWvariables:
221 KWvariables.remove('GPS')
222
223 MVSCvariables = ['index', 'i', 'w', 'GPS_s', 'GPS_ms']+ KWvariables + ['glitch-rank']
224 formats = ['i','i','g8','i', 'i'] + ['g8' for a in range(len(MVSCvariables) - 5)]
225 n_triggers = len(KWAuxGlitchTriggers) + len(KWAuxCleanTriggers)
226
227 i_row = numpy.concatenate((numpy.ones(len(KWAuxGlitchTriggers)), numpy.zeros(len(KWAuxCleanTriggers))))
228 index_row = numpy.arange(1, n_triggers + 1)
229 w_row = numpy.ones(n_triggers)
230 glitch_rank_row = numpy.zeros(n_triggers)
231
232 MVSCTriggers = numpy.empty((n_triggers,), dtype={'names': MVSCvariables,'formats':formats})
233 MVSCTriggers['index'] = index_row
234 MVSCTriggers['i'] = i_row
235 MVSCTriggers['w'] = w_row
236 MVSCTriggers['glitch-rank'] = glitch_rank_row
237
238 MVSCTriggers['GPS_s'] = (numpy.concatenate((KWAuxGlitchTriggers['GPS'], KWAuxCleanTriggers['GPS'])) // 1.0).astype('int')
239 MVSCTriggers['GPS_ms'] = ((numpy.around((numpy.concatenate((KWAuxGlitchTriggers['GPS'], KWAuxCleanTriggers['GPS'])) % 1.0), decimals=3) * 10**3) // 1.0).astype('int')
240 for variable in MVSCvariables:
241 if not variable in ['index', 'i', 'w', 'glitch-rank', 'GPS_s', 'GPS_ms']:
242 MVSCTriggers[variable] = numpy.concatenate((KWAuxGlitchTriggers[variable], KWAuxCleanTriggers[variable]))
243
244
245 return MVSCTriggers
246
248
249 """
250 Write MVSC triggers to file.
251 If Classified = False, triggers are treated as unclassfied and saved in the input file for MVSC.
252 If Classified = True, triggers as saved in the same format as output of MVSC.
253 """
254 if not Classified:
255 Unclassified_variables = list(MVSCTriggers.dtype.names)
256 for var in ['index', 'i', 'w', 'glitch-rank']:
257 if var in Unclassified_variables: Unclassified_variables.remove(var)
258 Unclassified_variables.append('i')
259 formats = []
260 for var in Unclassified_variables:
261 if var in ['GPS_s', 'GPS_ms', 'i']:
262 formats.append('i')
263 else:
264 formats.append('g8')
265
266
267 Triggers = numpy.empty(MVSCTriggers.shape, dtype={'names': Unclassified_variables,'formats':formats})
268
269 for variable in Unclassified_variables:
270 Triggers[variable] = MVSCTriggers[variable]
271
272 else:
273 Triggers = MVSCTriggers
274
275
276 file = open(output_filename, "w")
277
278 if Classified:
279 first_line = " ".join(list(Triggers.dtype.names))
280 file.write(first_line + "\n")
281 else:
282 first_line = str(len(list(Triggers.dtype.names)[:-1]))
283 second_line = " ".join(list(Triggers.dtype.names)[:-1])
284 file.write(first_line + "\n")
285 file.write(second_line + "\n")
286
287
288 if len(Triggers.shape):
289 for i in range(len(Triggers)):
290 line = " ".join(["%0.3f" % (var) for var in Triggers[i]])
291 file.write(line + "\n")
292 file.close()
293 else:
294 line = " ".join(["%0.3f" % (Triggers[var]) for var in list(Triggers.dtype.names)])
295 file.write(line + "\n")
296 file.close()
297
298
299
301
302 """
303 Reads in MVSC triggers from files. MVSC triggers are storead in the 2-D array.
304 The rows of the array are labelled by the names of the variables, which are read off of the first(or second) line of the input file,
305 depending on whether the triggers were classifed or not.
306 The columns are populated by the values of the corresponding variables.
307 Every line (except the first 1 or 2 lines) of the input file(s) corresponds to a column (or a MVSC trigger) in the array.
308 """
309 if Classified == True:
310 varline = 0
311 nskiplines = 1
312 else:
313 varline = 1
314 nskiplines = 2
315 if len(files)==0:
316 print "Error: Empty input file list."
317 sys.exit(1)
318 flines = open(files[0]).readlines()
319 variables = flines[varline].split()
320 if Classified == False:
321 variables.append("i")
322 formats = []
323 for var in variables:
324 if var in ['GPS_s', 'GPS_ms', 'i', 'index']:
325 formats.append('i')
326 else:
327 formats.append('g8')
328
329 for file in files[1:]:
330 flines.extend(open(file).readlines()[nskiplines:])
331
332
333 tmpfile = tempfile.TemporaryFile()
334 tmpfile.writelines(flines)
335 tmpfile.seek(0)
336 if not (len(flines) == nskiplines):
337 trigs = numpy.loadtxt(tmpfile,skiprows=nskiplines, dtype={'names': variables,'formats':formats})
338 if not trigs.shape:
339 trigs = trigs.reshape((1,))
340 MVSCTriggers = trigs
341 else:
342 MVSCTriggers = numpy.empty((0,), dtype={'names': variables,'formats':formats})
343
344 tmpfile.close()
345 return MVSCTriggers
346
347
348
349
351
352 """
353 Reads in the pickled output of CV data (eg: kwl1-35.track.9.pickle) and inverts the data storage. Returns a list of gwtrg's removed by the Cveto method.
354 returned gwtrg's are labeled by tcent (and only tcent) and are associated with a given vconfig (vchan, vthr, vwin) and vstats (dsec, c_dsec, etc)
355 input arg:
356 filename: the file that is to be loaded. This must be a string
357 output arg:
358 gwtrg_vtd_tcent: a list of gwtrgs (labeled by tcent) with associated vconfig and vstats. the storage structure is: under each column: [tcent, [vconfig], [vstats]]
359 check below for exact formats of vconfig and vstats.
360 """
361
362
363 col_kw = {'tstart':0, 'tstop':1, 'tcent':2, 'fcent':3, 'uenergy':4, 'nenergy':5, 'npix':6, 'signif':7}
364
365
366 file = open(filename, 'r')
367 tfp = pickle.load(file)
368
369
370 h = dict([[tfp[0][i], i] for i in range(len(tfp[0]))])
371
372
373 c_livetime = tfp[1][h['livetime']]
374 c_ngwtrg = tfp[1][h['#gwtrg']]
375 ngwtrg_vtd = tfp[1][h['#gwtrg']] - tfp[len(tfp) - 1][h['#gwtrg']] + tfp[len(tfp) -1][h['vact']]
376
377
378 c_dsec = 0.0
379 c_csec = 0.0
380 c_vact = 0.0
381
382
383 gwtrg_vtd_tcent = []
384
385
386
387 for lineidx in range(len(tfp)):
388 line = tfp[lineidx]
389 if line[0] == 'livetime':
390 pass
391 else:
392
393 c_dsec += line[h['dsec']]
394 c_csec += line[h['csec']]
395 c_vact += line[h['vact']]
396 vconfig = [line[h['vchan']], line[h['vthr']], line[h['vwin']]]
397 vstats = [line[h['livetime']], line[h['#gwtrg']], line[h['dsec']], line[h['csec']], line[h['vact']], line[h['vsig']], c_livetime, c_ngwtrg, c_dsec, c_csec, c_vact, lineidx]
398 gwtrg_vtd = line[h['gwtrg_vetoed']]
399
400 if gwtrg_vtd[0] != 'NONE':
401 for trg in gwtrg_vtd:
402 gwtrg_vtd_tcent.append([trg[col_kw['tcent']], vconfig, vstats])
403
404
405
406 return sorted(gwtrg_vtd_tcent, key=lambda gwtrg: gwtrg[0])
407
409
410 """
411 converts a channel name into an integer (or vice versa) using channels (a file) as a dictionary.
412 channels should correspond to a file in which each line is a channel name, and the line number defines the int.
413 the idea is to convert between OVL channel integers and channel names quickly.
414 """
415
416 channels_file=open(channels, 'r')
417
418
419 if isinstance(chan, str):
420 chan_int=0
421 for line in channels_file:
422 if line == chan:
423 return chan_int
424 else:
425 chan_int+=1
426 return False
427
428
429 if isinstance(chan, (int,float)):
430 chan_int=int(chan)
431 if chan_int < len(channels_file) and chan_int >= 0:
432 return channels_file[chan_int]
433 else:
434 return False
435