pylal.KW_veto

1 #!/usr/bin/env python 2 # 3 # Copyright (C) 2009 Tomoki Isogai 4 # 5 # This program is free software; you can redistribute it and/or modify it 6 # under the terms of the GNU General Public License as published by the 7 # Free Software Foundation; either version 3 of the License, or (at your 8 # option) any later version. 9 # 10 # This program is distributed in the hope that it will be useful, but 11 # WITHOUT ANY WARRANTY; without even the implied warranty of 12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General 13 # Public License for more details. 14 # 15 # You should have received a copy of the GNU General Public License along 16 # with this program; if not, write to the Free Software Foundation, Inc., 17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 18 19 """ 20 Tomoki Isogai (isogait@carleton.edu) 21 22 Utility functions for KW_veto codes 23 24 """ 25 26 from __future__ import division 27 28 import os 29 import sys 30 from math import floor, ceil 31 import cPickle 32 import glob 33 import shutil 34 import tempfile 35 36 import sqlite3 37 38 from glue.segments import segment, segmentlist 39 from glue import segmentsUtils 40 41 42 # ============================================================================= 43 # 44 # Database Utility 45 # 46 # ============================================================================= 47

48 -def get_connection_filename(filename, tmp_path = None, replace_file = False, verbose = False):

49 """ 50 copied from glue.ligolw.dbtables by Kipp Cannon 51 I can't directly import from glue because dbtables collides with other tables 52 53 Utility code for moving database files to a (presumably local) 54 working location for improved performance and reduced fileserver 55 load. 56 """ 57 def mktmp(path, verbose = False): 58 if not os.path.isdir(path): 59 os.makedirs(path) 60 fd, filename = tempfile.mkstemp(suffix = ".sqlite", dir = path) 61 os.close(fd) 62 if verbose: 63 print >>sys.stderr, "using '%s' as workspace" % filename 64 return filename

65 66 def truncate(filename, verbose = False): 67 if verbose: 68 print >>sys.stderr, "'%s' exists, truncating ..." % filename 69 try: 70 fd = os.open(filename, os.O_WRONLY | os.O_TRUNC) 71 except: 72 if verbose: 73 print >>sys.stderr, "cannot truncate '%s': %s" % (filename, str(e)) 74 return 75 os.close(fd) 76 if verbose: 77 print >>sys.stderr, "done." 78 79 def cpy(srcname, dstname, verbose = False): 80 if verbose: 81 print >>sys.stderr, "copying '%s' to '%s' ..." % (srcname, dstname) 82 shutil.copy(srcname, dstname) 83 84 database_exists = os.access(filename, os.F_OK) 85 86 if tmp_path is not None: 87 target = mktmp(tmp_path, verbose) 88 if database_exists: 89 if replace_file: 90 # truncate database so that if this job 91 # fails the user won't think the database 92 # file is valid 93 truncate(filename, verbose = verbose) 94 else: 95 # need to copy existing database to work 96 # space for modifications 97 i = 1 98 while True: 99 try: 100 cpy(filename, target, verbose) 101 except IOError, e: 102 import errno 103 import time 104 if e.errno == errno.ENOSPC: 105 if i < 5: 106 if verbose: 107 print >>sys.stderr, "Warning: attempt %d: no space left on device, sleeping and trying again ..." % i 108 time.sleep(10) 109 i += 1 110 continue 111 else: 112 if verbose: 113 print >>sys.stderr, "Warning: attempt %d: no space left on device: working with original file" % i 114 os.remove(target) 115 target = filename 116 else: 117 raise e 118 break 119 else: 120 target = filename 121 if database_exists and replace_file: 122 truncate(target, verbose = verbose) 123 124 del mktmp 125 del truncate 126 del cpy 127 128 return target 129 130 # ============================================================================= 131 # 132 # Reading and Writing Utility 133 # 134 # ============================================================================= 135

136 -def save_db(cursor, table_name, filename, working_filename, order_by = None, verbose=True):

137 """ 138 Originally written by Nickolas Fotopoulos, modified. 139 Convert a dictionary to SQLite database for later use and also save to 140 specified format file. 141 Supported file extensions are: 142 * .pickle - Python pickle file (dictionary serialized unchanged) 143 * .pickle.gz - gzipped pickle (dictionary serialized unchanged) 144 * .mat - Matlab v4 file (dictionary keys become variable names; requires 145 Scipy) 146 * .txt - ASCII text 147 * .txt.gz - gzipped ASCII text 148 * .bd - sqlite database 149 150 FIXME: add xml 151 """ 152 if verbose: 153 print >> sys.stderr, "saving data in %s..." % filename 154 155 if filename == '': 156 raise ValueError, "Empty filename" 157 158 ext = os.path.splitext(filename)[-1] 159 160 ## .db (SQLite) file (save all table no matter what specified) 161 if ext == '.db': 162 shutil.copy(working_filename,filename) 163 return 164 165 # FIXME: in theory it's suceptible to SQL injection attack 166 cmd = "select * from %s"%table_name 167 if order_by is not None: 168 cmd += " order by %s"%order_by 169 table = cursor.execute(cmd) 170 171 ## no save -> print out in stdout 172 if ext == '.stdout': 173 print "\n".join(["# column %d: "%(i+1)+c[0] for i, c in enumerate(cursor.description)]) 174 for row in table: 175 print "\t".join(map(str,row)) 176 return 177 178 ## Set up file_handle 179 file_handle = file(filename, 'wb') 180 181 # For gz files, bind file_handle to a gzip file and find the new extension 182 if ext == '.gz': 183 import gzip 184 file_handle = gzip.GzipFile(fileobj=file_handle, mode='wb') 185 ext = os.path.splitext(filename[:-len(ext)])[-1] 186 187 if ext == '.txt': 188 lines = [] 189 lines.append("\n".join(["# column %d: "%(i+1)+c[0] for i, c in enumerate(cursor.description)])+"\n") 190 for row in table: 191 lines.append("\t".join(map(str,row))) 192 file_handle.write("\n".join(lines)) 193 return 194 195 if ext == '.pickle' or '.mat': 196 # create dictionary whose keys correspond to columns 197 columns = [c[0] for c in cursor.description] 198 # initialize 199 data_dict = {} 200 for c in columns: 201 data_dict[c] = [] 202 # insert data 203 for row in table: 204 for c, r in zip(columns,row): 205 data_dict[c].append(r) 206 207 if ext == '.mat': 208 import scipy.io 209 scipy.io.savemat(filename, data_dict) 210 return 211 212 if ext == '.pickle': 213 import cPickle 214 output = cPickle.dumps(data_dict, protocol = 2) 215 file_handle.write(output) 216 return 217 218 else: 219 raise ValueError, "Unrecognized file extension"

220

221 -def load_db(dbFile, tmp_path, verbose):

222 """ 223 Connect to database. 224 Also, return a dictionary having variables from KW_veto_calc in a form: 225 {variable name: variable value} 226 """ 227 if verbose: 228 print >> sys.stderr, "connecting to database %s..."%dbFile 229 working_filename = get_connection_filename(\ 230 dbFile, tmp_path=tmp_path, verbose=verbose) 231 connection = sqlite3.connect(working_filename) 232 cursor = connection.cursor() 233 params = {} 234 for v in cursor.execute("select var_name, value from params").fetchall(): 235 params[v[0]] = v[1] 236 237 return cursor, connection, working_filename, params

238

239 -def get_candidate(cursor, critical_usedPer):

240 """ 241 This function returns a tuple: 242 (threshold, used percentage, # of coincident KW triggers above the threshold, # of total KW triggers above the threshold, # of vetoed GW triggers, veto efficiency, dead time, dead time percentage, (used percentage) / (random used percentage), (veto efficiency) / (dead time percentage)) that corresponds to threshold to be used for veto. 243 This returns just None for non candidate channel. 244 """ 245 return cursor.execute("select * from result where usedPercentage > ? order by threshold asc",(critical_usedPer,)).fetchone()

246 247

248 -def rename(src):

249 """ 250 If src alaready exists, this function rename it so that new file/directory 251 won't overwite 252 """ 253 index = 0; dst = os.path.normpath(src) 254 while os.path.exists(dst): 255 index += 1 256 dst = "%s.%d"%(os.path.normpath(src),index) 257 if index > 0: 258 print >> sys.stderr, "Warning: %s already exists, renaming the existing one to %s and continuing..."%(src,dst) 259 os.renames(src,dst)

260

261 -def load_segs_db(cur,tableName):

262 """ 263 Load segments in glue.segments.segmentlist format 264 FIXME: possibly SQL Injection attack 265 """ 266 seg_list = segmentlist([segment(s[0], s[1]) for s in\ 267 cur.execute("select start_time, end_time from %s"%tableName)]) 268 269 return seg_list.coalesce()

270

271 -def write_segs_db(cur,seg_list,tableName):

272 """ 273 Write glue.segments.segmentlist format segments into database. 274 FIXME: possibly SQL Injection attack 275 """ 276 seg_list.coalesce() 277 cur.execute("create table %s (start_time int, end_time int)"%tableName) 278 cur.executemany("insert into %s values (?,?)"%tableName,seg_list) 279 280 return cur

281

282 -def write_segs(segs,file_name):

283 """ 284 Take glue.segments.segmentlist and write in a ascii file in a form compatible 285 to the segment database. 286 """ 287 open(file_name,'w').write("\n".join(["%d %d"%(s[0],s[1]) for s in segs]))

288 289 # read segment file

290 -def read_segfile(segfile):

291 """ 292 Read segments in segwizard form with sanity check and return 293 glue.segments.segmentlist 294 """ 295 try: 296 seg_list =\ 297 segmentsUtils.fromsegwizard(open(segfile),coltype=float,strict=False) 298 except(IOError): 299 print >> sys.stderr, """ 300 Error: file contains no segments or glue.segmentsUtils.fromsegwizard is 301 not reading segments correctly. Please check the seg file. 302 (possibly comments in the file is causing this) 303 %s 304 """%segfile 305 raise 306 307 seg_list.coalesce() 308 if seg_list == [] or abs(seg_list) == 0: 309 print >> sys.stderr, """ 310 Warning: File contains no segments or glue.segmentsUtils.fromsegwizard is 311 not reading segments correctly. Please check the seg file. 312 (Possibly comments in the file is causing this.) 313 Attempting to ignore. 314 """ 315 return seg_list

316

317 -def read_segfile_xml(segfile,verbose):

318 """ 319 Read segment file in ligolw xml type and return in glue.segments.segmentlist 320 format. 321 """ 322 from glue.ligolw import ligolw 323 from glue.ligolw import table 324 from glue.ligolw import utils 325 326 def ContentHandler(xmldoc): 327 return ligolw.PartialLIGOLWContentHandler(xmldoc, lambda name, attrs:\ 328 (name == ligolw.Table.tagName) and\ 329 (table.Table.TableName(attrs["Name"]) in ["segment"]))

330 try: 331 table.use_in(ligolw.PartialLIGOLWContentHandler) 332 except AttributeError: 333 # old glue did not allow .use_in(). 334 # FIXME: remove when we can require the latest version of glue 335 pass 336 337 xmldoc = utils.load_url(segfile, verbose = verbose,gz = segfile.endswith(".gz"), contenthandler = ContentHandler) 338 seg_list = segmentlist() 339 for table_elem in xmldoc.getElements(lambda e:\ 340 (e.tagName == ligolw.Table.tagName)): 341 for row in table_elem: 342 seg_list.append(segment(row.start_time, row.end_time)) 343 xmldoc.unlink() 344 return seg_list 345

346 -def find_version_xml(segfile,seg,verbose):

347 """ 348 Find out the version of the flag for the given seg. 349 """ 350 from glue.ligolw import ligolw 351 from glue.ligolw import table 352 from glue.ligolw import utils 353 354 def ContentHandler(xmldoc): 355 return ligolw.PartialLIGOLWContentHandler(xmldoc, lambda name, attrs:\ 356 (name == ligolw.Table.tagName) and\ 357 (table.Table.TableName(attrs["Name"]) in ["segment_definer","segment_summary"]))

358 try: 359 table.use_in(ligolw.PartialLIGOLWContentHandler) 360 except AttributeError: 361 # old glue did not allow .use_in(). 362 # FIXME: remove when we can require the latest version of glue 363 pass 364 365 xmldoc = utils.load_url(segfile, verbose = verbose,gz = segfile.endswith(".gz"), contenthandler = ContentHandler) 366 for n, table_elem in enumerate(xmldoc.getElements(lambda e:\ 367 (e.tagName == ligolw.Table.tagName))): 368 if n == 0: 369 definer = {} 370 for row in table_elem: 371 if row.name != "RESULT": 372 definer[str(row.segment_def_id).split(":")[-1]] = row.version 373 374 if n == 1: 375 for row in table_elem: 376 if seg[0] >= row.start_time and seg[1] <= row.end_time: 377 if str(row.segment_def_id).split(":")[-1] in definer.keys(): 378 xmldoc.unlink() 379 return definer[str(row.segment_def_id).split(":")[-1]] 380 381

382 -def get_segment(eventTime, pos_window, neg_window):

383 """ 384 Given the time of trigger, this aplies +/- window and round to integers 385 in a way it broadens the window 386 """ 387 return segment(floor(eventTime - neg_window), ceil(eventTime + pos_window))

388

389 -def get_result_files(result_dir,verbose):

390 """ 391 Make a list of the result files from KW_veto_calc and check sanity. 392 """ 393 files_list = [os.path.join(result_dir,f) for f in os.listdir(result_dir) if f.endswith("-data.db")] 394 if verbose: 395 print >> sys.stderr, "result files:", files_list 396 if files_list==[]: # check if there is at least one file 397 print >> sys.stderr, "Error: no result files found in %s"%result_dir 398 sys.exit(1) 399 400 # check if they have the same name tag 401 if os.path.commonprefix(files_list)=="": 402 print >> sys.stderr, """ 403 Error: Possibly files from different runs are coexisting. 404 Please check the result_dir. 405 """ 406 sys.exit(1) 407 408 return files_list

409

410 -def get_files_from_globs(globs):

411 """ 412 Get individual file names from names with globs. 413 Input could be a mixture of glob and ',' seperated file list, something like 414 '../segs/*,H1_segs.txt' 415 also there might be white space between file names 416 """ 417 file_globs = [f.strip() for f in globs.split(",") if f.strip()!=""] 418 all_files = [] 419 for f in file_globs: 420 files = glob.glob(f) 421 if files == []: 422 print >> sys.stderr, "Error: %s not found"%f 423 sys.exit(1) 424 else: 425 all_files.extend(files) 426 427 return all_files

428

Source Code for Module pylal.KW_veto_utils