1   
  2   
  3   
  4   
  5   
  6   
  7   
  8   
  9   
 10   
 11   
 12   
 13   
 14   
 15   
 16   
 17   
 18  """ 
 19  Auxiliary functions for running and postprocessing HACR pipeline code. 
 20  """ 
 21   
 22  from __future__ import division 
 23   
 24  import MySQLdb 
 25  import sys 
 26  import re 
 27  import datetime 
 28   
 29  from lal import LIGOTimeGPS, GPSToUTC 
 30   
 31  from pylal import git_version 
 32   
 33  from glue.ligolw import lsctables 
 34   
 35  __author__  = 'Duncan Macleod <duncan.macleod@ligo.org>' 
 36  __version__ = git_version.id 
 37  __date__    = git_version.date 
 38   
 39   
 40   
 41   
 42   
 43  _comment = re.compile('[#%]') 
 44  _delim   = re.compile('[\t\,\s]+') 
 45   
 46 -def trigger(line, columns=lsctables.SnglBurst.__slots__): 
  47      """ 
 48      Convert a line from an Omega-format ASCII file into a SnglBurst object. 
 49      """ 
 50   
 51      if isinstance(line, str): 
 52          dat = map(float, _delim.split(line.rstrip())) 
 53      else: 
 54          dat = map(float, line) 
 55   
 56      if len(dat)==8: 
 57          peak, peak_offset, freq, band, duration, n_pix, snr, totPower = dat 
 58          peak = LIGOTimeGPS(peak+peak_offset) 
 59          start    = LIGOTimeGPS(peak-duration/2) 
 60          stop     = LIGOTimeGPS(peak+duration/2) 
 61          av_freq  = freq 
 62          av_band  = band 
 63          err_freq = 0 
 64      else: 
 65          raise ValueError("Wrong number of columns in ASCII line. Cannot read.") 
 66   
 67       
 68      t = lsctables.SnglBurst() 
 69   
 70       
 71      if 'start_time' in columns:     t.start_time    = start.gpsSeconds 
 72      if 'start_time_ns' in columns:  t.start_time_ns = start.gpsNanoSeconds 
 73      if 'peak_time' in columns:      t.peak_time     = peak.gpsSeconds 
 74      if 'peak_time_ns' in columns:   t.peak_time_ns  = peak.gpsNanoSeconds 
 75      if 'stop_time' in columns:      t.stop_time     = stop.gpsSeconds 
 76      if 'stop_time_ns' in columns:   t.stop_time_ns  = stop.gpsNanoSeconds 
 77   
 78       
 79      if 'ms_start_time' in columns:     t.ms_start_time    = start.gpsSeconds 
 80      if 'ms_start_time_ns' in columns:  t.ms_start_time_ns = start.gpsNanoSeconds 
 81      if 'ms_stop_time' in columns:      t.ms_stop_time     = stop.gpsSeconds 
 82      if 'ms_stop_time_ns' in columns:   t.ms_stop_time_ns  = stop.gpsNanoSeconds 
 83   
 84       
 85      if 'duration' in columns:     t.duration    = duration 
 86      if 'ms_duration' in columns:  t.ms_duration = duration 
 87   
 88       
 89      if 'central_freq' in columns:         t.central_freq         = freq 
 90      if 'peak_frequency' in columns:       t.peak_frequency       = av_freq 
 91      if 'peak_frequency_eror' in columns:  t.peak_frequency_error = err_freq 
 92      if 'bandwidth' in columns:            t.bandwidth            = av_band 
 93      if 'flow' in columns:                 t.flow                 = freq-band/2 
 94      if 'fhigh' in columns:                t.fhigh                = freq+band/2 
 95   
 96       
 97      if 'ms_bandwidth' in columns: t.ms_bandwidth = band 
 98      if 'ms_flow' in columns:      t.ms_flow      = freq-band/2 
 99      if 'ms_fhigh' in columns:     t.ms_fhigh     = freq+band/2 
100   
101       
102      if 'snr' in columns:       t.snr       = snr 
103      if 'ms_snr' in columns:    t.ms_snr    = snr 
104       
105   
106       
107      if 'numPixels' in columns or 'param_two_value' in columns: 
108          t.param_two_name   = 'numPixels' 
109          t.param_two_value  = n_pix 
110      if 'totPower' in columns or 'param_three_value' in columns: 
111          t.param_three_name = 'cluster_number' 
112          t.param_two_name   = totPower 
113   
114      return t 
 115   
116   
117   
118   
119   
120 -def find_databases(host, user="reader", passwd="readonly", match=None): 
 121      """ 
122      Query for all databases on the given host 
123      """ 
124      connection = MySQLdb.connect(host=host, user=user, passwd=passwd) 
125      cursor = connection.cursor() 
126      cursor.execute("SHOW DATABASES") 
127      out = cursor.fetchall() 
128      connection.close() 
129      databases = [db[0] for db in out] 
130      if isinstance(match, str): 
131          match = re.compile(match)  
132      if match: 
133          databases = [db for db in databases if match.search(db)] 
134      return databases 
 135   
136 -def find_geo_databases(start_time, end_time, host, user="reader",\ 
137                         passwd="readonly"): 
 138      """ 
139      Query for all databases named geoYYYYMM whose year and month match the given 
140      [start_time, end_time) interval. 
141      """ 
142       
143      match = "\Ageo\d\d\d\d\d\d\Z" 
144      alldbs = find_databases(host, user=user, passwd=passwd, match=match) 
145   
146       
147      start_date = datetime.date(*GPSToUTC(int(start_time))[:3]) 
148      end_date   = datetime.date(*GPSToUTC(int(end_time))[:3]) 
149   
150       
151      databases = [] 
152      d  = start_date 
153      dt = datetime.timedelta(days=28) 
154      while d<=end_date: 
155          db = 'geo%s' % d.strftime("%Y%m") 
156          if db not in databases: 
157              databases.append(db) 
158          d += dt 
159   
160      return databases 
 161   
162   
163   
164   
165   
166 -def find_channels(host, database=None, user="reader", passwd="readonly",\ 
167                    match=None): 
 168      """ 
169      Query for all channels in the given HACR database on the given host 
170      """ 
171      if not database: 
172          database = find_databases(host, user=user, passwd=passwd)[-1] 
173      connection = MySQLdb.connect(host=host, user=user, passwd=passwd,\ 
174                                   db=database) 
175      cursor = connection.cursor() 
176      query = "select channel from job where monitorName = 'chacr'" 
177      cursor.execute(query) 
178      out = cursor.fetchall() 
179      connection.close() 
180      channels = [ch[0] for ch in out] 
181      if isinstance(match, str):  
182          match = re.compile(match) 
183      if match: 
184          channels = [ch for ch in channels if match.search(ch)] 
185      return channels 
 186   
187   
188   
189   
190   
191 -def get_triggers(start_time, end_time, channel, host, columns=None, pid=None,\ 
192                   user="reader", passwd="readonly", verbose=False): 
 193      """ 
194      Query the given HACR MySQL host for HACR triggers in a given 
195      [start_time, stop_time) semi-open interval. Returns a LIGOLw SnglBurstTable. 
196      """ 
197      ifo = channel[0:2] 
198      if ifo != "G1": 
199          raise NotImplementedError("Access to non-GEO databases hasn't been "+\ 
200                                    "implemented yet.") 
201   
202       
203       
204       
205   
206      out = lsctables.New(lsctables.SnglBurstTable, columns=columns) 
207      append = out.append 
208   
209       
210      if columns: 
211           
212          if isinstance(columns[0], str): 
213              columns = map(str.lower, columns) 
214          if isinstance(columns[0], unicode): 
215              columns = map(unicode.lower, columns) 
216           
217          for c in out.columnnames: 
218              if c.lower() not in columns: 
219                  idx = out.columnnames.index(c) 
220                  out.columnnames.pop(idx) 
221                  out.columntypes.pop(idx) 
222      else: 
223          columns = lsctables.SnglBurst.__slots__ 
224       
225       
226      databases  = find_geo_databases(start_time, end_time, host, user=user,\ 
227                                      passwd=passwd) 
228   
229       
230       
231       
232   
233      connection = MySQLdb.connect(host=host, user=user, passwd=passwd) 
234      cursor     = connection.cursor() 
235   
236       
237       
238       
239   
240      for db in databases: 
241          cursor.execute("use %s" % db) 
242   
243           
244          query = "select process_id, channel from job where monitorName = "+\ 
245                  "'chacr' and channel = '%s'" % channel 
246          numchan = int(cursor.execute(query)) 
247          result  = cursor.fetchall() 
248          if numchan == 0: 
249              raise AttributeError("%s not found in database %s.\n"\ 
250                                   % (channel, db)) 
251          elif numchan > 1: 
252              sys.stderr.write("warning: Multiple process_ids found for "+\ 
253                               "channel %s in monitor chacr. Triggers from all "\ 
254                               "processes will be amalgamated.\n" % channel) 
255   
256           
257          process_ids = [int(process[0]) for process in result\ 
258                         if pid==None or int(process[0]) == pid] 
259   
260           
261          for pid in process_ids: 
262              query = "select gps_start, gps_offset, freq_central, bandwidth, "\ 
263                      "duration, num_pixels, snr, totPower from mhacr where "\ 
264                      "process_id = %s AND gps_start >= %s AND gps_start < %s "\ 
265                      "order by gps_start asc" % (pid, start_time, end_time) 
266              ntrigs = cursor.execute(query) 
267              result = cursor.fetchall() 
268              if ntrigs: 
269                  for row in result: 
270                      append(trigger(row, columns=columns)) 
271   
272      connection.close() 
273      return out 
 274