Package pylal :: Module hdf5utils
[hide private]
[frames] | [no frames]

Source Code for Module pylal.hdf5utils

  1  # Copyright (C) 2012 Duncan M. Macleod 
  2  #  
  3  # This program is free software; you can redistribute it and/or modify it 
  4  # under the terms of the GNU General Public License as published by the 
  5  # Free Software Foundation; either version 3 of the License, or (at your 
  6  # option) any later version. 
  7  #    
  8  # This program is distributed in the hope that it will be useful, but 
  9  # WITHOUT ANY WARRANTY; without even the implied warranty of 
 10  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General 
 11  # Public License for more details. 
 12  #  
 13  # You should have received a copy of the GNU General Public License along 
 14  # with this program; if not, write to the Free Software Foundation, Inc., 
 15  # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA. 
 16   
 17  """ 
 18  This module provides a bunch of user-friendly wrappers to the HDF5 
 19  data format, for reading and writing SWIG-bound TimeSeries and 
 20  FrequencySeries objects from LAL. Issues should be raised on redmine: 
 21  https://bugs.ligo.org/redmine/projects/lalsuite 
 22  """ 
 23   
 24  # ============================================================================= 
 25  # Preamble 
 26  # ============================================================================= 
 27   
 28  from __future__ import division 
 29  import numpy 
 30  import h5py 
 31  import lal 
 32   
 33  from pylal import seriesutils 
 34  from pylal import git_version 
 35   
 36  # set metadata 
 37  __author__ = "Duncan M. Macleod <duncan.macleod@ligo.org>" 
 38  __version__ = git_version.id 
 39  __date__ = git_version.date 
 40   
 41  # ============================================================================= 
 42  # Useful global variables 
 43  # ============================================================================= 
 44   
 45  # type code dict 
 46  _numpy_lal_typemap = {numpy.int16: lal.LAL_I2_TYPE_CODE,\ 
 47                        numpy.int32: lal.LAL_I4_TYPE_CODE,\ 
 48                        numpy.int64: lal.LAL_I8_TYPE_CODE,\ 
 49                        numpy.uint16: lal.LAL_U2_TYPE_CODE,\ 
 50                        numpy.uint32: lal.LAL_U4_TYPE_CODE,\ 
 51                        numpy.uint64: lal.LAL_U8_TYPE_CODE,\ 
 52                        numpy.float32: lal.LAL_S_TYPE_CODE,\ 
 53                        numpy.float64: lal.LAL_D_TYPE_CODE,\ 
 54                        numpy.complex64: lal.LAL_C_TYPE_CODE,\ 
 55                        numpy.complex128: lal.LAL_Z_TYPE_CODE} 
 56  _lal_numpy_typemap = dict((v,k) for k, v in _numpy_lal_typemap.items()) 
 57   
 58  # ============================================================================= 
 59  # Data reading 
 60  # ============================================================================= 
 61   
def readTimeSeries(h5file, name, group="", start=None, duration=None,\
                   datatype=None):
    """
    Read a 1-D array from an HDF5 file into a LAL TimeSeries.

    Returns SWIG-bound LAL TimeSeries.

    Arguments:

        h5file : [ h5py.File | str ]
            open HDF5 file object, or path to HDF5 file on disk.
        name : str
            name of data object in HDF5 file, relative to its group.

    Keyword arguments:

        group : str
            name of HDF5 Group containing data object required.
        start : LIGOTimeGPS
            GPS start time of data requested, defaults to first data point.
        duration : float
            length of data (seconds) requested, default to all data.
        datatype : int
            LAL typecode for output datatype, defaults to type of data found.
    """
    # accept either an open h5py.File or a path; only close what we opened
    own = False
    if not isinstance(h5file, h5py.File):
        h5file = h5py.File(h5file, "r")
        own = True

    # read data
    dataset, metadata = readArray(h5file, name, group=group)
    if own:
        h5file.close()

    # parse metadata (attributes written by seriesToDataset in this module)
    epoch = lal.LIGOTimeGPS(metadata.pop("epoch", 0))
    f0 = float(metadata.pop("f0", 0))
    deltaT = float(metadata.pop("dx", 0))
    sampleUnits = lal.lalDimensionlessUnit # FIXME

    # get series type: map numpy dtype -> LAL typecode -> LAL type string
    if datatype is None:
        datatype = _numpy_lal_typemap[dataset.dtype.type]
    TYPECODE = seriesutils._typestr[datatype]
    numpytype = _lal_numpy_typemap[datatype]

    # cut data to size
    if duration is None:
        length = dataset.size
    else:
        length = int(duration / deltaT)
    if start is None:
        startidx = 0
    else:
        # quantize the requested start onto the sample grid anchored at epoch
        start = float(epoch) + (float(start)-float(epoch))//deltaT*deltaT
        # NOTE(review): float < LIGOTimeGPS comparison — relies on SWIG-lal
        # rich comparison support; confirm against the lal bindings in use
        if start < epoch:
            start = epoch
        startidx = int(max(0, (float(start)-float(epoch))/deltaT))
        # shift the output epoch to the (quantized) requested start
        epoch = lal.LIGOTimeGPS(start)
    endidx = min(dataset.size, startidx + length)
    length = endidx-startidx

    # build series via the type-specific constructor, e.g. CreateREAL8TimeSeries
    createTimeSeries = getattr(lal, "Create%sTimeSeries" % TYPECODE)
    series = createTimeSeries(name, epoch, f0, deltaT, sampleUnits,\
                              length)
    series.data.data = dataset[startidx:endidx].astype(numpytype)

    return series
132
def readFrequencySeries(h5file, name, group=None, fmin=None, fmax=None,\
                        datatype=None):
    """
    Read FrequencySeries object from HDF5 file.

    Returns SWIG-bound LAL FrequencySeries.

    Arguments:

        h5file : [ h5py.File | str ]
            open HDF5 file object, or path to HDF5 file on disk.
        name : str
            name of data object in HDF5 file, relative to its group.

    Keyword arguments:

        group : str
            name of HDF5 Group containing data object required.
        fmin : float
            lower frequency bound on data returned
        fmax : float
            upper frequency bound on data returned
        datatype : int
            LAL typecode for output datatype, defaults to type of data found.
    """
    # accept either an open h5py.File or a path; only close what we opened
    own = False
    if not isinstance(h5file, h5py.File):
        h5file = h5py.File(h5file, "r")
        own = True

    # read data
    dataset, metadata = readArray(h5file, name, group=group)
    if own:
        h5file.close()

    # parse metadata (attributes written by seriesToDataset in this module)
    epoch = lal.LIGOTimeGPS(metadata.pop("epoch", 0))
    f0 = float(metadata.pop("f0", 0))
    deltaF = float(metadata.pop("dx", 0))
    sampleUnits = lal.lalDimensionlessUnit # FIXME

    # get series type: map numpy dtype -> LAL typecode -> LAL type string
    if not datatype:
        datatype = _numpy_lal_typemap[dataset.dtype.type]
    TYPECODE = seriesutils._typestr[datatype]
    numpytype = _lal_numpy_typemap[datatype]

    # cut data to size
    if fmin is None:
        fmin = f0
    else:
        # quantize the requested lower bound onto the frequency grid
        fmin = f0 + (fmin-f0)//deltaF*deltaF
    startidx = int(max(0, (fmin-f0)/deltaF))
    if fmax is None:
        fmax = f0 + dataset.size * deltaF
    else:
        fmax = f0 + (fmax-f0)//deltaF*deltaF
    # endidx is an absolute bin index from f0 (startidx already accounts for
    # the fmin offset); the original used (fmax-fmin)/deltaF, subtracting the
    # fmin offset twice and returning too few bins whenever fmin > f0
    endidx = int(min(dataset.size, (fmax-f0)/deltaF))
    length = endidx-startidx
    # the returned series starts at fmin, so report that as its f0
    # (mirrors readTimeSeries updating epoch after cutting)
    f0 = fmin

    # build series via the type-specific constructor
    createFrequencySeries = getattr(lal, "Create%sFrequencySeries" % TYPECODE)
    series = createFrequencySeries(name, epoch, f0, deltaF, sampleUnits,\
                                   length)
    series.data.data = dataset[startidx:endidx].astype(numpytype)

    return series
200
def readVectorSequence(h5file, name, group=None, start=None, duration=None,\
                       fmin=None, fmax=None, datatype=None):
    """
    Read VectorSequence object from HDF5 file.

    Returns SWIG-bound LAL VectorSequence, LIGOTimeGPS epoch, float deltaT,
    float f0, float deltaF.

    Arguments:

        h5file : [ h5py.File | str ]
            open HDF5 file object, or path to HDF5 file on disk.
        name : str
            name of data object in HDF5 file, relative to its group.

    Keyword arguments:

        group : str
            name of HDF5 Group containing data object required.
        start : LIGOTimeGPS
            GPS start time of data requested, defaults to first data point.
        duration : float
            length of data (seconds) requested, default to all data.
        fmin : float
            lower frequency bound on data returned
        fmax : float
            upper frequency bound on data returned
        datatype : int
            LAL typecode for output datatype, defaults to type of data found.
    """
    # accept either an open h5py.File or a path; only close what we opened
    own = False
    if not isinstance(h5file, h5py.File):
        h5file = h5py.File(h5file, "r")
        own = True

    # read data (2-D: rows are time steps, columns are frequency bins)
    dataset, metadata = readArray(h5file, name, group=group)
    if own:
        h5file.close()

    # parse metadata (attributes written by arrayToDataset in this module)
    epoch = lal.LIGOTimeGPS(metadata.pop("epoch", 0))
    deltaT = float(metadata.pop("dx", 0))
    f0 = float(metadata.pop("f0", 0))
    deltaF = float(metadata.pop("dy", 0))

    # get series type: map numpy dtype -> LAL typecode -> LAL type string
    if not datatype:
        datatype = _numpy_lal_typemap[dataset.dtype.type]
    TYPECODE = seriesutils._typestr[datatype]
    numpytype = _lal_numpy_typemap[datatype]

    # cut rows (time axis) to size
    if duration is None:
        length = dataset.shape[0]
    else:
        length = int(duration / deltaT)
    if start is None:
        x0 = 0
    else:
        # quantize the requested start onto the sample grid anchored at epoch
        start = float(epoch) + (float(start)-float(epoch))//deltaT*deltaT
        if start < epoch:
            start = epoch
        x0 = int(max(0, (float(start)-float(epoch))/deltaT))
        epoch = lal.LIGOTimeGPS(start)
    # clip against the number of rows; the original compared against
    # dataset.size (rows * columns) which effectively never clipped
    length = min(dataset.shape[0], x0 + length) - x0

    # cut columns (frequency axis) to size
    if fmin is None:
        fmin = f0
    else:
        fmin = f0 + (fmin-f0)//deltaF*deltaF
    y0 = int(max(0, (fmin-f0)/deltaF))
    if fmax is None:
        fmax = f0 + dataset.shape[1] * deltaF
    else:
        fmax = f0 + ((fmax-f0)//deltaF)*deltaF
    # (fmax-f0)/deltaF is the absolute end column (y0 already accounts for
    # the fmin offset); clip against the number of columns, not dataset.size
    vectorLength = int(min(dataset.shape[1], (fmax-f0)/deltaF)) - y0

    # build sequence via the type-specific constructor
    createVectorSequence = getattr(lal, "Create%sVectorSequence" % TYPECODE)
    sequence = createVectorSequence(length, vectorLength)

    for i, x in enumerate(numpy.arange(length)+x0):
        # copy one row (one time step) into the sequence
        sequence.data[i] = dataset[x,:][y0:y0+vectorLength].astype(numpytype)

    return sequence, epoch, deltaT, f0, deltaF
291
def readArray(h5file, name, group=None):
    """
    Read numpy.ndarray from h5py.File object h5file.

    Returns numpy.ndarray and metadata dict.

    Arguments:

        h5file : [ h5py.File | str ]
            open HDF5 file object, or path to HDF5 file on disk.
        name : str
            name of data object in HDF5 file, relative to its group.

    Keyword arguments:

        group : str
            name of HDF5 Group containing data object required; None or ""
            means the dataset lives at the file root.
    """
    # format key: dataset lives at "<group>/<name>".  Tolerate group=None
    # (the default, on which the original crashed calling None.endswith) and
    # a group path that already ends with the dataset name.
    if group:
        if group.endswith(name):
            # strip the trailing name; the original sliced group[:len(name)],
            # keeping the first len(name) characters instead of removing the
            # suffix
            group = group[:-len(name)].rstrip("/")
        key = "%s/%s" % (group, name)
    else:
        key = name

    # get dataset
    dataset = h5file[key]

    # dataset[...] reads the full array; attrs become a plain metadata dict
    return dataset[...], dict(dataset.attrs)
319 320 # ============================================================================= 321 # Data writing 322 # ============================================================================= 323
def writeTimeSeries(h5file, series, group=None, **kwargs):
    """
    Write TimeSeries object to the given h5py.File object h5file.

    No return.

    Arguments:

        h5file : h5py.File
            open HDF5 file object
        series : TimeSeries
            SWIG-bound LAL TimeSeries object

    Keyword arguments:

        group : str
            name of HDF5 Group to write to, group is generated if required.

    All other keyword arguments are passed to
    h5py.highlevel.Group.create_dataset.
    """
    # resolve (creating if necessary) the target group, then delegate
    target = _create_groups(h5file, group)
    seriesToDataset(target, series, **kwargs)
348
def writeFrequencySeries(h5file, series, group=None, **kwargs):
    """
    Write FrequencySeries object to the given h5py.File object h5file.

    No return.

    Arguments:

        h5file : h5py.File
            open HDF5 file object
        series : FrequencySeries
            SWIG-bound LAL FrequencySeries object

    Keyword arguments:

        group : str
            name of HDF5 Group to write to, group is generated if required.

    All other keyword arguments are passed to
    h5py.highlevel.Group.create_dataset.
    """
    # resolve (creating if necessary) the target group, then delegate
    target = _create_groups(h5file, group)
    seriesToDataset(target, series, **kwargs)
373
def seriesToDataset(h5group, series, **kwargs):
    """
    Write {Time,Frequency}Series object to a new data set in the
    given h5py.Group object h5group.

    Returns h5py.highlevel.Dataset.

    Arguments:

        h5group : h5py.highlevel.Group
            Group object inside an HDF5 file
        series : [ TimeSeries | FrequencySeries ]
            LAL Time or FrequencySeries object

    All kwargs are passed to h5py.highlevel.Group.create_dataset.
    """
    # extract metadata, stored as HDF5 attributes by arrayToDataset
    metadata = dict()
    metadata["name"] = str(series.name)
    metadata["epoch"] = float(series.epoch)
    metadata["f0"] = float(series.f0)
    # TimeSeries stores its step as deltaT, FrequencySeries as deltaF.
    # The original "hasattr(...) and series.deltaT or series.deltaF" idiom
    # wrongly fell through to deltaF whenever deltaT == 0.
    if hasattr(series, "deltaT"):
        metadata["dx"] = float(series.deltaT)
    else:
        metadata["dx"] = float(series.deltaF)
    # FIXME metadata["sampleUnits"] = series.sampleUnits

    # format data
    data = series.data.data

    # create dataset
    h5dataset = arrayToDataset(h5group, metadata["name"], data, metadata,\
                               **kwargs)

    return h5dataset
407
def arrayToDataset(h5group, name, array, metadata=None, **kwargs):
    """
    Write the given numpy array object to a new data set in the given
    h5py.highlevel.Group object h5group, including each element in the
    metadata dict.

    Returns h5py.highlevel.Dataset.

    Arguments:

        h5group : h5py.highlevel.Group
            Group object inside an HDF5 file
        name : str
            descriptive name for the new dataset.
        array: numpy.ndarray
            data object to write to dataset

    Keyword arguments:

        metadata: dict
            dict containing metadata to write as attributes in the
            dataset, default=None (no attributes written)

    All kwargs are passed to h5py.highlevel.Group.create_dataset.
    """
    # create dataset
    h5dataset = h5group.create_dataset(name, data=array, **kwargs)

    # store each metadata entry as an attribute on the dataset.
    # None default replaces the original shared mutable {} default;
    # items() replaces the Python-2-only iteritems().
    if metadata:
        for key, value in dict(metadata).items():
            h5dataset.attrs.create(key, value)

    return h5dataset
442
443 -def _create_groups(h5file, group):
444 """ 445 Helper to return a group from an h5py.File object, creating all parents 446 and the group itself if needed. 447 448 Returns a new h5py.highlevel.Group, except in the special case of 449 group=None, in which case the original h5py.File is returned. 450 451 Argument: 452 453 h5file : [ h5py.File | str ] 454 open HDF5 file object, or path to HDF5 file on disk. 455 group : str 456 name of Group to create. If a composite group is given 457 (e.g. "/data/spectra") the full tree will be generated 458 as appropriate. 459 """ 460 if group is None: 461 return h5file 462 group = group.lstrip("/") 463 if group in h5file.keys(): 464 h5group = h5file[group] 465 else: 466 groups = group.split("/") 467 h5group = h5file 468 for g in groups: 469 try: 470 h5group = h5group.create_group(g) 471 except ValueError: 472 pass 473 return h5group
474