glue.iterutils

1 # Copyright (C) 2007,2008,2010--2014 Kipp Cannon, Nickolas Fotopoulos 2 # 3 # This program is free software; you can redistribute it and/or modify it 4 # under the terms of the GNU General Public License as published by the 5 # Free Software Foundation; either version 2 of the License, or (at your 6 # option) any later version. 7 # 8 # This program is distributed in the hope that it will be useful, but 9 # WITHOUT ANY WARRANTY; without even the implied warranty of 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General 11 # Public License for more details. 12 # 13 # You should have received a copy of the GNU General Public License along 14 # with this program; if not, write to the Free Software Foundation, Inc., 15 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 16 17 18 # 19 # ============================================================================= 20 # 21 # Preamble 22 # 23 # ============================================================================= 24 # 25 26 27 """ 28 A collection of iteration utilities. 29 """ 30 31 32 import math 33 import numpy 34 import random 35 36 37 from glue import git_version 38 39 40 __author__ = "Kipp Cannon <kipp.cannon@ligo.org>" 41 __version__ = "git id %s" % git_version.id 42 __date__ = git_version.date 43 44 45 # 46 # ============================================================================= 47 # 48 # Iteration Tools 49 # 50 # ============================================================================= 51 # 52 53

54 -def MultiIter(*sequences):

55 """ 56 A generator for iterating over the elements of multiple sequences 57 simultaneously. With N sequences given as input, the generator 58 yields all possible distinct N-tuples that contain one element from 59 each of the input sequences. 60 61 Example: 62 63 >>> x = MultiIter([0, 1, 2], [10, 11]) 64 >>> list(x) 65 [(0, 10), (1, 10), (2, 10), (0, 11), (1, 11), (2, 11)] 66 67 The elements in each output tuple are in the order of the input 68 sequences, and the left-most input sequence is iterated over first. 69 70 Internally, the input sequences themselves are each iterated over 71 only once, so it is safe to pass generators as arguments. Also, 72 this generator is significantly faster if the longest input 73 sequence is given as the first argument. For example, this code 74 75 >>> lengths = range(1, 12) 76 >>> for x in MultiIter(*map(range, lengths)): 77 ... pass 78 ... 79 80 runs approximately 5 times faster if the lengths list is reversed. 81 """ 82 if len(sequences) > 1: 83 # FIXME: this loop is about 5% faster if done the other 84 # way around, if the last list is iterated over in the 85 # inner loop. but there is code, like snglcoinc.py, 86 # that has been optimized for the current order and 87 # would need to be reoptimized if this function were to be 88 # reversed. 89 head = tuple((x,) for x in sequences[0]) 90 for t in MultiIter(*sequences[1:]): 91 for h in head: 92 yield h + t 93 elif sequences: 94 for t in sequences[0]: 95 yield (t,)

96 97

98 -def choices(vals, n):

99 """ 100 A generator for iterating over all choices of n elements from the 101 input sequence vals. In each result returned, the original order 102 of the values is preserved. 103 104 Example: 105 106 >>> x = choices(["a", "b", "c"], 2) 107 >>> list(x) 108 [('a', 'b'), ('a', 'c'), ('b', 'c')] 109 110 The order of combinations in the output sequence is always the 111 same, so if choices() is called twice with two different sequences 112 of the same length the first combination in each of the two output 113 sequences will contain elements from the same positions in the two 114 different input sequences, and so on for each subsequent pair of 115 output combinations. 116 117 Example: 118 119 >>> x = choices(["a", "b", "c"], 2) 120 >>> y = choices(["1", "2", "3"], 2) 121 >>> zip(x, y) 122 [(('a', 'b'), ('1', '2')), (('a', 'c'), ('1', '3')), (('b', 'c'), ('2', '3'))] 123 124 Furthermore, the order of combinations in the output sequence is 125 such that if the input list has n elements, and one constructs the 126 combinations choices(input, m), then each combination in 127 choices(input, n-m).reverse() contains the elements discarded in 128 forming the corresponding combination in the former. 129 130 Example: 131 132 >>> x = ["a", "b", "c", "d", "e"] 133 >>> X = list(choices(x, 2)) 134 >>> Y = list(choices(x, len(x) - 2)) 135 >>> Y.reverse() 136 >>> zip(X, Y) 137 [(('a', 'b'), ('c', 'd', 'e')), (('a', 'c'), ('b', 'd', 'e')), (('a', 'd'), ('b', 'c', 'e')), (('a', 'e'), ('b', 'c', 'd')), (('b', 'c'), ('a', 'd', 'e')), (('b', 'd'), ('a', 'c', 'e')), (('b', 'e'), ('a', 'c', 'd')), (('c', 'd'), ('a', 'b', 'e')), (('c', 'e'), ('a', 'b', 'd')), (('d', 'e'), ('a', 'b', 'c'))] 138 """ 139 if n == len(vals): 140 yield tuple(vals) 141 elif n > 1: 142 n -= 1 143 for i, v in enumerate(vals[:-n]): 144 v = (v,) 145 for c in choices(vals[i+1:], n): 146 yield v + c 147 elif n == 1: 148 for v in vals: 149 yield (v,) 150 elif n == 0: 151 yield () 152 else: 153 # n < 0 154 raise ValueError(n)

155 156

157 -def uniq(iterable):

158 """ 159 Yield the unique items of an iterable, preserving order. 160 http://mail.python.org/pipermail/tutor/2002-March/012930.html 161 162 Example: 163 164 >>> x = uniq([0, 0, 2, 6, 2, 0, 5]) 165 >>> list(x) 166 [0, 2, 6, 5] 167 """ 168 temp_dict = {} 169 for e in iterable: 170 if e not in temp_dict: 171 yield temp_dict.setdefault(e, e)

172 173

174 -def nonuniq(iterable):

175 """ 176 Yield the non-unique items of an iterable, preserving order. If an 177 item occurs N > 0 times in the input sequence, it will occur N-1 178 times in the output sequence. 179 180 Example: 181 182 >>> x = nonuniq([0, 0, 2, 6, 2, 0, 5]) 183 >>> list(x) 184 [0, 2, 0] 185 """ 186 temp_dict = {} 187 for e in iterable: 188 if e in temp_dict: 189 yield e 190 temp_dict.setdefault(e, e)

191 192

193 -def flatten(sequence, levels = 1):

194 """ 195 Example: 196 >>> nested = [[1,2], [[3]]] 197 >>> list(flatten(nested)) 198 [1, 2, [3]] 199 """ 200 if levels == 0: 201 for x in sequence: 202 yield x 203 else: 204 for x in sequence: 205 for y in flatten(x, levels - 1): 206 yield y

207 208 209 # 210 # ============================================================================= 211 # 212 # In-Place filter() 213 # 214 # ============================================================================= 215 # 216 217

218 -def inplace_filter(func, sequence):

219 """ 220 Like Python's filter() builtin, but modifies the sequence in place. 221 222 Example: 223 224 >>> l = range(10) 225 >>> inplace_filter(lambda x: x > 5, l) 226 >>> l 227 [6, 7, 8, 9] 228 229 Performance considerations: the function iterates over the 230 sequence, shuffling surviving members down and deleting whatever 231 top part of the sequence is left empty at the end, so sequences 232 whose surviving members are predominantly at the bottom will be 233 processed faster. 234 """ 235 target = 0 236 for source in range(len(sequence)): 237 if func(sequence[source]): 238 sequence[target] = sequence[source] 239 target += 1 240 del sequence[target:]

241 242 243 # 244 # ============================================================================= 245 # 246 # Return the Values from Several Ordered Iterables in Order 247 # 248 # ============================================================================= 249 # 250 251

252 -def inorder(*iterables, **kwargs):

253 """ 254 A generator that yields the values from several ordered iterables 255 in order. 256 257 Example: 258 259 >>> x = [0, 1, 2, 3] 260 >>> y = [1.5, 2.5, 3.5, 4.5] 261 >>> z = [1.75, 2.25, 3.75, 4.25] 262 >>> list(inorder(x, y, z)) 263 [0, 1, 1.5, 1.75, 2, 2.25, 2.5, 3, 3.5, 3.75, 4.25, 4.5] 264 >>> list(inorder(x, y, z, key=lambda x: x * x)) 265 [0, 1, 1.5, 1.75, 2, 2.25, 2.5, 3, 3.5, 3.75, 4.25, 4.5] 266 267 >>> x.sort(key=lambda x: abs(x-3)) 268 >>> y.sort(key=lambda x: abs(x-3)) 269 >>> z.sort(key=lambda x: abs(x-3)) 270 >>> list(inorder(x, y, z, key=lambda x: abs(x - 3))) 271 [3, 2.5, 3.5, 2.25, 3.75, 2, 1.75, 4.25, 1.5, 4.5, 1, 0] 272 273 >>> x = [3, 2, 1, 0] 274 >>> y = [4.5, 3.5, 2.5, 1.5] 275 >>> z = [4.25, 3.75, 2.25, 1.75] 276 >>> list(inorder(x, y, z, reverse = True)) 277 [4.5, 4.25, 3.75, 3.5, 3, 2.5, 2.25, 2, 1.75, 1.5, 1, 0] 278 >>> list(inorder(x, y, z, key = lambda x: -x)) 279 [4.5, 4.25, 3.75, 3.5, 3, 2.5, 2.25, 2, 1.75, 1.5, 1, 0] 280 281 NOTE: this function will never reverse the order of elements in 282 the input iterables. If the reverse keyword argument is False (the 283 default) then the input sequences must yield elements in increasing 284 order, likewise if the keyword argument is True then the input 285 sequences must yield elements in decreasing order. Failure to 286 adhere to this yields undefined results, and for performance 287 reasons no check is performed to validate the element order in the 288 input sequences. 289 """ 290 reverse = kwargs.pop("reverse", False) 291 keyfunc = kwargs.pop("key", lambda x: x) # default = identity 292 if kwargs: 293 raise TypeError("invalid keyword argument '%s'" % list(kwargs.keys())[0]) 294 nextvals = {} 295 for iterable in iterables: 296 next = iter(iterable).next 297 try: 298 nextval = next() 299 nextvals[next] = keyfunc(nextval), nextval, next 300 except StopIteration: 301 pass 302 if not nextvals: 303 # all sequences are empty 304 return 305 if reverse: 306 select = lambda seq: max(seq, key = lambda elem: elem[0]) 307 else: 308 select = lambda seq: min(seq, key = lambda elem: elem[0]) 309 values = nextvals.itervalues 310 if len(nextvals) > 1: 311 while 1: 312 _, val, next = select(values()) 313 yield val 314 try: 315 nextval = next() 316 nextvals[next] = keyfunc(nextval), nextval, next 317 except StopIteration: 318 del nextvals[next] 319 if len(nextvals) < 2: 320 break 321 # exactly one sequence remains, short circuit and drain it 322 (_, val, next), = values() 323 yield val 324 while 1: 325 yield next()

326 327 328 # 329 # ============================================================================= 330 # 331 # Random Sequences 332 # 333 # ============================================================================= 334 # 335 336

337 -def randindex(lo, hi, n = 1.):

338 """ 339 Yields integers in the range [lo, hi) where 0 <= lo < hi. Each 340 return value is a two-element tuple. The first element is the 341 random integer, the second is the natural logarithm of the 342 probability with which that integer will be chosen. 343 344 The CDF for the distribution from which the integers are drawn goes 345 as [integer]^{n}, where n > 0. Specifically, it's 346 347 CDF(x) = (x^{n} - lo^{n}) / (hi^{n} - lo^{n}) 348 349 n = 1 yields a uniform distribution; n > 1 favours larger 350 integers, n < 1 favours smaller integers. 351 """ 352 if not 0 <= lo < hi: 353 raise ValueError("require 0 <= lo < hi: lo = %d, hi = %d" % (lo, hi)) 354 if n <= 0.: 355 raise ValueError("n <= 0: %g" % n) 356 elif n == 1.: 357 # special case for uniform distribution 358 try: 359 lnP = math.log(1. / (hi - lo)) 360 except ValueError: 361 raise ValueError("[lo, hi) domain error") 362 hi -= 1 363 rnd = random.randint 364 while 1: 365 yield rnd(lo, hi), lnP 366 367 # CDF evaluated at index boundaries 368 lnP = numpy.arange(lo, hi + 1, dtype = "double")**n 369 lnP -= lnP[0] 370 lnP /= lnP[-1] 371 # differences give probabilities 372 lnP = tuple(numpy.log(lnP[1:] - lnP[:-1])) 373 if numpy.isinf(lnP).any(): 374 raise ValueError("[lo, hi) domain error") 375 376 beta = lo**n / (hi**n - lo**n) 377 n = 1. / n 378 alpha = hi / (1. + beta)**n 379 flr = math.floor 380 rnd = random.random 381 while 1: 382 index = int(flr(alpha * (rnd() + beta)**n)) 383 # the tuple look-up provides the second part of the 384 # range safety check on index 385 assert index >= lo 386 yield index, lnP[index - lo]

387

Source Code for Module glue.iterutils