1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27 """
28 While the ligolw module provides classes and parser support for reading and
29 writing LIGO Light Weight XML documents, this module supplements that code
30 with classes and parsers that add intelligence to the in-RAM document
31 representation.
32
33 In particular, the document tree associated with an Array element is
34 enhanced. During parsing, the Stream element in this module converts the
35 character data contained within it into the elements of a numpy array
36 object. The array has the appropriate dimensions and type. When the
37 document is written out again, the Stream element serializes the array back
38 into character data.
39
40 The array is stored as an attribute of the Array element.
41 """
42
43
44 import itertools
45 import numpy
46 import re
47 import sys
48 from xml.sax.saxutils import escape as xmlescape
49 from xml.sax.xmlreader import AttributesImpl as Attributes
50
51
52 from glue import git_version
53 from . import ligolw
54 from . import tokenizer
55 from . import types as ligolwtypes
56 from six.moves import map, range
57
58
59 __author__ = "Kipp Cannon <kipp.cannon@ligo.org>"
60 __version__ = "git id %s" % git_version.id
61 __date__ = git_version.date
62
63
64
65
66
67
68
69
70
71
72
def get_array(xmldoc, name):
    """
    Return the single Array element named name found in xmldoc.

    The lookup is delegated to Array.getArraysByName().  ValueError is
    raised unless that search yields exactly one element.
    """
    matches = Array.getArraysByName(xmldoc, name)
    if len(matches) == 1:
        return matches[0]
    # zero matches and multiple matches are reported the same way
    raise ValueError("document must contain exactly one %s array" % Array.ArrayName(name))
82
83
84
85
86
87
88
89
90
91
92
93 -class ArrayStream(ligolw.Stream):
94 """
95 High-level Stream element for use inside Arrays. This element
96 knows how to parse the delimited character stream into the parent's
97 array attribute, and knows how to turn the parent's array attribute
98 back into a character stream.
99 """
100
101 Delimiter = ligolw.attributeproxy(u"Delimiter", default = u" ")
102
112
113 - def config(self, parentNode):
121
123
124 tokens = tuple(self._tokenizer.append(content))
125 next_index = self._index + len(tokens)
126 self._array_view[self._index : next_index] = tokens
127 self._index = next_index
128
130
131
132
133 self.appendData(self.Delimiter)
134 if self._index != len(self._array_view):
135 raise ValueError("length of Stream (%d elements) does not match array size (%d elements)" % (self._index, len(self._array_view)))
136 del self._array_view
137 del self._index
138
def write(self, fileobj = sys.stdout, indent = u""):
    """
    Write this Stream element, including its start and end tags, to
    fileobj.

    The parent node's array is walked in transposed flat order and
    each element is formatted with the FormatFunc registered for the
    parent's Type.  Each output line carries array.shape[0] tokens
    joined by the Delimiter.  Every line except the last is also
    terminated by the Delimiter.  Nothing but the tags is written
    for an array of size 0.
    """
    array = self.parentNode.array
    row_length = array.shape[0]
    row_count = array.size // row_length if array.size else 0
    # lazy iterator;  each call to next_row() below consumes the
    # next row_length tokens from it
    formatted = map(ligolwtypes.FormatFunc[self.parentNode.Type], array.T.flat)
    delimiter = self.Delimiter
    emit = fileobj.write

    def next_row():
        return xmlescape(delimiter.join(itertools.islice(formatted, row_length)))

    emit(self.start_tag(indent))
    if row_count:
        line_start = u"\n" + indent + ligolw.Indent
        emit(line_start)
        emit(next_row())
        # subsequent rows are preceded by a delimiter that closes
        # the previous row
        row_break = delimiter + line_start
        for _ in range(row_count - 1):
            emit(row_break)
            emit(next_row())
    emit(u"\n" + self.end_tag(indent) + u"\n")
158
159
160 -class Array(ligolw.Array):
161 """
162 High-level Array element.
163 """
167
168 Name = ligolw.attributeproxy(u"Name", enc = ArrayName.enc, dec = ArrayName)
169
171 """
172 Initialize a new Array element.
173 """
174 super(Array, self).__init__(*args)
175 self.array = None
176
178 """
179 Return a tuple of this array's dimensions. This is done by
180 querying the Dim children. Note that once it has been
181 created, it is also possible to examine an Array object's
182 .array attribute directly, and doing that is much faster.
183 """
184 return tuple(c.n for c in self.getElementsByTagName(ligolw.Dim.tagName))[::-1]
185
@classmethod
def build(cls, name, array, dim_names = None):
    """
    Create an Array element, with Dim and Stream children, that
    wraps the numpy array object array.

    name is assigned to the new element's Name attribute.
    dim_names, if given, must provide one entry per dimension of
    array;  entries that are not None are assigned as the Name of
    the corresponding Dim child.  ValueError is raised if the
    length of dim_names does not match the number of dimensions.
    The Dim children are appended in the reverse of the order of
    array.shape.  The array object itself is stored as the
    element's .array attribute.

    Example:

    >>> import numpy, sys
    >>> a = numpy.arange(12, dtype = "double")
    >>> a.shape = (4, 3)
    >>> Array.build(u"test", a).write(sys.stdout)	# doctest: +NORMALIZE_WHITESPACE
    <Array Type="real_8" Name="test:array">
        <Dim>3</Dim>
        <Dim>4</Dim>
        <Stream Delimiter=" " Type="Local">
            0 3 6 9
            1 4 7 10
            2 5 8 11
        </Stream>
    </Array>
    """
    # Type is supplied at construction time;  Name is assigned
    # afterwards through the element's Name attribute
    type_name = ligolwtypes.FromNumPyType[str(array.dtype)]
    elem = cls(Attributes({u"Type": type_name}))
    elem.Name = name

    shape = array.shape
    if dim_names is None:
        dim_names = [None] * len(shape)
    elif len(dim_names) != len(shape):
        raise ValueError("dim_names must be same length as number of dimensions")

    # last dimension first
    for dim_name, extent in list(zip(dim_names, shape))[::-1]:
        dim = elem.appendChild(ligolw.Dim())
        if dim_name is not None:
            dim.Name = dim_name
        dim.n = extent

    stream_attrs = Attributes({
        u"Type": ArrayStream.Type.default,
        u"Delimiter": ArrayStream.Delimiter.default,
    })
    elem.appendChild(ArrayStream(stream_attrs))
    elem.array = array
    return elem
225
226 @classmethod
233
234
235
236
237
239 """
240 Break internal references within the document tree rooted
241 on this element to promote garbage collection.
242 """
243 super(Array, self).unlink()
244 self.array = None
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
def use_in(ContentHandler):
    """
    Patch ContentHandler, a sub-class of
    glue.ligolw.LIGOLWContentHandler, so that documents parsed with
    it are populated with the Array and ArrayStream classes defined
    in this module.  The class is modified in place and is also
    returned.

    Example:

    >>> from glue.ligolw import ligolw
    >>> class MyContentHandler(ligolw.LIGOLWContentHandler):
    ...	pass
    ...
    >>> use_in(MyContentHandler)
    <class 'glue.ligolw.array.MyContentHandler'>
    """
    # the handler's current startStream is captured as a default
    # argument so Streams outside of Arrays can be passed on to it
    def startStream(self, parent, attrs, __orig_startStream = ContentHandler.startStream):
        if parent.tagName != ligolw.Array.tagName:
            return __orig_startStream(self, parent, attrs)
        stream = ArrayStream(attrs)
        return stream.config(parent)

    def startArray(self, parent, attrs):
        return Array(attrs)

    ContentHandler.startArray = startArray
    ContentHandler.startStream = startStream

    return ContentHandler
289