glue.ligolw.utils.print_tables

1 # xml_convert.py 2 3 # 4 # ============================================================================= 5 # 6 # Preamble 7 # 8 # ============================================================================= 9 # 10 11 12 """ 13 A collection of utilities to convert xml-tables to other formats, such as 14 wiki or html. 15 """ 16 import sys, re 17 18 from .. import ligolw 19 from .. import table 20 import six 21 22 __author__ = "Collin Capano <cdcapano@ligo.caltech.edu>" 23 from glue import git_version 24 __date__ = git_version.date 25 __version__ = git_version.id 26 27 28 # 29 # ============================================================================= 30 # 31 # Utilities 32 # 33 # ============================================================================= 34 # 35

def set_output_format(output_format):
    """
    Looks up the markup fragments used to write tables in the given format.

    @output_format: 'wiki' or 'html'; any other value raises ValueError

    Returns a 15-element tuple, in this order:
        ttx, xtt:      how to start / end a title for a set of tables
        tx, xt:        how to start / close a table
        capx, xcap:    how to start / close a caption for the table
        rx, xr:        how to start / close a row
        cx, xc:        how to open / close a cell
        rspx, xrsp:    how to start / close the row span argument of a cell
        hlx, hxl, xhl: how to open a hyperlink, separate its target from its
                       text, and close it
    """
    if output_format == 'wiki':
        bits = {
            'ttx': '== ', 'xtt': ' ==',
            'tx': '', 'xt': '',
            'capx': "'''", 'xcap': "'''",
            'rx': '|', 'xr': '|',
            'cx': '|', 'xc': '|',
            'rspx': '|<|', 'xrsp': '>',
            'hlx': '[', 'hxl': ' ', 'xhl': ']',
        }
    elif output_format == "html":
        bits = {
            'ttx': '<b>', 'xtt': '</b><hr>',
            'tx': '<table border = "1">', 'xt': '</table><br><br>',
            'capx': '<caption>', 'xcap': '</caption>',
            'rx': '<tr>', 'xr': '</tr>',
            'cx': '<td>', 'xc': '</td>',
            'rspx': '<td rowspan=', 'xrsp': '>',
            'hlx': '<a href="', 'hxl': '">', 'xhl': "</a>",
        }
    else:
        raise ValueError("unrecognized output_format %s" % output_format)

    # callers unpack positionally, so the order below is part of the contract
    order = ('ttx', 'xtt', 'tx', 'xt', 'capx', 'xcap', 'rx', 'xr',
             'cx', 'xc', 'rspx', 'xrsp', 'hlx', 'hxl', 'xhl')
    return tuple(bits[name] for name in order)

90 91

def smart_round(val, decimal_places=2):
    """
    Rounds a float for display, switching to exponential form for values
    of small magnitude.

    For floats with abs(val) >= 10.**-(decimal_places - 1), rounds off to the
    number of decimal places specified.
    For floats with abs(val) < 10.**-(decimal_places - 1), puts in exponential
    form then rounds off to the decimal places specified.

    @val: value to round; if val is not a float, or is 0.0, just returns val
    @decimal_places: number of decimal places to round to
    """
    if not isinstance(val, float) or val == 0.0:
        return val

    # Compare magnitudes: testing the signed value would send every negative
    # number down the exponential branch (e.g. -123.456 -> -123.0).
    if abs(val) >= 10.**-(decimal_places - 1):
        conv_str = '%.' + str(decimal_places) + 'f'
    else:
        conv_str = '%.' + str(decimal_places) + 'e'

    # round-trip through the formatted string so the result is still a float
    return float(conv_str % val)

108

def format_hyperlink(val, hlx, hxl, xhl):
    """
    Formats html hyperlinks into other forms.

    Every '<a href="' in val is replaced by hlx, the first '">' following
    each of them by hxl, and every '</a>' by xhl. If val contains no html
    hyperlink, or the target format is html itself (hlx is '<a href="'),
    val is returned untouched.

    @val: cell value; only inspected via str(val), returned as-is when
        there is nothing to convert
    @hlx, hxl, xhl: values returned by set_output_format
    """
    anchor_open = '<a href="'
    if anchor_open not in str(val) or hlx == anchor_open:
        return val

    # Handle each anchor separately so that the '">' closing every opening
    # tag is converted, not just the first one found in the whole string.
    pieces = val.split(anchor_open)
    converted = [pieces[0]]
    for piece in pieces[1:]:
        converted.append(piece.replace('">', hxl, 1))

    return hlx.join(converted).replace('</a>', xhl)

119

def format_cell(val, round_floats = False, decimal_places = 2, format_links = False,
    hlx = '', hxl = '', xhl = ''):
    """
    Prepares a single cell value for output.

    @val: the raw cell value
    @round_floats: if true, val is passed through smart_round
    @decimal_places: forwarded to smart_round
    @format_links: if true, the (possibly rounded) value is passed through
        format_hyperlink
    @hlx, hxl, xhl: hyperlink fragments forwarded to format_hyperlink

    Returns the processed value; with both switches off val is returned
    unchanged.
    """
    cell = val
    if round_floats:
        cell = smart_round(cell, decimal_places = decimal_places)
    return format_hyperlink(cell, hlx, hxl, xhl) if format_links else cell

131

def format_header_cell(val):
    """
    Turns a column name into a human-readable header.

    The name is converted with str(), then '_xP_' becomes ')', '_Px_'
    becomes '(' and, finally, every remaining '_' becomes a space.
    """
    header = str(val)
    # the parenthesis markers must be handled before the bare underscores
    header = header.replace('_xP_', ')')
    header = header.replace('_Px_', '(')
    return header.replace('_', ' ')

138

def get_row_data(row, column_name, cat_time_ns = True):
    """
    Fetches the value of column_name from row.

    @row: object carrying the column values as attributes; its attribute
        names are read from __slots__ if present, otherwise from __dict__
    @column_name: name of the attribute to read
    @cat_time_ns: if true, column_name contains "_time", and the row also has
        the attribute obtained by replacing "_time" with "_time_ns", the two
        are combined as int(seconds) + 1e-9 * int(nanoseconds)

    Returns the combined time as a float in the case above, otherwise the
    attribute value unchanged.
    """
    ns_column = re.sub(r'_time', r'_time_ns', column_name)
    try:
        available = list(row.__slots__)
    except AttributeError:
        available = list(row.__dict__)

    wants_merge = cat_time_ns and "_time" in column_name
    if not (wants_merge and ns_column in available):
        return getattr(row, column_name)

    seconds = int(getattr(row, column_name))
    nanoseconds = int(getattr(row, ns_column))
    return seconds + 10**(-9.)*nanoseconds

156 # 157 # ============================================================================= 158 # 159 # Library API 160 # 161 # ============================================================================= 162 # 163

def print_tables(xmldoc, output, output_format, tableList = [], columnList = [],
    round_floats = True, decimal_places = 2, format_links = True,
    title = None, print_table_names = True, unique_rows = False,
    row_span_columns = [], rspan_break_columns = []):
    """
    Method to print tables in an xml file in other formats.
    Input is an xmldoc; the converted tables are written to output.
    Nothing is returned.

    @xmldoc: document to convert
    @output: file object to write output to; if None, will write to stdout
    @output_format: format to convert to; passed to set_output_format
    @tableList: only convert the listed tables. Default is
        to convert all the tables found in the xmldoc. Tables
        not converted will not be written to output.
    @columnList: only print the columns listed, in the order given.
        This applies to all tables (if a table doesn't have a listed column, it's just
        skipped). To specify a column in a specific table, use table_name:column_name.
        Default is to print all columns.
    @round_floats: If turned on, will smart_round floats to specified
        number of places.
    @format_links: If turned on, will convert any html hyperlinks to specified
        output_format.
    @decimal_places: If round_floats turned on, will smart_round to this
        number of decimal places.
    @title: Add a title to this set of tables.
    @unique_rows: If two consecutive rows are exactly the same, will condense into
        one row.
    @print_table_names: If set to True, will print the name of each table
        in the caption section.
    @row_span_columns: For the columns listed, will
        concatenate consecutive cells with the same values
        into one cell that spans those rows. Default is to span no rows.
    @rspan_break_columns: Columns listed will prevent all cells
        from rowspanning across two rows in which values in the
        columns are different. Default is to have no break columns.

    NOTE(review): the list defaults are shared mutable objects; this function
    only reads them (tableList is rebound, never mutated), so they are not
    modified between calls.
    """
    # get the tables to convert
    if tableList == []:
        tableList = [tb.getAttribute("Name") for tb in xmldoc.childNodes[0].getElementsByTagName(u'Table')]

    # set the output
    if output is None:
        output = sys.stdout

    # get table bits
    ttx, xtt, tx, xt, capx, xcap, rx, xr, cx, xc, rspx, xrsp, hlx, hxl, xhl = set_output_format( output_format )

    # set the title if desired
    if title is not None:
        output.write("%s%s%s\n" %(ttx,str(title),xtt))
    # cycle over the tables in the xmldoc
    for table_name in tableList:
        this_table = table.get_table(xmldoc, table_name)
        if columnList == []:
            # no selection given: use every column, dropping any "prefix:" from its Name
            col_names = [ col.getAttribute("Name").split(":")[-1]
                for col in this_table.getElementsByTagName(u'Column') ]
        else:
            # keep unqualified names plus those qualified with this table's name
            requested_columns = [col.split(':')[-1] for col in columnList if not (':' in col and col.split(':')[0] != table_name) ]
            # drop duplicates while keeping the order of first appearance
            requested_columns = sorted(set(requested_columns), key=requested_columns.index)
            actual_columns = [actual_column.getAttribute("Name").split(":")[-1]
                for actual_column in this_table.getElementsByTagName(u'Column') ]
            # requested columns that this table does not have are skipped
            col_names = [col for col in requested_columns if col in actual_columns]
        # get the relevant row_span/break column indices; a column may be listed
        # either by bare name or as table_name:column_name
        rspan_indices = [ n for n,col in enumerate(col_names) if col in row_span_columns or ':'.join([table_name,col]) in row_span_columns ]
        break_indices = [ n for n,col in enumerate(col_names) if col in rspan_break_columns or ':'.join([table_name,col]) in rspan_break_columns ]

        # start the table and print table name
        output.write('%s\n' % tx)
        if print_table_names:
            output.write("%s%s%s\n" %(capx, table_name, xcap))
        # header row
        output.write("%s%s%s%s%s\n" %(rx, cx, (xc+cx).join(format_header_cell(val) for val in col_names), xc, xr))

        # format the data in the table; every cell is turned into a string here
        out_table = []
        last_row = ''
        for row in this_table:
            out_row = [ str(format_cell( get_row_data(row, col_name),
                round_floats = round_floats, decimal_places = decimal_places,
                format_links = format_links, hlx = hlx, hxl = hxl, xhl = xhl ))
                for col_name in col_names ]
            # comparison is on the formatted strings, and only against the
            # previously kept row
            if unique_rows and out_row == last_row:
                continue
            out_table.append(out_row)
            last_row = out_row

        # rspan_count maps (row index, column index) -> number of rows the cell
        # at that position must span. Rows are walked from the bottom up so the
        # count of a run of equal cells is handed upward until it reaches the
        # topmost cell of the run.
        rspan_count = {}
        for mm, row in enumerate(out_table[::-1]):
            this_row_idx = len(out_table) - (mm+1)
            # "next" in walking order, i.e. the row printed just above this one
            next_row_idx = this_row_idx - 1
            # check if it's ok to do row-span: the first row has nothing above it,
            # and every break column must agree between the two rows
            rspan_ok = rspan_indices != [] and this_row_idx != 0
            if rspan_ok:
                for jj in break_indices:
                    rspan_ok = out_table[this_row_idx][jj] == out_table[next_row_idx][jj]
                    if not rspan_ok: break
            # cycle over columns in the row setting row span values
            for nn, val in enumerate(row):
                # check if this cell should be spanned;
                # if so, delete it, update rspan_count and go on to next cell
                if rspan_ok and nn in rspan_indices:
                    if val == out_table[next_row_idx][nn]:
                        # same value as the cell above: blank this cell and move
                        # the accumulated span (or start one at 2) to the cell above
                        out_table[this_row_idx][nn] = ''
                        if (this_row_idx, nn) in rspan_count:
                            rspan_count[(next_row_idx,nn)] = rspan_count[(this_row_idx,nn)] + 1
                            del rspan_count[(this_row_idx,nn)]
                        else:
                            rspan_count[(next_row_idx,nn)] = 2
                    elif (this_row_idx, nn) in rspan_count:
                        # top of a run: emit the cell with its row-span argument
                        out_table[this_row_idx][nn] = ''.join([rspx, str(rspan_count[(this_row_idx,nn)]), xrsp, str(val), xc])
                    else:
                        out_table[this_row_idx][nn] = ''.join([cx, str(val), xc])
                    continue
                # format cell appropriately
                if (this_row_idx, nn) in rspan_count:
                    out_table[this_row_idx][nn] = ''.join([rspx, str(rspan_count[(this_row_idx,nn)]), xrsp, str(val), xc])
                else:
                    out_table[this_row_idx][nn] = ''.join([cx, str(val), xc])

        # print the table to output
        for row in out_table:
            output.write("%s%s%s\n" % (rx, ''.join(row), xr))

        # close the table and go on to the next
        output.write('%s\n' % xt)

290

Source Code for Module glue.ligolw.utils.print_tables