1 | # -*- coding: utf-8 -*- |
---|
2 | """ |
---|
3 | ------------------------------------------------------------------------------ |
---|
4 | Mango 802.11 Reference Design Experiments Framework - HDF5 Log File Utilities |
---|
5 | ------------------------------------------------------------------------------ |
---|
6 | License: Copyright 2019 Mango Communications, Inc. All rights reserved. |
---|
7 | Use and distribution subject to terms in LICENSE.txt |
---|
8 | ------------------------------------------------------------------------------ |
---|
9 | |
---|
10 | This module provides utility functions for HDF to handle wlan_exp log data. |
---|
11 | |
---|
12 | For wlan_exp log data manipulation, it is necessary to define a common file |
---|
13 | format so that it is easy for multiple consumers, both in python and other |
---|
14 | languages, to access the data. To do this, HDF5 is used as the container |
---|
15 | format with a couple of additional conventions to hold the log data as well as |
---|
16 | other pieces of information. Below are the rules to create an HDF5 file that |
---|
17 | will contain wlan_exp log data: |
---|
18 | |
---|
19 | wlan_exp_log_data_container (equivalent to a HDF5 group): |
---|
20 | /: Root Group in HDF5 file |
---|
21 | |- Attributes: |
---|
22 | | |- 'wlan_exp_log' (1,) bool |
---|
23 | | |- 'wlan_exp_ver' (3,) uint32 |
---|
24 | | |- <user provided attributes in attr_dict> |
---|
25 | |- Datasets: |
---|
26 | | |- 'log_data' (1,) voidN (where N is the size of the data) |
---|
27 | |- Groups (created if gen_index==True): |
---|
28 | |- 'raw_log_index' |
---|
29 | |- Datasets: |
---|
30 | (dtype depends if largest offset in raw_log_index is < 2^32) |
---|
31 | |- <int> (N1,) uint32/uint64 |
---|
32 | |- <int> (N2,) uint32/uint64 |
---|
33 | |- ... |
---|
34 | |
---|
35 | Naming convention: |
---|
36 | |
---|
37 | log_data -- The binary data from a wlan_exp node's log. |
---|
38 | |
---|
39 | raw_log_index -- This is an index that has not been interpreted / filtered |
---|
40 | and corresponds 1-to-1 with what is in given log_data. |
---|
41 | The defining characteristic of a raw_log_index is that |
---|
42 | the dictionary keys are all integers (entry type IDs): |
---|
43 | { <int> : [<offsets>] } |
---|
44 | |
---|
45 | log_index -- A log_index is any index that is not a raw_log_index. In |
---|
46 | general, this will be a interpreted / filtered version of |
---|
47 | a raw_log_index. |
---|
48 | |
---|
49 | hdf5 -- A data container format used to store log_data, |
---|
50 | raw_log_index, and other user defined attributes. You can |
---|
51 | find more documentation on HDF / HDF5 at: |
---|
52 | http://www.hdfgroup.org/ |
---|
53 | http://www.h5py.org/ |
---|
54 | |
---|
55 | numpy -- A python package that allows easy and fast manipulation of |
---|
56 |                  large data sets. You can find more documentation on numpy at: |
---|
57 | http://www.numpy.org/ |
---|
58 | """ |
---|
59 | |
---|
# Public API of this module: the HDF5 log container class plus the
# convenience functions for moving log data / indexes / attributes
# between memory and HDF5 files.
__all__ = ['np_arrays_to_hdf5',
           'HDF5LogContainer',
           'hdf5_open_file',
           'hdf5_close_file',
           'log_data_to_hdf5',
           'hdf5_to_log_data',
           'hdf5_to_log_index',
           'hdf5_to_attr_dict']
---|
68 | |
---|
69 | import sys |
---|
70 | from . import util as log_util |
---|
71 | |
---|
72 | |
---|
# Fix to support Python 2.x and 3.x
# Python 3 removed the 'unicode' builtin; alias it to 'str' so the
# isinstance/type checks below work on both major versions.
# (Use sys.version_info rather than indexing the sys.version string,
# which would break for any future two-digit major version.)
if sys.version_info[0] == 3: unicode = str
---|
75 | |
---|
76 | # ----------------------------------------------------------------------------- |
---|
77 | # HDF5 Log Container Class |
---|
78 | # ----------------------------------------------------------------------------- |
---|
class HDF5LogContainer(log_util.LogContainer):
    """Class to define an HDF5 log container.

    Args:
        file_handle (h5py.File()): Handle of the HDF5 file
        name (str, optional): Name of the HDF5 group of the log container
            (defaults to the root group "/")
        compression (bool, optional): HDF5 compression setting on the log container

    When an HDF5LogContainer is created, the underlying HDF5 file will not be
    modified unless one of the write_* methods are called.
    """
    # Name of the HDF5 group that holds this container ("/" for the root group)
    hdf5_group_name = None
    # Compression setting passed through to h5py create_dataset() calls
    compression = None


    def __init__(self, file_handle, name=None, compression=None):
        """Store the file handle, group name and compression setting.

        Does not touch the file; the container group / datasets are created
        lazily by the write_* methods via _get_valid_group_handle().
        """
        super(HDF5LogContainer, self).__init__(file_handle)

        self.compression = compression

        # No name provided --> use the HDF5 root group
        if name is None:
            self.hdf5_group_name = "/"
        else:
            self.hdf5_group_name = name


    def is_valid(self):
        """Check that the HDF5 Log Container is valid.

        A valid container has the 'wlan_exp_log' and 'wlan_exp_ver' group
        attributes and a 'log_data' dataset of HDF5 opaque (numpy void) type.
        Version mismatches against the installed wlan_exp are reported to the
        user but do not, by themselves, make the container invalid.

        Returns:
            is_valid (bool):
                * True  --> This is a valid HDF5 log file
                * False --> This is NOT a valid HDF5 log file
        """
        import numpy as np
        import wlan_exp.version as version

        # NOTE(review): _get_group_handle() may actually create the group for
        # non-root group names (see that method) — confirm against intent.
        group_handle = self._get_group_handle()

        if group_handle is None:
            msg = "WARNING: Log container is not valid.\n"
            msg += "    Could not find {0} in file.".format(self.hdf5_group_name)
            print(msg)
            return False

        try:
            if group_handle.attrs['wlan_exp_log']:
                # Require two attributes named 'wlan_exp_log' and 'wlan_exp_ver'
                ver = group_handle.attrs['wlan_exp_ver']

                ver_str = version.wlan_exp_ver_str(ver[0], ver[1], ver[2])
                ver_older_than_093 = (ver[0], ver[1], ver[2]) < (0, 9, 3)
                caller_desc = "HDF5 file '{0}' was written using version {1}".format(self.file_handle.filename, ver_str)

                # Compare file version against the installed wlan_exp version;
                # prints its own diagnostics using caller_desc
                status = version.wlan_exp_ver_check(major=ver[0], minor=ver[1], revision=ver[2],
                                                    caller_desc=caller_desc)


                # Special case: files written before 0.9.3 need an offline
                # conversion step before this code can read them
                if (status == version.WLAN_EXP_VERSION_NEWER and
                        (version.wlan_exp_ver() >= (0, 9, 3)) and ver_older_than_093):
                    msg = "The HDF5 file uses a version older than 0.93, please convert using \n"
                    msg += "the log_util_hdf5convert.py utility found the example directory in \n"
                    msg += "releases prior to 1.0."
                    print(msg)

                if (status == version.WLAN_EXP_VERSION_OLDER):
                    print("Please update the wlan_exp installation to match the version on the HDF5 file.")

            else:
                msg = "WARNING: Log container is not valid.\n"
                msg += "    'wlan_exp_log' attribute indicates log container is not valid."
                print(msg)
                return False

            if group_handle['log_data']:
                # Require a dataset named 'log_data'
                if(group_handle['log_data'].dtype.kind != np.dtype(np.void).kind):
                    # Require the 'log_data' dataset to be HDF5 opaque type (numpy void type)
                    msg = "WARNING: Log container is not valid.\n"
                    msg += "    Log Data is not valid type.  Must be an HDF5 opaque type."
                    print(msg)
                    return False
        except Exception as err:
            # Any missing attribute/dataset (KeyError from h5py) or other
            # failure means the container cannot be trusted
            msg = "WARNING: Log container is not valid.  The following error occurred:\n"
            msg += "    {0}".format(err)
            print(msg)
            return False

        return True


    def write_log_data(self, log_data, append=True):
        """Write the log data to the log container.

        Args:
            log_data (bytes): Binary data from a WlanExpNode log
            append (bool, optional): Append to (True) or Overwrite (False) the current log data

        Raises:
            AttributeError: If the file is not writeable or log_data is empty
        """
        import numpy as np

        if not self._file_writeable():
            raise AttributeError("File {0} is not writeable.".format(self.file_handle))

        # Creates the container structure on first write if needed
        group_handle = self._get_valid_group_handle()

        # Log data is stored as a 1-D array of 1-byte opaque (void) elements
        np_dt = np.dtype('V1')
        log_data_length = len(log_data)

        # Raise an exception if the log data length is zero
        if (log_data_length == 0):
            raise AttributeError("Did not provide any log data.")

        # Get the log_data from the group data set
        ds = group_handle['log_data']

        # Set length of current data
        # (overwrite mode starts writing at offset 0 but does not shrink
        # the dataset below the new data's length)
        if append:
            curr_length = ds.shape[0]
        else:
            curr_length = 0

        # Get total length of data
        length = curr_length + log_data_length

        # Create empty numpy container
        np_data = np.empty((log_data_length,), np_dt)

        # Redirect numpy array data pointer to the existing buffer object passed in by user
        # NOTE(review): assigning to ndarray.data is deprecated/removed in
        # newer numpy releases; np.frombuffer(log_data, dtype=np_dt) looks
        # like the modern equivalent — confirm before upgrading numpy.
        np_data.data = log_data

        # Grow the (resizeable) dataset, then copy the new bytes in place
        ds.resize((length,))
        ds[curr_length:length,] = np_data


    def write_log_index(self, log_index=None):
        """Write the log index to the log container.

        Args:
            log_index (dict): Log index generated from wlan_exp log data

        If the log index currently exists in the HDF5 file, that log index
        will be replaced with this new log index.  If log_index is provided
        then that log index will be written to the log container.  Otherwise,
        a raw log index will be generated and added to the log container.

        Raises:
            AttributeError: If the file is not writeable, a raw index cannot
                be generated, or the index cannot be written to the group
        """
        import numpy as np

        if not self._file_writeable():
            raise AttributeError("File {0} is not writeable.".format(self.file_handle))

        index_name = "log_index"
        group_handle = self._get_valid_group_handle()

        # No index given --> regenerate a raw index from the stored log data
        if log_index is None:
            log_index = self._create_raw_log_index()

            if log_index is None:
                raise AttributeError("Unable to create raw log index for group: {0}\n".format(group_handle))

        # Delete any existing 'log_index' in the group
        try:
            # Normally the try-catch would handle this error but in HDF5 1.8.9
            # exceptions are not properly thrown when using h5py, so the check
            # needs to be coded this way to not get a lot of garbage output.
            #
            for group in group_handle.keys():
                if (group == index_name):
                    del group_handle[index_name]
        except KeyError:
            pass

        # Write the log index to the group: one dataset per entry type,
        # named by the (stringified) entry type ID
        try:
            index_grp = group_handle.create_group(index_name)

            for k, v in log_index.items():
                # Check if highest-valued entry index can be represented as uint32 or requires uint64
                # (offsets are ascending, so the last element is the largest)
                if (v[-1] < 2**32):
                    dtype = np.uint32
                else:
                    dtype = np.uint64

                # Group names must be strings - keys here are known to be integers (entry_type_id values)
                index_grp.create_dataset(str(k), data=np.array(v, dtype=dtype), maxshape=(None,), compression=self.compression)
        except Exception as err:
            print("ERROR:\n    {0}\n".format(err))
            raise AttributeError("Unable to add log_index to log container: {0}\n".format(group_handle))


    def write_attr_dict(self, attr_dict):
        """Add the given attribute dictionary to the opened log container.

        All existing user attributes on the group are removed first; the
        default 'wlan_exp_log' / 'wlan_exp_ver' attributes are preserved and
        cannot be overwritten through attr_dict.

        Args:
            attr_dict (dict): A dictionary of user provided attributes that will be added to the HDF5 group.
        """
        import numpy as np

        if not self._file_writeable():
            raise AttributeError("File {0} is not writeable.".format(self.file_handle))

        default_attrs = ['wlan_exp_log', 'wlan_exp_ver']
        group_handle = self._get_valid_group_handle()

        # Remove all current attributes, except default attributes
        for k in group_handle.attrs.keys():
            if k not in default_attrs:
                del group_handle.attrs[k]

        # Write the attribute dictionary to the group
        for k, v in attr_dict.items():
            try:
                if k not in default_attrs:
                    if (type(k) is str):
                        # String values are stored as fixed-length HDF5 strings
                        # NOTE(review): np.string_ is deprecated in numpy 2.0
                        # (np.bytes_ is the replacement) — confirm on upgrade.
                        if ((type(v) is str) or (type(v) is unicode)):
                            group_handle.attrs[k] = np.string_(v)
                        else:
                            group_handle.attrs[k] = v
                    else:
                        # Non-string keys are coerced; warn so the caller knows
                        print("WARNING: Converting '{0}' to string to add attribute.".format(k))
                        group_handle.attrs[str(k)] = v
            except KeyError:
                print("WARNING: Could not add attribute '{0}' to group {1}".format(k, group_handle))


    def get_log_data_size(self):
        """Get the current size of the log data in the log container.

        Returns:
            size (int): Number of bytes of log data in the log container
        """

        group_handle = self._get_valid_group_handle()

        # Get the log_data from the group data set
        ds = group_handle['log_data']

        # Return the length of the data (dataset elements are 1 byte each)
        return ds.shape[0]


    def get_log_data(self):
        """Get the log data from the log container.

        Returns:
            log_data (bytes): Bytes object of the log data in the container
        """
        import numpy as np

        group_handle = self._get_valid_group_handle()

        # Get the log_data from the group data set
        ds = group_handle['log_data']
        log_data_np = np.empty(shape=ds.shape, dtype=ds.dtype)

        # Use the h5py library's HDF5 -> numpy hooks to preserve the log_data size and void type
        ds.read_direct(log_data_np)

        # Point to the numpy array's underlying buffer to find the raw log_data to return
        log_data = bytes(log_data_np.data)

        return log_data


    def get_log_index(self, gen_index=True):
        """Get the raw log index from the log container.

        Args:
            gen_index (bool, optional): Generate the raw log index if the log index does not
                exist in the log container.

        Returns:
            log_index (dict): Log index from the log container

        Raises:
            AttributeError: If no index could be read or generated
        """
        error = False
        log_index = {}
        group_handle = self._get_valid_group_handle()

        # Get the raw_log_index group from the specified group
        # NOTE(review): the group read here is named "log_index", while the
        # module header describes a 'raw_log_index' group — confirm which
        # name matches files in the wild.
        try:
            index_group = group_handle["log_index"]

            for k, v in index_group.items():
                # Re-construct the raw_log_index dictionary, using integers
                # (really entry_type IDs) as the keys and Python lists as values
                # the [:] slice here is important - flattening the returned numpy array before
                # listifying is *way* faster (>10x) than just v.toList()

                try:
                    log_index[int(k)] = v[:].tolist()
                except ValueError:
                    # Key is not an integer entry type ID; keep it as-is
                    log_index[k] = v[:].tolist()

                # Alternative to [:].toList() above - adds safety in assuring dictionary value is
                # Python list of ints, a requirement of downstream methods
                #     raw_log_index[int(k)] = map(int, v[:])   # fastish
        except KeyError:
            error = True

        # If there was an error getting the raw_log_index from the file and
        # gen_index=True, then generate the raw_log_index from the log_data
        # in the file
        if error and gen_index:
            log_index = self._create_raw_log_index()

        # If the log index is empty or None, then raise an exception
        if not log_index:
            msg = "Unable to get log index from "
            msg += "group {0} of {1}.".format(self.hdf5_group_name, self.file_handle)
            raise AttributeError(msg)

        return log_index


    def get_attr_dict(self):
        """Get the attribute dictionary from the log container.

        Returns:
            attr_dict (dict): The dictionary of user provided attributes in the log container.
        """
        import numpy as np

        attr_dict = {}
        group_handle = self._get_valid_group_handle()

        for k, v in group_handle.attrs.items():
            try:
                if (type(v) == np.bytes_):
                    # NOTE(review): on Python 3, str(bytes) yields "b'...'";
                    # v.decode() may be the intended conversion — confirm.
                    attr_dict[k] = str(v)
                else:
                    attr_dict[k] = v
            except KeyError:
                print("WARNING: Could not retreive attribute '{0}' from group {1}".format(k, group_handle))

        return attr_dict


    def trim_log_data(self):
        """Trim the log data so that it ends on an entry boundary.

        Raises:
            NotImplementedError: Always; this operation is not implemented.
        """
        raise NotImplementedError


    # -------------------------------------------------------------------------
    # Internal methods for the container
    # -------------------------------------------------------------------------
    def _get_valid_group_handle(self):
        """Internal method to get a valid handle to the HDF5 group or raise an exception."""
        group_handle = self._get_group_handle()

        # Create container if group is empty (no attributes means the group
        # has never been initialized as a log container)
        if not group_handle.attrs.keys():
            self._create_container(group_handle)

        # Raise exception if group is not valid
        if not self.is_valid():
            raise AttributeError("Log container not valid: {0}\n".format(group_handle))

        return group_handle


    def _get_group_handle(self):
        """Internal method to get a handle to the HDF5 group.

        For non-root group names the group is created if it does not exist.

        Raises:
            AttributeError: If the group cannot be created
        """
        group_name = self.hdf5_group_name
        file_handle = self.file_handle

        # Using the root group?
        if (group_name == "/"):
            # Use the root group (an h5py.File doubles as its root group)
            return file_handle

        # Check group exists in the file
        try:
            return file_handle[group_name]
        except KeyError:
            # Try to create the group
            try:
                return file_handle.create_group(group_name)
            except ValueError:
                msg = "Cannot create group {0} ".format(self.hdf5_group_name)
                msg += "in {0}".format(self.file_handle)
                raise AttributeError(msg)

        # Could not get the group handle, return None
        # NOTE(review): unreachable — every path above returns or raises.
        return None


    def _create_container(self, group):
        """Internal method to create a valid log data container.

        Adds the default attributes and an empty, resizeable 'log_data'
        dataset to the given group.
        """
        import numpy as np
        import wlan_exp.version as version

        # Add default attributes to the group
        group.attrs['wlan_exp_log'] = np.array([1], dtype=np.uint8)
        group.attrs['wlan_exp_ver'] = np.array(version.wlan_exp_ver(), dtype=np.uint32)

        # Create an empty numpy array of type 'V1' (ie one byte void)
        np_dt = np.dtype('V1')
        np_data = np.empty((0,), np_dt)

        # Create an empty re-sizeable data set for the numpy-formatted data
        group.create_dataset("log_data", data=np_data, maxshape=(None,), compression=self.compression)


    def _create_raw_log_index(self):
        """Internal method to create a raw log index pulling data from the HDF5 file.

        Returns:
            raw_log_index (dict or None): Generated index, or None on failure.
        """
        try:
            log_data = self.get_log_data()
            raw_log_index = log_util.gen_raw_log_index(log_data)
        except AttributeError:
            # No valid log data in the container
            raw_log_index = None

        return raw_log_index


    def _file_writeable(self):
        """Internal method to check if the HDF5 file is writeable."""
        if (self.file_handle.mode == 'r'):
            return False
        else:
            return True

# End class()
---|
501 | |
---|
502 | |
---|
503 | |
---|
504 | |
---|
505 | # ----------------------------------------------------------------------------- |
---|
506 | # Log HDF5 file Utilities |
---|
507 | # ----------------------------------------------------------------------------- |
---|
def hdf5_open_file(filename, readonly=False, append=False, print_warnings=True):
    """Open an HDF5 file and return its handle.

    Args:
        filename (str): Filename of the HDF5 file to open
        readonly (bool, optional): Open the file in read-only mode
        append (bool, optional): Append to the data in the current file
        print_warnings (bool, optional): Print warning messages

    Returns:
        file_handle (h5py.File): Handle for the HDF5 file


    Behavior of input attributes:
        +------------+------------+----------------------------------------------------------+
        | readonly   | append     | Behavior                                                 |
        +============+============+==========================================================+
        | True       | T/F        | File opened in read-only mode                            |
        +------------+------------+----------------------------------------------------------+
        | False      | True       | File opened in append mode; created if it does not exist |
        +------------+------------+----------------------------------------------------------+
        | False      | False      | If file with filename exists, then a new filename is     |
        |            |            | generated using the log utilities.  The new file is then |
        |            |            | created by the h5py File method (DEFAULT)                |
        +------------+------------+----------------------------------------------------------+
    """

    import os
    import h5py

    # Read-only access never needs a safe filename or mode selection
    if readonly:
        return h5py.File(filename, mode='r')

    # Choose the filename to write to
    if append:
        # Re-use the caller's filename, warning if it already exists
        if os.path.isfile(filename) and print_warnings:
            print("WARNING: Opening existing file {0} in append mode".format(filename))
        h5_filename = filename
    else:
        # Never clobber an existing file: derive a unique name if needed
        h5_filename = log_util._get_safe_filename(filename, print_warnings)

    # Open in 'a' (read/write, file must exist) when the file is present;
    # otherwise create it with 'w'.  The explicit 'w' branch works around an
    # Anaconda bug where 'a' does not raise the IOError needed to fall back
    # to file creation.
    if os.path.isfile(h5_filename):
        return h5py.File(h5_filename, mode='a')

    return h5py.File(h5_filename, mode='w')

# End def
---|
568 | |
---|
569 | |
---|
570 | |
---|
def hdf5_close_file(file_handle):
    """Close a previously opened HDF5 file.

    Args:
        file_handle (h5py.File): Handle for the HDF5 file
    """
    file_handle.close()

# End def
---|
580 | |
---|
581 | |
---|
582 | |
---|
def log_data_to_hdf5(log_data, filename, attr_dict=None, gen_index=True, overwrite=False):
    """Create an HDF5 file that contains the log_data, a raw_log_index, and any
    user attributes.

    Args:
        log_data (bytes): Binary data from a WlanExpNode log
        filename (str): Filename of the HDF5 file to appear on disk
        attr_dict (dict, optional): A dictionary of user provided attributes that will be added to the HDF5 group.
        gen_index (bool, optional): Generate the ``raw_log_index`` from the ``log_data`` and store it in the file.
        overwrite (bool, optional): If True method will overwrite existing file with filename

    If the requested filename already exists and ``overwrite==True`` this
    method will replace the existing file, destroying any data in the original file.

    If the filename already exists and ``overwrite==False`` this method will print a warning,
    then create a new filename with a unique date-time suffix.
    """
    # Suppress safe-filename warnings when the caller asked to overwrite
    file_handle = hdf5_open_file(filename, print_warnings=(not overwrite))

    # The filename actually opened may differ from the requested one
    # (safe-filename generation; see hdf5_open_file)
    real_filename = file_handle.filename

    container = HDF5LogContainer(file_handle)

    try:
        # Write the raw log bytes first
        container.write_log_data(log_data)

        if gen_index:
            # Build the index directly from the in-memory log_data instead of
            # letting write_log_index() re-read it back out of the HDF5 file
            container.write_log_index(log_util.gen_raw_log_index(log_data))

        if attr_dict is not None:
            container.write_attr_dict(attr_dict)

    except AttributeError as err:
        print("Error writing log file: {0}".format(err))

    hdf5_close_file(file_handle)

    # Overwrite requested but the data landed in a safe-named temp file:
    # move it over the original via the os
    if overwrite and (real_filename != filename):
        import os
        os.remove(filename)
        os.rename(real_filename, filename)

# End log_data_to_hdf5()
---|
643 | |
---|
644 | |
---|
645 | |
---|
def hdf5_to_log_data(filename, group_name=None):
    """Extract the log_data from an HDF5 Log Container.

    Args:
        filename (str): Name of HDF5 file to open
        group_name (str, optional): Name of Group within the HDF5 file object
            (defaults to the root group "/")

    Returns:
        log_data (bytes): Log data in the HDF5 file, or None on error
    """
    file_handle = hdf5_open_file(filename, readonly=True)
    container = HDF5LogContainer(file_handle, group_name)

    log_data = None
    try:
        log_data = container.get_log_data()
    except AttributeError as err:
        # Container invalid or log data missing; report and return None
        print("Error reading log file: {0}".format(err))

    hdf5_close_file(file_handle)
    return log_data

# End hdf5_to_log_data()
---|
679 | |
---|
680 | |
---|
681 | |
---|
def hdf5_to_log_index(filename, group_name=None, gen_index=True):
    """Extract the log_index from an HDF5 Log Container.

    Args:
        filename (str): Name of HDF5 file to open
        group_name (str, optional): Name of Group within the HDF5 file object
            (defaults to the root group "/")
        gen_index (bool, optional): Generate the ``raw_log_index`` from the ``log_data``
            if the ``log_index`` is not in the file.

    Returns:
        log_index (dict): Either the ``log_index`` from the HDF5 file or a generated
            ``raw_log_index`` from ``log_data`` in the HDF5 file; None on error
    """
    file_handle = hdf5_open_file(filename, readonly=True)
    container = HDF5LogContainer(file_handle, group_name)

    log_index = None
    try:
        log_index = container.get_log_index(gen_index)
    except AttributeError as err:
        # Container invalid or no index available; report and return None
        print("Error reading log file: {0}".format(err))

    hdf5_close_file(file_handle)
    return log_index

# End hdf5_to_log_index()
---|
719 | |
---|
720 | |
---|
721 | |
---|
def hdf5_to_attr_dict(filename=None, group_name=None):
    """Extract the attribute dictionary from an HDF5 Log Container.

    Args:
        filename (str): Name of HDF5 file to open
            (NOTE(review): unlike the sibling hdf5_to_* functions this
            parameter has a ``None`` default, although ``None`` is not a
            usable value — confirm whether the default is intentional)
        group_name (str, optional): Name of Group within the HDF5 file object
            (defaults to the root group "/")

    Returns:
        attr_dict (dict): The dictionary of user provided attributes in the
            HDF5 file, or None on error
    """
    file_handle = hdf5_open_file(filename, readonly=True)
    container = HDF5LogContainer(file_handle, group_name)

    attr_dict = None
    try:
        attr_dict = container.get_attr_dict()
    except AttributeError as err:
        # Container invalid or attributes unreadable; report and return None
        print("Error reading log file: {0}".format(err))

    hdf5_close_file(file_handle)
    return attr_dict

# End hdf5_to_attr_dict()
---|
755 | |
---|
756 | |
---|
757 | |
---|
758 | |
---|
def np_arrays_to_hdf5(filename, np_log_dict, attr_dict=None, compression=None):
    """Generate an HDF5 file from numpy arrays.

    Args:
        filename (str): Name of HDF5 file to open
        np_log_dict (Numpy Array): Numpy array to add to the HDF5 file
        attr_dict (dict, optional): A dictionary of user provided attributes that will be added to the HDF5 file.
        compression (bool, optional): HDF5 compression setting on the log container

    Raises:
        ValueError: If np_log_dict is empty (checked before the output file
            is created, so an existing file is never truncated by mistake)

    The np_log_dict input must be either:
        #. A dictionary with numpy record arrays as values; each array will be a dataset in
           the HDF5 file root group
        #. A dictionary of dictionaries like (1); each top-level value will be a group in the
           root HDF5 group, each numpy array will be a dataset in the group.

    **attr_dict** is optional. If provided, values in attr_dict will be copied to HDF5
    group and dataset attributes. attr_dict values with keys matching np_log_dict keys
    will be used as dataset attributes named ``'<the_key>_INFO'``. Attribute dictionary
    entries may have an extra value with key ``'/'``, which will be used as the value for a
    group attribute named ``'INFO'``.

    Examples:
    ::

        # No groups - all datasets in root group
        np_log_dict = {
            'RX_OFDM': np_array_of_rx_entries,
            'TX_HIGH': np_array_of_tx_entries
        }

        attr_dict = {
            '/': 'Data from some_log_file.bin, node serial number W3-a-00001, written on 2014-03-18',
            'RX_OFDM': 'Filtered Rx OFDM events, only good FCS receptions',
            'TX_HIGH': 'Filtered Tx events, only DATA packets'
        }

        # Two groups, with two datasets in each group
        np_log_dict = {
            'Log_Node_A': {
                'RX_OFDM': np_array_of_rx_entries_A,
                'TX_HIGH': np_array_of_tx_entries_A
            },
            'Log_Node_B': {
                'RX_OFDM': np_array_of_rx_entries_B,
                'TX_HIGH': np_array_of_tx_entries_B
            }
        }

        attr_dict = {
            '/': 'Written on 2014-03-18',
            'Log_Node_A': {
                '/': 'Data from node_A_log_file.bin, node serial number W3-a-00001',
                'RX_OFDM': 'Filtered Rx OFDM events, only good FCS receptions',
                'TX_HIGH': 'Filtered Tx events, only DATA packets'
            },
            'Log_Node_B': {
                '/': 'Data from node_B_log_file.bin, node serial number W3-a-00002',
                'RX_OFDM': 'Filtered Rx OFDM events, only good FCS receptions',
                'TX_HIGH': 'Filtered Tx events, only DATA packets'
            }
        }
    """
    import h5py

    # Fail fast on empty input; the original code crashed later with an
    # opaque IndexError at np_log_dict[dk[0]] after already creating /
    # truncating the output file.
    if not np_log_dict:
        raise ValueError("np_log_dict must contain at least one entry")

    dk = list(np_log_dict.keys())

    h5_filename = log_util._get_safe_filename(filename)
    hf = h5py.File(h5_filename, mode='w')

    # Guarantee the file is closed even if group/dataset creation raises
    # (the original left the handle open on any mid-write failure).
    try:
        try:
            # Copy any user-supplied attributes to root group
            # h5py uses the h5py.File handle to access the file itself and the root group
            hf.attrs['INFO'] = attr_dict['/']
        except (KeyError, TypeError):
            # TypeError - attrs dictionary does not exist
            # KeyError - attrs dictionary exists but key does not
            pass

        if isinstance(np_log_dict[dk[0]], dict):
            # np_log_dict is dictionary-of-dictionaries
            # Create an HDF5 file with one group per value in np_log_dict
            # with one dataset per value in np_log_dict[each key]
            # This is a good structure for one dictionary containing one key-value
            # per parsed log file, where the key is the log file name and the
            # value is another dictionary containing the log entry arrays

            for grp_k in np_log_dict.keys():
                # Create one group per log file, using log file name as group name
                grp = hf.create_group(grp_k)

                try:
                    grp.attrs['INFO'] = attr_dict[grp_k]['/']
                except (KeyError, TypeError):
                    pass

                for arr_k in np_log_dict[grp_k].keys():
                    # Create one dataset per numpy array of log data
                    ds = grp.create_dataset(arr_k, data=np_log_dict[grp_k][arr_k], compression=compression)

                    try:
                        ds.attrs[arr_k + '_INFO'] = attr_dict[grp_k][arr_k]
                    except (KeyError, TypeError):
                        pass

        else:
            # np_log_dict is dictionary-of-arrays
            # Create HDF5 file with datasets in root, one per np_log_dict[each key]

            for arr_k in np_log_dict.keys():
                # Create one dataset per numpy array of log data
                ds = hf.create_dataset(arr_k, data=np_log_dict[arr_k], compression=compression)

                try:
                    ds.attrs[arr_k + '_INFO'] = attr_dict[arr_k]
                except (KeyError, TypeError):
                    pass
    finally:
        hf.close()

    return

# End np_arrays_to_hdf5()
879 | |
---|
880 | |
---|
881 | |
---|
882 | |
---|