1 | #cython: boundscheck=False |
---|
2 | #cython: wraparound=False |
---|
3 | |
---|
4 | # This is a faster implementation of coll_util that uses cython and OpenMP. |
---|
5 | # On a 2013 Macbook Pro Retina, the speed of finding overlapping transmissions |
---|
6 | # within two tx vectors of ~135,000 transmissions is the following: |
---|
7 | # Pure Python (coll_util.py): 2.018 seconds |
---|
8 | # Cython, no OpenMP (coll_util_fast.pyx): 0.128 seconds |
---|
9 | # Cython, with OpenMP (coll_util_fast.pyx): 0.056 seconds |
---|
10 | # |
---|
11 | # The downside is that this file must be explicitly compiled, which is a process |
---|
12 | # that is potentially tricky depending on your platform. Unless you are dealing |
---|
13 | # with extremely large datasets, it probably isn't worth it. |
---|
14 | # |
---|
15 | # Should you choose to build this file, there are two approaches: |
---|
16 | # 1) Implicit building with pyximport |
---|
17 | # Pros: If it works on your platform, then it's basically just like an import |
---|
18 | # Cons: * I'm sure it's possible, but I haven't been able to get OpenMP to compile |
---|
19 | # in with this method. |
---|
20 | # * I'm sure it's possible, but I haven't been able to get this to work |
---|
21 | # under Windows. |
---|
22 | # In wlan_exp.log.util.py, find the "find_overlapping_tx_low" method. |
---|
23 | # Replace "import wlan_exp.log.coll_util as collision_utility" with the following: |
---|
24 | # import pyximport |
---|
25 | # pyximport.install(setup_args={'include_dirs':[np.get_include()]}) |
---|
26 | # import wlan_exp.log.coll_util_fast as collision_utility |
---|
27 | # |
---|
28 | # 2) Explicit building |
---|
29 | # Pros: * I've gotten this to work under Windows with the MinGW compiler, |
---|
30 | # but even then I still wasn't able to compile in OpenMP. |
---|
31 | # * OpenMP works under OSX with this method (provided you download |
---|
32 | # gcc from homebrew... the provided Clang doesn't play nice with |
---|
33 | # OpenMP) |
---|
34 | # |
---|
35 | # The setup.py file in this directly is like a makefile. It's currently set up |
---|
36 | # my platform, so you should look online how to construct it for yours. Note: |
---|
37 | # the "gcc" under Mac OSX Mavericks is not really gcc, but rather clang. I had |
---|
38 | # some trouble getting OpenMP compiled in with this, so I used homebrew to install |
---|
39 | # gcc-4.8. I think explicitly set the CC environment to gcc-4.8 in setup.py. |
---|
40 | # |
---|
41 | # To build the file, navigate to this folder in a terminal and enter: |
---|
42 | # python setup.py build_ext --inplace |
---|
43 | # |
---|
44 | # This will create a coll_util_fast.so, which is the compiled object. Now |
---|
45 | # you can simply "import wlan_exp.log.coll_util_fast as collision_utility" from |
---|
46 | # wlan_exp.log.util.py and it will import the already-compiled version. |
---|
47 | |
---|
48 | from cython.parallel cimport prange |
---|
49 | cimport cython |
---|
50 | |
---|
51 | import numpy as np |
---|
52 | cimport numpy as np |
---|
53 | |
---|
54 | DTYPE = np.uint64 |
---|
55 | ctypedef np.uint64_t DTYPE_t |
---|
56 | |
---|
57 | def _collision_idx_finder_l(np.ndarray[DTYPE_t, ndim=1] src_ts, np.ndarray[DTYPE_t, ndim=1] src_dur, np.ndarray[DTYPE_t, ndim=1] int_ts, np.ndarray[DTYPE_t, ndim=1] int_dur): |
---|
58 | |
---|
59 | import wlan_exp.log.util as log_util |
---|
60 | import wlan_exp.util as wlan_exp_util |
---|
61 | |
---|
62 | assert src_ts.dtype == DTYPE and src_dur.dtype == DTYPE and int_ts.dtype == DTYPE and int_dur.dtype == DTYPE |
---|
63 | |
---|
64 | cdef int num_src = src_ts.shape[0] |
---|
65 | cdef int num_int = src_ts.shape[0] |
---|
66 | |
---|
67 | cdef np.ndarray[DTYPE_t, ndim=1] src_coll_idx = np.zeros([num_src], dtype=DTYPE) |
---|
68 | cdef np.ndarray[DTYPE_t, ndim=1] int_coll_idx = np.zeros([num_int], dtype=DTYPE) |
---|
69 | |
---|
70 | cdef DTYPE_t curr_src_ts |
---|
71 | cdef DTYPE_t curr_src_dur |
---|
72 | cdef DTYPE_t curr_int_ts |
---|
73 | cdef DTYPE_t curr_int_dur |
---|
74 | |
---|
75 | int_ts_end = int_ts + int_dur |
---|
76 | |
---|
77 | cdef int src_idx, int_idx, int_idx_start |
---|
78 | |
---|
79 | int_idx_start = 0 |
---|
80 | |
---|
81 | cdef int int_idx_high, int_idx_low |
---|
82 | |
---|
83 | for src_idx in prange(num_src, nogil=True): |
---|
84 | |
---|
85 | curr_src_ts = src_ts[src_idx] |
---|
86 | curr_src_dur = src_dur[src_idx] |
---|
87 | |
---|
88 | int_idx = num_int/2 |
---|
89 | int_idx_high = num_int-1 |
---|
90 | int_idx_low = 0 |
---|
91 | |
---|
92 | #start in middle and branch out |
---|
93 | while 1: |
---|
94 | if (int_idx == int_idx_low or int_idx == int_idx_high): |
---|
95 | #We're done. No overlap |
---|
96 | break |
---|
97 | |
---|
98 | curr_int_ts = int_ts[int_idx] |
---|
99 | curr_int_dur = int_dur[int_idx] |
---|
100 | |
---|
101 | if ( curr_int_ts < (curr_src_ts + curr_src_dur) ) and ( curr_src_ts < (curr_int_ts + curr_int_dur) ): |
---|
102 | #We found an overlap |
---|
103 | src_coll_idx[src_idx] = src_idx |
---|
104 | int_coll_idx[src_idx] = int_idx |
---|
105 | break |
---|
106 | elif ( curr_int_ts < curr_src_ts ): |
---|
107 | #Overlap may be later -- move index forward |
---|
108 | int_idx_low = int_idx |
---|
109 | else: |
---|
110 | #Overlap may be earlier -- move index back |
---|
111 | int_idx_high = int_idx |
---|
112 | |
---|
113 | int_idx = (int_idx_high - int_idx_low)/2 + int_idx_low |
---|
114 | |
---|
115 | |
---|
116 | |
---|
117 | return (src_coll_idx,int_coll_idx) |
---|
118 | |
---|