Add urh
This commit is contained in:
@ -0,0 +1,240 @@
|
||||
# noinspection PyUnresolvedReferences
|
||||
cimport numpy as np
|
||||
import numpy as np
|
||||
from cpython cimport array
|
||||
import array
|
||||
import cython
|
||||
|
||||
from cython.parallel import prange
|
||||
from libc.stdlib cimport malloc, free
|
||||
from libcpp.algorithm cimport sort
|
||||
from libc.stdint cimport uint64_t
|
||||
|
||||
cpdef tuple k_means(float[:] data, unsigned int k=2):
|
||||
cdef float[:] centers = np.empty(k, dtype=np.float32)
|
||||
cdef list clusters = []
|
||||
cdef set unique = set(data)
|
||||
cdef unsigned long i
|
||||
|
||||
if len(unique) < k:
|
||||
print("Warning: less different values than k")
|
||||
k = len(unique)
|
||||
|
||||
for i in range(k):
|
||||
centers[i] = unique.pop()
|
||||
clusters.append([])
|
||||
|
||||
cdef float[:] old_centers = np.array(centers, dtype=np.float32)
|
||||
cdef float distance, min_distance, error = 1.0
|
||||
cdef unsigned int j, index = 0, N = len(data)
|
||||
|
||||
while error != 0:
|
||||
for i in range(k):
|
||||
clusters[i].clear()
|
||||
|
||||
for i in range(N):
|
||||
min_distance = 999999999
|
||||
for j in range(k):
|
||||
distance = (centers[j] - data[i]) * (centers[j] - data[i])
|
||||
if distance < min_distance:
|
||||
min_distance = distance
|
||||
index = j
|
||||
clusters[index].append(data[i])
|
||||
|
||||
old_centers = np.array(centers)
|
||||
for i in range(k):
|
||||
centers[i] = np.mean(clusters[i])
|
||||
|
||||
error = 0.0
|
||||
for i in range(k):
|
||||
error += old_centers[i] * old_centers[i] - centers[i] * centers[i]
|
||||
|
||||
return centers, clusters
|
||||
|
||||
|
||||
def segment_messages_from_magnitudes(cython.floating[:] magnitudes, float noise_threshold):
|
||||
"""
|
||||
Get the list of start, end indices of messages
|
||||
|
||||
:param magnitudes: Magnitudes of samples
|
||||
:param q: Factor which controls how many samples of previous above noise plateau must be under noise to be counted as noise
|
||||
:return:
|
||||
"""
|
||||
cdef list result = []
|
||||
|
||||
if len(magnitudes) == 0:
|
||||
return []
|
||||
|
||||
cdef unsigned long i, N = len(magnitudes), start = 0
|
||||
cdef unsigned long summed_message_samples = 0
|
||||
|
||||
# tolerance / robustness against outliers
|
||||
cdef unsigned int outlier_tolerance = 10
|
||||
cdef unsigned int conseq_above = 0, conseq_below = 0
|
||||
|
||||
# Three states: 1 = above noise, 0 = in noise, but not yet above k threshold (k * above_total), -1 = in noise
|
||||
cdef int state
|
||||
state = 1 if magnitudes[0] > noise_threshold else -1
|
||||
|
||||
cdef bint is_above_noise
|
||||
|
||||
for i in range(N):
|
||||
is_above_noise = magnitudes[i] > noise_threshold
|
||||
if state == 1:
|
||||
if is_above_noise:
|
||||
conseq_below = 0
|
||||
else:
|
||||
conseq_below += 1
|
||||
elif state == -1:
|
||||
if is_above_noise:
|
||||
conseq_above += 1
|
||||
else:
|
||||
conseq_above = 0
|
||||
|
||||
# Perform state change if necessary
|
||||
if state == 1 and conseq_below >= outlier_tolerance:
|
||||
# 1 -> -1
|
||||
state = -1
|
||||
result.append((start, i - conseq_below))
|
||||
summed_message_samples += (i-conseq_below) - start
|
||||
conseq_below = conseq_above = 0
|
||||
elif state == -1 and conseq_above >= outlier_tolerance:
|
||||
# -1 -> 1
|
||||
state = 1
|
||||
start = i - conseq_above
|
||||
conseq_below = conseq_above = 0
|
||||
|
||||
# append last message
|
||||
if state == 1 and start < N - conseq_below:
|
||||
result.append((start, N - conseq_below))
|
||||
|
||||
return result
|
||||
|
||||
cpdef uint64_t[:] get_threshold_divisor_histogram(uint64_t[:] plateau_lengths, float threshold=0.2):
|
||||
"""
|
||||
Get a histogram (i.e. count) how many times a value is a threshold divisor for other values in given data
|
||||
|
||||
Threshold divisible is defined as having a decimal place less than .2 (threshold)
|
||||
|
||||
:param plateau_lengths:
|
||||
:return:
|
||||
"""
|
||||
cdef uint64_t i, j, x, y, minimum, maximum, num_lengths = len(plateau_lengths)
|
||||
|
||||
cdef np.ndarray[np.uint64_t, ndim=1] histogram = np.zeros(int(np.max(plateau_lengths)) + 1, dtype=np.uint64)
|
||||
|
||||
for i in range(0, num_lengths):
|
||||
for j in range(i+1, num_lengths):
|
||||
x = plateau_lengths[i]
|
||||
y = plateau_lengths[j]
|
||||
if x == 0 or y == 0:
|
||||
continue
|
||||
|
||||
if x < y:
|
||||
minimum = x
|
||||
maximum = y
|
||||
else:
|
||||
minimum = y
|
||||
maximum = x
|
||||
|
||||
if maximum / <double>minimum - (maximum / minimum) < threshold:
|
||||
histogram[minimum] += 1
|
||||
|
||||
return histogram
|
||||
|
||||
cpdef np.ndarray[np.uint64_t, ndim=1] merge_plateaus(np.ndarray[np.uint64_t, ndim=1] plateaus,
|
||||
uint64_t tolerance,
|
||||
uint64_t max_count):
|
||||
cdef uint64_t j, n, L = len(plateaus), current = 0, i = 1, tmp_sum
|
||||
if L == 0:
|
||||
return np.zeros(0, dtype=np.uint64)
|
||||
|
||||
cdef np.ndarray[np.uint64_t, ndim=1] result = np.empty(L, dtype=np.uint64)
|
||||
if plateaus[0] <= tolerance:
|
||||
result[0] = 0
|
||||
else:
|
||||
result[0] = plateaus[0]
|
||||
|
||||
while i < L and current < max_count:
|
||||
if plateaus[i] <= tolerance:
|
||||
# Look ahead to see whether we need to merge a larger window e.g. for 67, 1, 10, 1, 21
|
||||
n = 2
|
||||
while i + n < L and plateaus[i + n] <= tolerance:
|
||||
n += 2
|
||||
|
||||
tmp_sum = 0
|
||||
for j in range(i - 1, min(L, i + n)):
|
||||
tmp_sum += plateaus[j]
|
||||
|
||||
result[current] = tmp_sum
|
||||
i += n
|
||||
else:
|
||||
current += 1
|
||||
result[current] = plateaus[i]
|
||||
i += 1
|
||||
|
||||
return result[:current+1]
|
||||
|
||||
|
||||
cpdef np.ndarray[np.uint64_t, ndim=1] get_plateau_lengths(float[:] rect_data, float center, int percentage=25):
|
||||
if len(rect_data) == 0 or center is None:
|
||||
return np.array([], dtype=np.uint64)
|
||||
|
||||
cdef int state, new_state
|
||||
state = -1 if rect_data[0] <= center else 1
|
||||
cdef unsigned long long plateau_length = 0
|
||||
cdef unsigned long long current_sum = 0
|
||||
cdef unsigned long long i = 0
|
||||
cdef unsigned long long len_data = len(rect_data)
|
||||
cdef float sample
|
||||
|
||||
cdef array.array result = array.array('Q', [])
|
||||
|
||||
for i in range(0, len_data):
|
||||
if current_sum >= percentage * len_data / 100:
|
||||
break
|
||||
|
||||
sample = rect_data[i]
|
||||
new_state = -1 if sample <= center else 1
|
||||
|
||||
if state == new_state:
|
||||
plateau_length += 1
|
||||
else:
|
||||
result.append(plateau_length)
|
||||
current_sum += plateau_length
|
||||
state = new_state
|
||||
plateau_length = 1
|
||||
|
||||
return np.array(result, dtype=np.uint64)
|
||||
|
||||
|
||||
cdef float median(double[:] data, unsigned long start, unsigned long data_len, unsigned int k=3) nogil:
|
||||
cdef unsigned long i, j
|
||||
|
||||
if start + k > data_len:
|
||||
k = data_len - start
|
||||
|
||||
cdef float* buffer = <float *>malloc(k * sizeof(float))
|
||||
for i in range(0, k):
|
||||
buffer[i] = data[start+i]
|
||||
|
||||
sort(&buffer[0], (&buffer[0]) + k)
|
||||
try:
|
||||
return buffer[k//2]
|
||||
finally:
|
||||
free(buffer)
|
||||
|
||||
cpdef np.ndarray[np.float32_t, ndim=1] median_filter(double[:] data, unsigned int k=3):
|
||||
cdef long long start, end, i, n = len(data)
|
||||
|
||||
cdef np.ndarray[np.float32_t, ndim=1] result = np.zeros(n, dtype=np.float32)
|
||||
|
||||
for i in prange(0, n, nogil=True, schedule='static'):
|
||||
if i < k // 2:
|
||||
start = 0
|
||||
else:
|
||||
start = i - k // 2
|
||||
|
||||
result[i] = median(data, start=i, data_len=n, k=k)
|
||||
|
||||
return result
|
Reference in New Issue
Block a user