# noinspection PyUnresolvedReferences
cimport numpy as np
import numpy as np
from cpython cimport array
import array
import cython

from cython.parallel import prange
from libc.stdlib cimport malloc, free
from libcpp.algorithm cimport sort
from libc.stdint cimport uint64_t

cpdef tuple k_means(float[:] data, unsigned int k=2):
    """
    Cluster one-dimensional data into k groups with Lloyd's k-means algorithm.

    Initial centers are k distinct values taken from the data. If the data holds
    fewer than k distinct values, a warning is printed and k is reduced accordingly.

    :param data: Samples to cluster
    :param k: Requested number of clusters
    :return: Tuple (centers, clusters): centers is a float32 memoryview with one entry per
             cluster, clusters is a list of lists holding the samples assigned to each center
    """
    cdef set unique = set(data)
    cdef list clusters = []
    cdef unsigned long i, N = len(data)
    cdef unsigned int j, index = 0
    cdef float distance, min_distance, delta, error = 1.0
    cdef float infinity = float("inf")

    if len(unique) < k:
        print("Warning: less different values than k")
        k = len(unique)

    # Allocate only after k is final so that centers and clusters always have the same length
    # and no uninitialised entries are handed back to the caller.
    cdef float[:] centers = np.empty(k, dtype=np.float32)
    cdef float[:] old_centers

    for i in range(k):
        centers[i] = unique.pop()
        clusters.append([])

    if k == 0:
        # Nothing to assign samples to
        return centers, clusters

    while error != 0:
        for i in range(k):
            clusters[i].clear()

        # Assignment step: put every sample into the cluster of its nearest center
        for i in range(N):
            # Start from infinity (not a magic constant) so that far away samples
            # still pick their nearest center instead of reusing a stale index.
            min_distance = infinity
            index = 0
            for j in range(k):
                distance = (centers[j] - data[i]) * (centers[j] - data[i])
                if distance < min_distance:
                    min_distance = distance
                    index = j
            clusters[index].append(data[i])

        # Update step: move every center to the mean of its cluster
        old_centers = np.array(centers, dtype=np.float32)
        for i in range(k):
            # The mean of an empty cluster is NaN, which would make the loop run forever,
            # so an empty cluster keeps its previous center.
            if clusters[i]:
                centers[i] = np.mean(clusters[i])

        # Sum of squared movements: zero only if no center moved. A signed sum of
        # differences could cancel out and stop the iteration too early.
        error = 0.0
        for i in range(k):
            delta = old_centers[i] - centers[i]
            error += delta * delta

    return centers, clusters


def segment_messages_from_magnitudes(cython.floating[:] magnitudes, float noise_threshold):
    """
    Get the list of (start, end) indices of messages

    A message begins after a run of consecutive samples above the noise threshold and
    ends after a run of consecutive samples not above it. The required run length is a
    fixed outlier tolerance, so that short outliers do not split or create messages.

    :param magnitudes: Magnitudes of samples
    :param noise_threshold: Samples with a magnitude strictly greater than this value count as signal
    :return: List of (start, end) tuples, empty if there are no samples
    """
    cdef list result = []

    if len(magnitudes) == 0:
        return []

    cdef unsigned long i, N = len(magnitudes), start = 0

    # tolerance / robustness against outliers
    cdef unsigned int outlier_tolerance = 10
    cdef unsigned int conseq_above = 0, conseq_below = 0

    # Two states: 1 = inside a message (above noise), -1 = in noise
    cdef int state
    state = 1 if magnitudes[0] > noise_threshold else -1

    cdef bint is_above_noise

    for i in range(N):
        is_above_noise = magnitudes[i] > noise_threshold

        # Track the length of the current run that contradicts the active state
        if state == 1:
            if is_above_noise:
                conseq_below = 0
            else:
                conseq_below += 1
        elif state == -1:
            if is_above_noise:
                conseq_above += 1
            else:
                conseq_above = 0

        # Perform state change if necessary
        if state == 1 and conseq_below >= outlier_tolerance:
            # 1 -> -1: close the message, leaving out the trailing run of noise samples
            state = -1
            result.append((start, i - conseq_below))
            conseq_below = conseq_above = 0
        elif state == -1 and conseq_above >= outlier_tolerance:
            # -1 -> 1: open a message, reaching back over the run that triggered the change
            state = 1
            start = i - conseq_above
            conseq_below = conseq_above = 0

    # append last message if the data ends while a message is still open
    if state == 1 and start < N - conseq_below:
        result.append((start, N - conseq_below))

    return result

cpdef uint64_t[:] get_threshold_divisor_histogram(uint64_t[:] plateau_lengths, float threshold=0.2):
    """
    Get a histogram (i.e. count) how many times a value is a threshold divisor for other values in given data

    For every pair of non-zero values the smaller one is a threshold divisor of the larger one
    if the fractional part of larger / smaller is less than threshold (default .2).
    The count is stored at the index of the smaller value.

    :param plateau_lengths: Values to compare pairwise, zeros are ignored
    :param threshold: Upper bound (exclusive) for the fractional part of the quotient
    :return: Histogram with max(plateau_lengths) + 1 entries, empty for empty input
    """
    cdef uint64_t i, j, x, y, minimum, maximum, num_lengths = len(plateau_lengths)

    if num_lengths == 0:
        # np.max raises on empty input; an empty histogram mirrors merge_plateaus
        return np.zeros(0, dtype=np.uint64)

    cdef np.ndarray[np.uint64_t, ndim=1] histogram = np.zeros(int(np.max(plateau_lengths)) + 1, dtype=np.uint64)

    for i in range(0, num_lengths):
        x = plateau_lengths[i]
        if x == 0:
            continue

        for j in range(i+1, num_lengths):
            y = plateau_lengths[j]
            if y == 0:
                continue

            if x < y:
                minimum = x
                maximum = y
            else:
                minimum = y
                maximum = x

            # Fractional part = true quotient minus integer quotient. The floor division must be
            # explicit: with language_level=3 a plain "/" on C integers is a true division,
            # which would make this difference always zero.
            if maximum / <double>minimum - <double>(maximum // minimum) < threshold:
                histogram[minimum] += 1

    return histogram

cpdef np.ndarray[np.uint64_t, ndim=1] merge_plateaus(np.ndarray[np.uint64_t, ndim=1] plateaus,
                                                     uint64_t tolerance,
                                                     uint64_t max_count):
    """
    Merge plateaus that are interrupted by short plateaus into single plateaus

    A plateau with a length of at most tolerance is treated as an outlier: it is summed up
    together with its two neighbours into one entry, e.g. 67, 1, 10, 1, 21 becomes 100.

    :param plateaus: Plateau lengths in order of appearance
    :param tolerance: Plateaus up to this length (inclusive) get merged into their neighbours
    :param max_count: Processing stops as soon as the index of the last written entry reaches this value
    :return: Merged plateau lengths
    """
    cdef uint64_t num_plateaus = len(plateaus)
    cdef uint64_t pos = 1, out_pos = 0, span, k, window_end, merged
    if num_plateaus == 0:
        return np.zeros(0, dtype=np.uint64)

    cdef np.ndarray[np.uint64_t, ndim=1] merged_lengths = np.empty(num_plateaus, dtype=np.uint64)
    if plateaus[0] > tolerance:
        merged_lengths[0] = plateaus[0]
    else:
        # A too short first plateau has no left neighbour to be merged into
        merged_lengths[0] = 0

    while pos < num_plateaus and out_pos < max_count:
        if plateaus[pos] > tolerance:
            # Regular plateau: becomes a new entry of its own
            out_pos += 1
            merged_lengths[out_pos] = plateaus[pos]
            pos += 1
            continue

        # Short plateau: look ahead in steps of two to see whether a larger window
        # needs to be merged, e.g. for 67, 1, 10, 1, 21
        span = 2
        while pos + span < num_plateaus and plateaus[pos + span] <= tolerance:
            span += 2

        window_end = min(num_plateaus, pos + span)
        merged = 0
        for k in range(pos - 1, window_end):
            merged += plateaus[k]

        # The window starts at the previous plateau, so its entry is overwritten
        merged_lengths[out_pos] = merged
        pos += span

    return merged_lengths[:out_pos+1]


cpdef np.ndarray[np.uint64_t, ndim=1] get_plateau_lengths(float[:] rect_data, float center, int percentage=25):
    """
    Get the lengths of the plateaus in the given data

    A plateau is a run of consecutive samples lying on the same side of center, where
    samples equal to center count as below. A plateau is recorded when the following
    sample switches sides, so the run that is still open when scanning stops is not recorded.

    :param rect_data: Samples to scan
    :param center: Value separating the two sides
    :param percentage: Scanning stops once the recorded plateaus cover this percentage of all samples
    :return: Lengths of the recorded plateaus in order of appearance
    """
    # NOTE(review): center is declared as a C float, so the None check looks redundant - confirm
    if len(rect_data) == 0 or center is None:
        return np.array([], dtype=np.uint64)

    cdef unsigned long long num_samples = len(rect_data)
    cdef unsigned long long pos = 0
    cdef unsigned long long run_length = 0
    cdef unsigned long long covered = 0
    cdef int current_side, side
    cdef float value
    cdef array.array lengths = array.array('Q', [])

    current_side = -1 if rect_data[0] <= center else 1

    for pos in range(0, num_samples):
        if covered >= percentage * num_samples / 100:
            break

        value = rect_data[pos]
        side = -1 if value <= center else 1

        if current_side != side:
            # Side switched: record the finished plateau and start counting the new one
            lengths.append(run_length)
            covered += run_length
            current_side = side
            run_length = 1
        else:
            run_length += 1

    return np.array(lengths, dtype=np.uint64)


cdef float median(double[:] data, unsigned long start, unsigned long data_len, unsigned int k=3) nogil:
    # Return the median of the window data[start:start+k], with the window being cut off at data_len.
    # For an even window size the upper of the two middle values is returned.
    cdef unsigned long i
    cdef float result
    cdef float* buffer

    # An empty window has no median. This also prevents data_len - start from
    # wrapping around, as both values are unsigned.
    if k == 0 or start >= data_len:
        return 0

    if start + k > data_len:
        k = data_len - start

    buffer = <float *>malloc(k * sizeof(float))
    if buffer == NULL:
        # Out of memory: no exception can be raised without the GIL, so hand back the
        # unfiltered sample instead of writing through a NULL pointer.
        return <float>data[start]

    for i in range(0, k):
        buffer[i] = data[start+i]

    sort(&buffer[0], (&buffer[0]) + k)
    result = buffer[k//2]
    free(buffer)
    return result

cpdef np.ndarray[np.float32_t, ndim=1] median_filter(double[:] data, unsigned int k=3):
    """
    Apply a median filter with window size k to the given data

    The window for output sample i covers data[i:i+k] and is shortened at the end of the data.

    :param data: Samples to filter
    :param k: Window size, must be at least 1
    :return: Filtered samples as float32 array with the same length as data
    :raises ValueError: If k is zero
    """
    cdef long long i, n = len(data)

    if k == 0:
        raise ValueError("k must be at least 1")

    cdef np.ndarray[np.float32_t, ndim=1] result = np.zeros(n, dtype=np.float32)

    # NOTE(review): a window start centered around i (i - k // 2, clamped to 0) used to be
    # computed here but was never passed on. The window has always started at i, which is kept
    # so that the output does not change - confirm whether a centered window was intended.
    for i in prange(0, n, nogil=True, schedule='static'):
        result[i] = median(data, start=i, data_len=n, k=k)

    return result