Source code for entropy.entropy

"""Entropy functions"""
import numpy as np
from numba import jit
from math import factorial, log
from sklearn.neighbors import KDTree
from scipy.signal import periodogram, welch

from .utils import _embed

__all__ = ['perm_entropy', 'spectral_entropy', 'svd_entropy', 'app_entropy',
           'sample_entropy', 'lziv_complexity', 'num_zerocross',
           'hjorth_params']


def perm_entropy(x, order=3, delay=1, normalize=False):
    """Permutation Entropy.

    Parameters
    ----------
    x : list or np.array
        One-dimensional time series of shape (n_times).
    order : int
        Order of permutation entropy. Default is 3.
    delay : int
        Time delay (lag). Default is 1.
    normalize : bool
        If True, divide by log2(order!) to normalize the entropy between 0
        and 1. Otherwise, return the permutation entropy in bit.

    Returns
    -------
    pe : float
        Permutation Entropy.

    Notes
    -----
    The permutation entropy is a complexity measure for time-series first
    introduced by Bandt and Pompe in 2002.

    The permutation entropy of a signal :math:`x` is defined as:

    .. math:: H = -\\sum p(\\pi)\\log_2(p(\\pi))

    where the sum runs over all :math:`n!` permutations :math:`\\pi` of order
    :math:`n`. This is the information contained in comparing :math:`n`
    consecutive values of the time series. It is clear that
    :math:`0 \\le H(n) \\le \\log_2(n!)` where the lower bound is attained for
    an increasing or decreasing sequence of values, and the upper bound for a
    completely random system where all :math:`n!` possible permutations
    appear with the same probability.

    The embedded matrix :math:`Y` is created by:

    .. math::
        y(i) = [x_i, x_{i+\\text{delay}}, ...,
        x_{i+(\\text{order}-1) * \\text{delay}}]

    .. math:: Y = [y(1), y(2), ..., y(N-(\\text{order}-1)*\\text{delay})]^T

    References
    ----------
    Bandt, Christoph, and Bernd Pompe. "Permutation entropy: a natural
    complexity measure for time series." Physical review letters 88.17
    (2002): 174102.

    Examples
    --------
    Permutation entropy with order 2

    >>> import numpy as np
    >>> import entropy as ent
    >>> import stochastic.processes.noise as sn
    >>> x = [4, 7, 9, 10, 6, 11, 3]
    >>> # Return a value in bit between 0 and log2(factorial(order))
    >>> print(f"{ent.perm_entropy(x, order=2):.4f}")
    0.9183

    Normalized permutation entropy with order 3

    >>> # Return a value comprised between 0 and 1.
    >>> print(f"{ent.perm_entropy(x, normalize=True):.4f}")
    0.5888

    Fractional Gaussian noise with H = 0.5

    >>> rng = np.random.default_rng(seed=42)
    >>> x = sn.FractionalGaussianNoise(hurst=0.5, rng=rng).sample(10000)
    >>> print(f"{ent.perm_entropy(x, normalize=True):.4f}")
    0.9998

    Fractional Gaussian noise with H = 0.9

    >>> rng = np.random.default_rng(seed=42)
    >>> x = sn.FractionalGaussianNoise(hurst=0.9, rng=rng).sample(10000)
    >>> print(f"{ent.perm_entropy(x, normalize=True):.4f}")
    0.9926

    Fractional Gaussian noise with H = 0.1

    >>> rng = np.random.default_rng(seed=42)
    >>> x = sn.FractionalGaussianNoise(hurst=0.1, rng=rng).sample(10000)
    >>> print(f"{ent.perm_entropy(x, normalize=True):.4f}")
    0.9959

    Random

    >>> rng = np.random.default_rng(seed=42)
    >>> print(f"{ent.perm_entropy(rng.random(1000), normalize=True):.4f}")
    0.9997

    Pure sine wave

    >>> x = np.sin(2 * np.pi * 1 * np.arange(3000) / 100)
    >>> print(f"{ent.perm_entropy(x, normalize=True):.4f}")
    0.4463

    Linearly-increasing time-series

    >>> x = np.arange(1000)
    >>> print(f"{ent.perm_entropy(x, normalize=True):.4f}")
    -0.0000
    """
    x = np.array(x)
    ran_order = range(order)
    hashmult = np.power(order, ran_order)
    # Embed x and sort the order of permutations
    sorted_idx = _embed(x, order=order, delay=delay).argsort(kind='quicksort')
    # Associate a unique integer to each permutation
    hashval = (np.multiply(sorted_idx, hashmult)).sum(1)
    # Return the counts
    _, c = np.unique(hashval, return_counts=True)
    # Use np.true_divide for Python 2 compatibility
    p = np.true_divide(c, c.sum())
    pe = -np.multiply(p, np.log2(p)).sum()
    if normalize:
        pe /= np.log2(factorial(order))
    return pe

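# Editor's sketch (not part of the library): a minimal, standalone
# illustration of the ordinal-pattern encoding used by ``perm_entropy``.
# The embedding is rebuilt with plain NumPy instead of ``_embed`` so the
# snippet is self-contained; the helper name ``_ordinal_patterns_demo`` is
# hypothetical.
def _ordinal_patterns_demo(x, order=3, delay=1):
    x = np.asarray(x, dtype=float)
    n = x.size - (order - 1) * delay
    # Delay embedding (equivalent to ``_embed(x, order, delay)``)
    emb = np.array([x[i:i + (order - 1) * delay + 1:delay] for i in range(n)])
    # Rank pattern of each embedded vector, e.g. [2, 0, 1] means the last
    # sample is the smallest and the middle one the largest
    patterns = emb.argsort(kind='quicksort')
    # Encode each pattern as a unique integer in base ``order``
    hashval = (patterns * order ** np.arange(order)).sum(axis=1)
    return patterns, hashval

# Example: x = [4, 7, 9, 10, 6, 11, 3] yields the patterns
# [0, 1, 2], [0, 1, 2], [2, 0, 1], [1, 0, 2], [2, 0, 1], i.e. pattern
# probabilities [0.4, 0.4, 0.2]. Their Shannon entropy (~1.52 bit) divided by
# log2(3!) gives the normalized value 0.5888 shown in the docstring above.
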
def spectral_entropy(x, sf, method='fft', nperseg=None, normalize=False,
                     axis=-1):
    """Spectral Entropy.

    Parameters
    ----------
    x : list or np.array
        1D or N-D data.
    sf : float
        Sampling frequency, in Hz.
    method : str
        Spectral estimation method:

        * ``'fft'`` : Fourier Transform (:py:func:`scipy.signal.periodogram`)
        * ``'welch'`` : Welch periodogram (:py:func:`scipy.signal.welch`)
    nperseg : int or None
        Length of each FFT segment for Welch method.
        If None (default), uses scipy default of 256 samples.
    normalize : bool
        If True, divide by log2(psd.size) to normalize the spectral entropy
        between 0 and 1. Otherwise, return the spectral entropy in bit.
    axis : int
        The axis along which the entropy is calculated. Default is -1 (last).

    Returns
    -------
    se : float
        Spectral Entropy.

    Notes
    -----
    Spectral Entropy is defined to be the Shannon entropy of the power
    spectral density (PSD) of the data:

    .. math:: H(x, sf) = -\\sum_{f=0}^{f_s/2} P(f) \\log_2[P(f)]

    Where :math:`P` is the normalized PSD, and :math:`f_s` is the sampling
    frequency.

    References
    ----------
    - Inouye, T. et al. (1991). Quantification of EEG irregularity by use
      of the entropy of the power spectrum. Electroencephalography and
      clinical neurophysiology, 79(3), 204-210.

    - https://en.wikipedia.org/wiki/Spectral_density

    - https://en.wikipedia.org/wiki/Welch%27s_method

    Examples
    --------
    Spectral entropy of a pure sine using FFT

    >>> import numpy as np
    >>> import entropy as ent
    >>> sf, f, dur = 100, 1, 4
    >>> N = sf * dur  # Total number of discrete samples
    >>> t = np.arange(N) / sf  # Time vector
    >>> x = np.sin(2 * np.pi * f * t)
    >>> np.round(ent.spectral_entropy(x, sf, method='fft'), 2)
    0.0

    Spectral entropy of a random signal using Welch's method

    >>> np.random.seed(42)
    >>> x = np.random.rand(3000)
    >>> ent.spectral_entropy(x, sf=100, method='welch')
    6.980045662371389

    Normalized spectral entropy

    >>> ent.spectral_entropy(x, sf=100, method='welch', normalize=True)
    0.9955526198316071

    Normalized spectral entropy of 2D data

    >>> np.random.seed(42)
    >>> x = np.random.normal(size=(4, 3000))
    >>> np.round(ent.spectral_entropy(x, sf=100, normalize=True), 4)
    array([0.9464, 0.9428, 0.9431, 0.9417])

    Fractional Gaussian noise with H = 0.5

    >>> import stochastic.processes.noise as sn
    >>> rng = np.random.default_rng(seed=42)
    >>> x = sn.FractionalGaussianNoise(hurst=0.5, rng=rng).sample(10000)
    >>> print(f"{ent.spectral_entropy(x, sf=100, normalize=True):.4f}")
    0.9505

    Fractional Gaussian noise with H = 0.9

    >>> rng = np.random.default_rng(seed=42)
    >>> x = sn.FractionalGaussianNoise(hurst=0.9, rng=rng).sample(10000)
    >>> print(f"{ent.spectral_entropy(x, sf=100, normalize=True):.4f}")
    0.8477

    Fractional Gaussian noise with H = 0.1

    >>> rng = np.random.default_rng(seed=42)
    >>> x = sn.FractionalGaussianNoise(hurst=0.1, rng=rng).sample(10000)
    >>> print(f"{ent.spectral_entropy(x, sf=100, normalize=True):.4f}")
    0.9248
    """
    x = np.asarray(x)
    # Compute and normalize power spectrum
    if method == 'fft':
        _, psd = periodogram(x, sf, axis=axis)
    elif method == 'welch':
        _, psd = welch(x, sf, nperseg=nperseg, axis=axis)
    psd_norm = psd / psd.sum(axis=axis, keepdims=True)
    se = -(psd_norm * np.log2(psd_norm)).sum(axis=axis)
    if normalize:
        se /= np.log2(psd_norm.shape[axis])
    return se

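# Editor's sketch (not part of the library): the same quantity computed
# "by hand" from :py:func:`scipy.signal.periodogram`, to make the three steps
# explicit: estimate the PSD, normalize it so it sums to 1, then take its
# Shannon entropy. The helper name is illustrative only.
def _spectral_entropy_by_hand(x, sf, normalize=False):
    _, psd = periodogram(np.asarray(x), sf)
    psd_norm = psd / psd.sum()                  # PSD as a probability mass
    psd_norm = psd_norm[psd_norm > 0]           # guard against log2(0)
    se = -(psd_norm * np.log2(psd_norm)).sum()  # Shannon entropy in bit
    if normalize:
        se /= np.log2(psd.size)                 # upper bound: flat spectrum
    return se

# For a pure sine this is ~0 bit (one dominant frequency bin), while white
# noise approaches the log2(psd.size) upper bound, in line with the doctest
# examples above.
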
def svd_entropy(x, order=3, delay=1, normalize=False):
    """Singular Value Decomposition entropy.

    Parameters
    ----------
    x : list or np.array
        One-dimensional time series of shape (n_times).
    order : int
        Order of SVD entropy (= length of the embedding dimension).
        Default is 3.
    delay : int
        Time delay (lag). Default is 1.
    normalize : bool
        If True, divide by log2(order) to normalize the entropy between 0
        and 1. Otherwise, return the SVD entropy in bit.

    Returns
    -------
    svd_e : float
        SVD Entropy.

    Notes
    -----
    SVD entropy is an indicator of the number of eigenvectors that are needed
    for an adequate explanation of the data set. In other words, it measures
    the dimensionality of the data.

    The SVD entropy of a signal :math:`x` is defined as:

    .. math::
        H = -\\sum_{i=1}^{M} \\overline{\\sigma}_i \\log_2(\\overline{\\sigma}_i)

    where :math:`M` is the number of singular values of the embedded matrix
    :math:`Y` and :math:`\\sigma_1, \\sigma_2, ..., \\sigma_M` are the
    normalized singular values of :math:`Y`.

    The embedded matrix :math:`Y` is created by:

    .. math::
        y(i) = [x_i, x_{i+\\text{delay}}, ...,
        x_{i+(\\text{order}-1) * \\text{delay}}]

    .. math:: Y = [y(1), y(2), ..., y(N-(\\text{order}-1)*\\text{delay})]^T

    Examples
    --------
    SVD entropy with order 2

    >>> import numpy as np
    >>> import entropy as ent
    >>> import stochastic.processes.noise as sn
    >>> x = [4, 7, 9, 10, 6, 11, 3]
    >>> # Return a value in bit between 0 and log2(order)
    >>> print(ent.svd_entropy(x, order=2))
    0.7618909465130066

    Normalized SVD entropy with order 3

    >>> x = [4, 7, 9, 10, 6, 11, 3]
    >>> # Return a value comprised between 0 and 1.
    >>> print(ent.svd_entropy(x, order=3, normalize=True))
    0.6870083043946692

    Fractional Gaussian noise with H = 0.5

    >>> rng = np.random.default_rng(seed=42)
    >>> x = sn.FractionalGaussianNoise(hurst=0.5, rng=rng).sample(10000)
    >>> print(f"{ent.svd_entropy(x, normalize=True):.4f}")
    1.0000

    Fractional Gaussian noise with H = 0.9

    >>> rng = np.random.default_rng(seed=42)
    >>> x = sn.FractionalGaussianNoise(hurst=0.9, rng=rng).sample(10000)
    >>> print(f"{ent.svd_entropy(x, normalize=True):.4f}")
    0.9080

    Fractional Gaussian noise with H = 0.1

    >>> rng = np.random.default_rng(seed=42)
    >>> x = sn.FractionalGaussianNoise(hurst=0.1, rng=rng).sample(10000)
    >>> print(f"{ent.svd_entropy(x, normalize=True):.4f}")
    0.9637

    Random

    >>> rng = np.random.default_rng(seed=42)
    >>> print(f"{ent.svd_entropy(rng.random(1000), normalize=True):.4f}")
    0.8527

    Pure sine wave

    >>> x = np.sin(2 * np.pi * 1 * np.arange(3000) / 100)
    >>> print(f"{ent.svd_entropy(x, normalize=True):.4f}")
    0.1775

    Linearly-increasing time-series

    >>> x = np.arange(1000)
    >>> print(f"{ent.svd_entropy(x, normalize=True):.4f}")
    0.0053
    """
    x = np.array(x)
    mat = _embed(x, order=order, delay=delay)
    W = np.linalg.svd(mat, compute_uv=False)
    # Normalize the singular values
    W /= sum(W)
    svd_e = -np.multiply(W, np.log2(W)).sum()
    if normalize:
        svd_e /= np.log2(order)
    return svd_e

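# Editor's sketch (not part of the library): SVD entropy computed step by
# step on a delay-embedded matrix built with plain NumPy instead of
# ``_embed``. The helper name is hypothetical.
def _svd_entropy_by_hand(x, order=3, delay=1, normalize=False):
    x = np.asarray(x, dtype=float)
    n = x.size - (order - 1) * delay
    # Delay-embedding matrix Y: one row per embedded vector y(i)
    Y = np.array([x[i:i + (order - 1) * delay + 1:delay] for i in range(n)])
    sigma = np.linalg.svd(Y, compute_uv=False)  # singular values
    sigma_bar = sigma / sigma.sum()             # normalized singular values
    sigma_bar = sigma_bar[sigma_bar > 0]        # guard against exact zeros
    h = -(sigma_bar * np.log2(sigma_bar)).sum()
    return h / np.log2(order) if normalize else h

# A nearly rank-deficient embedding (e.g. a linear ramp) concentrates the
# spectrum in very few singular values and drives the entropy toward 0,
# whereas white noise spreads it evenly across all ``order`` singular values
# (entropy close to log2(order)), matching the doctest examples above.
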
def _app_samp_entropy(x, order, metric='chebyshev', approximate=True):
    """Utility function for `app_entropy` and `sample_entropy`."""
    _all_metrics = KDTree.valid_metrics
    if metric not in _all_metrics:
        raise ValueError('The given metric (%s) is not valid. The valid '
                         'metric names are: %s' % (metric, _all_metrics))
    phi = np.zeros(2)
    r = 0.2 * np.std(x, ddof=0)

    # compute phi(order, r)
    _emb_data1 = _embed(x, order, 1)
    if approximate:
        emb_data1 = _emb_data1
    else:
        emb_data1 = _emb_data1[:-1]
    count1 = KDTree(emb_data1, metric=metric).query_radius(
        emb_data1, r, count_only=True).astype(np.float64)
    # compute phi(order + 1, r)
    emb_data2 = _embed(x, order + 1, 1)
    count2 = KDTree(emb_data2, metric=metric).query_radius(
        emb_data2, r, count_only=True).astype(np.float64)
    if approximate:
        phi[0] = np.mean(np.log(count1 / emb_data1.shape[0]))
        phi[1] = np.mean(np.log(count2 / emb_data2.shape[0]))
    else:
        phi[0] = np.mean((count1 - 1) / (emb_data1.shape[0] - 1))
        phi[1] = np.mean((count2 - 1) / (emb_data2.shape[0] - 1))
    return phi


@jit('f8(f8[:], i4, f8)', nopython=True)
def _numba_sampen(x, order, r):
    """Fast evaluation of the sample entropy using Numba."""
    n = x.size
    n1 = n - 1
    order += 1
    order_dbld = 2 * order

    # Define threshold
    # r *= x.std()

    # initialize the lists
    run = [0] * n
    run1 = run[:]
    r1 = [0] * (n * order_dbld)
    a = [0] * order
    b = a[:]
    p = a[:]

    for i in range(n1):
        nj = n1 - i

        for jj in range(nj):
            j = jj + i + 1
            if abs(x[j] - x[i]) < r:
                run[jj] = run1[jj] + 1
                m1 = order if order < run[jj] else run[jj]
                for m in range(m1):
                    a[m] += 1
                    if j < n1:
                        b[m] += 1
            else:
                run[jj] = 0
        for j in range(order_dbld):
            run1[j] = run[j]
            r1[i + n * j] = run[j]
        if nj > order_dbld - 1:
            for j in range(order_dbld, nj):
                run1[j] = run[j]

    m = order - 1
    while m > 0:
        b[m] = b[m - 1]
        m -= 1

    b[0] = n * n1 / 2
    a = np.array([float(aa) for aa in a])
    b = np.array([float(bb) for bb in b])
    p = np.true_divide(a, b)
    return -log(p[-1])

def app_entropy(x, order=2, metric='chebyshev'):
    """Approximate Entropy.

    Parameters
    ----------
    x : list or np.array
        One-dimensional time series of shape (n_times).
    order : int
        Embedding dimension. Default is 2.
    metric : str
        Name of the distance metric function used with
        :py:class:`sklearn.neighbors.KDTree`. Default is to use the
        `Chebyshev <https://en.wikipedia.org/wiki/Chebyshev_distance>`_
        distance.

    Returns
    -------
    ae : float
        Approximate Entropy.

    Notes
    -----
    Approximate entropy is a technique used to quantify the amount of
    regularity and the unpredictability of fluctuations over time-series
    data. Smaller values indicate that the data is more regular and
    predictable.

    The tolerance value (:math:`r`) is set to :math:`0.2 * \\text{std}(x)`.

    Code adapted from the `mne-features <https://mne.tools/mne-features/>`_
    package by Jean-Baptiste Schiratti and Alexandre Gramfort.

    References
    ----------
    Richman, J. S. et al. (2000). Physiological time-series analysis
    using approximate entropy and sample entropy. American Journal of
    Physiology-Heart and Circulatory Physiology, 278(6), H2039-H2049.

    https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.DistanceMetric.html

    Examples
    --------
    Fractional Gaussian noise with H = 0.5

    >>> import numpy as np
    >>> import entropy as ent
    >>> import stochastic.processes.noise as sn
    >>> rng = np.random.default_rng(seed=42)
    >>> x = sn.FractionalGaussianNoise(hurst=0.5, rng=rng).sample(10000)
    >>> print(f"{ent.app_entropy(x, order=2):.4f}")
    2.1958

    Same with order = 3 and metric = 'euclidean'

    >>> print(f"{ent.app_entropy(x, order=3, metric='euclidean'):.4f}")
    1.5120

    Fractional Gaussian noise with H = 0.9

    >>> rng = np.random.default_rng(seed=42)
    >>> x = sn.FractionalGaussianNoise(hurst=0.9, rng=rng).sample(10000)
    >>> print(f"{ent.app_entropy(x):.4f}")
    1.9681

    Fractional Gaussian noise with H = 0.1

    >>> rng = np.random.default_rng(seed=42)
    >>> x = sn.FractionalGaussianNoise(hurst=0.1, rng=rng).sample(10000)
    >>> print(f"{ent.app_entropy(x):.4f}")
    2.0906

    Random

    >>> rng = np.random.default_rng(seed=42)
    >>> print(f"{ent.app_entropy(rng.random(1000)):.4f}")
    1.8177

    Pure sine wave

    >>> x = np.sin(2 * np.pi * 1 * np.arange(3000) / 100)
    >>> print(f"{ent.app_entropy(x):.4f}")
    0.2009

    Linearly-increasing time-series

    >>> x = np.arange(1000)
    >>> print(f"{ent.app_entropy(x):.4f}")
    -0.0010
    """
    phi = _app_samp_entropy(x, order=order, metric=metric, approximate=True)
    return np.subtract(phi[0], phi[1])

def sample_entropy(x, order=2, metric='chebyshev'):
    """Sample Entropy.

    Parameters
    ----------
    x : list or np.array
        One-dimensional time series of shape (n_times).
    order : int
        Embedding dimension. Default is 2.
    metric : str
        Name of the distance metric function used with
        :py:class:`sklearn.neighbors.KDTree`. Default is to use the
        `Chebyshev <https://en.wikipedia.org/wiki/Chebyshev_distance>`_
        distance.

    Returns
    -------
    se : float
        Sample Entropy.

    Notes
    -----
    Sample entropy is a modification of approximate entropy, used for
    assessing the complexity of physiological time-series signals. It has
    two advantages over approximate entropy: data length independence and a
    relatively trouble-free implementation. Large values indicate high
    complexity whereas smaller values characterize more self-similar and
    regular signals.

    The sample entropy of a signal :math:`x` is defined as:

    .. math:: H(x, m, r) = -\\log\\frac{C(m + 1, r)}{C(m, r)}

    where :math:`m` is the embedding dimension (= order), :math:`r` is the
    radius of the neighbourhood (default = :math:`0.2 * \\text{std}(x)`),
    :math:`C(m + 1, r)` is the number of embedded vectors of length
    :math:`m + 1` having a
    `Chebyshev distance <https://en.wikipedia.org/wiki/Chebyshev_distance>`_
    inferior to :math:`r` and :math:`C(m, r)` is the number of embedded
    vectors of length :math:`m` having a Chebyshev distance inferior to
    :math:`r`.

    Note that if ``metric == 'chebyshev'`` and ``len(x) < 5000`` points,
    then the sample entropy is computed using a fast custom Numba script.
    For other distance metrics or longer time-series, the sample entropy is
    computed using code from the `mne-features
    <https://mne.tools/mne-features/>`_ package by Jean-Baptiste Schiratti
    and Alexandre Gramfort (requires sklearn).

    References
    ----------
    Richman, J. S. et al. (2000). Physiological time-series analysis
    using approximate entropy and sample entropy. American Journal of
    Physiology-Heart and Circulatory Physiology, 278(6), H2039-H2049.

    https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.DistanceMetric.html

    Examples
    --------
    Fractional Gaussian noise with H = 0.5

    >>> import numpy as np
    >>> import entropy as ent
    >>> import stochastic.processes.noise as sn
    >>> rng = np.random.default_rng(seed=42)
    >>> x = sn.FractionalGaussianNoise(hurst=0.5, rng=rng).sample(10000)
    >>> print(f"{ent.sample_entropy(x, order=2):.4f}")
    2.1819

    Same with order = 3 and using the Euclidean distance

    >>> print(f"{ent.sample_entropy(x, order=3, metric='euclidean'):.4f}")
    2.6806

    Fractional Gaussian noise with H = 0.9

    >>> rng = np.random.default_rng(seed=42)
    >>> x = sn.FractionalGaussianNoise(hurst=0.9, rng=rng).sample(10000)
    >>> print(f"{ent.sample_entropy(x):.4f}")
    1.9078

    Fractional Gaussian noise with H = 0.1

    >>> rng = np.random.default_rng(seed=42)
    >>> x = sn.FractionalGaussianNoise(hurst=0.1, rng=rng).sample(10000)
    >>> print(f"{ent.sample_entropy(x):.4f}")
    2.0555

    Random

    >>> rng = np.random.default_rng(seed=42)
    >>> print(f"{ent.sample_entropy(rng.random(1000)):.4f}")
    2.2017

    Pure sine wave

    >>> x = np.sin(2 * np.pi * 1 * np.arange(3000) / 100)
    >>> print(f"{ent.sample_entropy(x):.4f}")
    0.1633

    Linearly-increasing time-series

    >>> x = np.arange(1000)
    >>> print(f"{ent.sample_entropy(x):.4f}")
    -0.0000
    """
    x = np.asarray(x, dtype=np.float64)
    if metric == 'chebyshev' and x.size < 5000:
        return _numba_sampen(x, order=order, r=(0.2 * x.std(ddof=0)))
    else:
        phi = _app_samp_entropy(x, order=order, metric=metric,
                                approximate=False)
        return -np.log(np.divide(phi[1], phi[0]))

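# Editor's sketch (not part of the library): a brute-force O(n^2) sample
# entropy for short signals, useful as a readable cross-check of the Numba
# and KDTree code paths above. It follows the Richman & Moorman (2000)
# counting convention; results may differ very slightly from the library at
# the tolerance boundary (strict ``<`` here vs. ``<=`` in ``query_radius``).
def _sample_entropy_bruteforce(x, order=2, r=None):
    x = np.asarray(x, dtype=np.float64)
    if r is None:
        r = 0.2 * x.std(ddof=0)
    n = x.size
    n_templates = n - order  # same template count for both lengths

    def _count_similar(length):
        templates = np.array([x[i:i + length] for i in range(n_templates)])
        count = 0
        for i in range(n_templates):
            # Chebyshev distance from template i to all other templates
            dist = np.max(np.abs(templates - templates[i]), axis=1)
            count += np.count_nonzero(dist < r) - 1  # exclude the self-match
        return count

    b = _count_similar(order)      # C(m, r)
    a = _count_similar(order + 1)  # C(m + 1, r)
    return -np.log(a / b)
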
@jit("uint32(uint32[:])", nopython=True) def _lz_complexity(binary_string): """Internal Numba implementation of the Lempel-Ziv (LZ) complexity. https://github.com/Naereen/Lempel-Ziv_Complexity/blob/master/src/lziv_complexity.py - Updated with strict integer typing instead of strings - Slight restructuring based on Yacine Mahdid's notebook: https://github.com/BIAPT/Notebooks/blob/master/features/Lempel-Ziv%20Complexity.ipynb """ # Initialize variables complexity = 1 prefix_len = 1 len_substring = 1 max_len_substring = 1 pointer = 0 # Iterate until the entire string has not been parsed while prefix_len + len_substring <= len(binary_string): # Given a prefix length, find the largest substring if ( binary_string[pointer + len_substring - 1] == binary_string[prefix_len + len_substring - 1] ): len_substring += 1 # increase the length of the substring else: max_len_substring = max(len_substring, max_len_substring) pointer += 1 # Since all pointers have been scanned, pick largest as the # jump size if pointer == prefix_len: # Increment complexity complexity += 1 # Set prefix length to the max substring size found so far # (jump size) prefix_len += max_len_substring # Reset pointer and max substring size pointer = 0 max_len_substring = 1 # Reset length of current substring len_substring = 1 # Check if final iteration occurred in the middle of a substring if len_substring != 1: complexity += 1 return complexity
def lziv_complexity(sequence, normalize=False):
    """Lempel-Ziv (LZ) complexity of (binary) sequence.

    .. versionadded:: 0.1.1

    Parameters
    ----------
    sequence : str or array
        A sequence of characters, e.g. ``'1001111011000010'``,
        ``[0, 1, 0, 1, 1]``, or ``'Hello World!'``.
    normalize : bool
        If ``True``, returns the normalized LZ (see Notes).

    Returns
    -------
    lz : int or float
        LZ complexity, which corresponds to the number of different
        substrings encountered as the stream is viewed from the beginning
        to the end. If ``normalize=False``, the output is an integer
        (counts), otherwise the output is a float.

    Notes
    -----
    LZ complexity is defined as the number of different substrings
    encountered as the sequence is viewed from beginning to the end.

    Although the raw LZ is an important complexity indicator, it is heavily
    influenced by sequence length (longer sequences will result in a higher
    LZ). Zhang and colleagues (2009) have therefore proposed the normalized
    LZ, which is defined by

    .. math:: \\text{LZn} = \\frac{\\text{LZ}}{(n / \\log_b{n})}

    where :math:`n` is the length of the sequence and :math:`b` the number
    of unique characters in the sequence.

    References
    ----------
    * Lempel, A., & Ziv, J. (1976). On the Complexity of Finite Sequences.
      IEEE Transactions on Information Theory / Professional Technical
      Group on Information Theory, 22(1), 75–81.
      https://doi.org/10.1109/TIT.1976.1055501

    * Zhang, Y., Hao, J., Zhou, C., & Chang, K. (2009). Normalized
      Lempel-Ziv complexity and its application in bio-sequence analysis.
      Journal of Mathematical Chemistry, 46(4), 1203–1212.
      https://doi.org/10.1007/s10910-008-9512-2

    * https://en.wikipedia.org/wiki/Lempel-Ziv_complexity

    * https://github.com/Naereen/Lempel-Ziv_Complexity

    Examples
    --------
    >>> from entropy import lziv_complexity
    >>> # Substrings = 1 / 0 / 01 / 1110 / 1100 / 0010
    >>> s = '1001111011000010'
    >>> lziv_complexity(s)
    6

    Using a list of integer / boolean instead of a string

    >>> # 1 / 0 / 10
    >>> lziv_complexity([1, 0, 1, 0, 1, 0, 1, 0, 1, 0])
    3

    With normalization

    >>> lziv_complexity(s, normalize=True)
    1.5

    This function also works with characters and words

    >>> s = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    >>> lziv_complexity(s), lziv_complexity(s, normalize=True)
    (26, 1.0)

    >>> s = 'HELLO WORLD! HELLO WORLD! HELLO WORLD! HELLO WORLD!'
    >>> lziv_complexity(s), lziv_complexity(s, normalize=True)
    (11, 0.38596001132145313)
    """
    assert isinstance(sequence, (str, list, np.ndarray))
    assert isinstance(normalize, bool)
    if isinstance(sequence, (list, np.ndarray)):
        sequence = np.asarray(sequence)
        if sequence.dtype.kind in 'bfi':
            # Convert [True, False] or [1., 0.] to [1, 0]
            s = sequence.astype("uint32")
        else:
            # Treat as numpy array of strings
            # Map string characters to their integer code points
            s = np.fromiter(map(ord, "".join(sequence.astype(str))),
                            dtype="uint32")
            # Can't preallocate length (by specifying count) due to string
            # concatenation
    else:
        s = np.fromiter(map(ord, sequence), dtype="uint32")

    if normalize:
        # 1) Timmermann et al. 2019
        # The sequence is randomly shuffled, and the normalized LZ
        # is calculated as the ratio of the LZ of the original sequence
        # divided by the LZ of the randomly shuffled sequence. However, the
        # final output is dependent on the random seed.
        # sl_shuffled = list(s)
        # rng = np.random.RandomState(None)
        # rng.shuffle(sl_shuffled)
        # s_shuffled = ''.join(sl_shuffled)
        # return _lz_complexity(s) / _lz_complexity(s_shuffled)
        # 2) Zhang et al. 2009
        n = len(s)
        base = sum(np.bincount(s) > 0)  # Number of unique characters
        base = 2 if base < 2 else base
        return _lz_complexity(s) / (n / log(n, base))
    else:
        return _lz_complexity(s)

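# Editor's sketch (not part of the library): a common way to apply LZ
# complexity to a continuous signal is to binarize it first, e.g. by
# thresholding at the median, and then compute the normalized LZ. The median
# threshold is an assumption of this example, not something mandated by
# ``lziv_complexity``.
def _lz_of_signal(x):
    x = np.asarray(x, dtype=float)
    binary = (x > np.median(x)).astype(int)  # 1 above the median, else 0
    return lziv_complexity(binary, normalize=True)

# A slowly varying signal (e.g. a low-frequency sine) produces long runs of
# 0s and 1s and hence a low normalized LZ, while white noise yields a nearly
# incompressible binary stream with a normalized LZ close to 1.
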
###############################################################################
# OTHER TIME-DOMAIN METRICS
###############################################################################

def num_zerocross(x, normalize=False, axis=-1):
    """Number of zero-crossings.

    .. versionadded:: 0.1.3

    Parameters
    ----------
    x : list or np.array
        1D or N-D data.
    normalize : bool
        If True, divide by the number of samples to normalize the output
        between 0 and 1. Otherwise, return the absolute number of zero
        crossings.
    axis : int
        The axis along which to perform the computation. Default is -1
        (last).

    Returns
    -------
    nzc : int or float
        Number of zero-crossings.

    Examples
    --------
    Simple examples

    >>> import numpy as np
    >>> import entropy as ent
    >>> ent.num_zerocross([-1, 0, 1, 2, 3])
    1

    >>> ent.num_zerocross([0, 0, 2, -1, 0, 1, 0, 2])
    2

    Number of zero crossings of a pure sine

    >>> import numpy as np
    >>> import entropy as ent
    >>> sf, f, dur = 100, 1, 4
    >>> N = sf * dur  # Total number of discrete samples
    >>> t = np.arange(N) / sf  # Time vector
    >>> x = np.sin(2 * np.pi * f * t)
    >>> ent.num_zerocross(x)
    7

    Random 2D data

    >>> np.random.seed(42)
    >>> x = np.random.normal(size=(4, 3000))
    >>> ent.num_zerocross(x)
    array([1499, 1528, 1547, 1457])

    Same but normalized by the number of samples

    >>> np.round(ent.num_zerocross(x, normalize=True), 4)
    array([0.4997, 0.5093, 0.5157, 0.4857])

    Fractional Gaussian noise with H = 0.5

    >>> import stochastic.processes.noise as sn
    >>> rng = np.random.default_rng(seed=42)
    >>> x = sn.FractionalGaussianNoise(hurst=0.5, rng=rng).sample(10000)
    >>> print(f"{ent.num_zerocross(x, normalize=True):.4f}")
    0.4973

    Fractional Gaussian noise with H = 0.9

    >>> rng = np.random.default_rng(seed=42)
    >>> x = sn.FractionalGaussianNoise(hurst=0.9, rng=rng).sample(10000)
    >>> print(f"{ent.num_zerocross(x, normalize=True):.4f}")
    0.2615

    Fractional Gaussian noise with H = 0.1

    >>> rng = np.random.default_rng(seed=42)
    >>> x = sn.FractionalGaussianNoise(hurst=0.1, rng=rng).sample(10000)
    >>> print(f"{ent.num_zerocross(x, normalize=True):.4f}")
    0.6451
    """
    x = np.asarray(x)
    # https://stackoverflow.com/a/29674950/10581531
    nzc = np.diff(np.signbit(x), axis=axis).sum(axis=axis)
    if normalize:
        nzc = nzc / x.shape[axis]
    return nzc

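# Editor's note: the one-liner above counts sign changes by differencing the
# boolean ``signbit`` mask. A minimal step-by-step illustration (our own, not
# part of the library doctests):
#
# >>> x = np.array([-1, 0, 1, 2, 3])
# >>> np.signbit(x)                 # True where the sign bit is set
# array([ True, False, False, False, False])
# >>> np.diff(np.signbit(x))        # True at each sign flip
# array([ True, False, False, False])
# >>> np.diff(np.signbit(x)).sum()  # number of zero-crossings
# 1
#
# With this convention, 0 counts as positive (``np.signbit(0)`` is False),
# which is why ``[0, 0, 2, -1, 0, 1, 0, 2]`` has 2 crossings in the docstring
# example above.
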
def hjorth_params(x, axis=-1):
    """Calculate Hjorth mobility and complexity on given axis.

    .. versionadded:: 0.1.3

    Parameters
    ----------
    x : list or np.array
        1D or N-D data.
    axis : int
        The axis along which to perform the computation. Default is -1
        (last).

    Returns
    -------
    mobility, complexity : float
        Mobility and complexity parameters.

    Notes
    -----
    Hjorth Parameters are indicators of statistical properties used in signal
    processing in the time domain introduced by Bo Hjorth in 1970. The
    parameters are activity, mobility, and complexity. EntroPy only returns
    the mobility and complexity parameters, since activity is simply the
    variance of :math:`x`, which can be computed easily with
    :py:func:`numpy.var`.

    The **mobility** parameter represents the mean frequency or the
    proportion of standard deviation of the power spectrum. This is defined
    as the square root of variance of the first derivative of :math:`x`
    divided by the variance of :math:`x`.

    The **complexity** gives an estimate of the bandwidth of the signal,
    which indicates the similarity of the shape of the signal to a pure sine
    wave (where the value converges to 1). Complexity is defined as the ratio
    of the mobility of the first derivative of :math:`x` to the mobility of
    :math:`x`.

    References
    ----------
    - https://en.wikipedia.org/wiki/Hjorth_parameters

    - https://doi.org/10.1016%2F0013-4694%2870%2990143-4

    Examples
    --------
    Hjorth parameters of a pure sine

    >>> import numpy as np
    >>> import entropy as ent
    >>> sf, f, dur = 100, 1, 4
    >>> N = sf * dur  # Total number of discrete samples
    >>> t = np.arange(N) / sf  # Time vector
    >>> x = np.sin(2 * np.pi * f * t)
    >>> np.round(ent.hjorth_params(x), 4)
    array([0.0627, 1.005 ])

    Random 2D data

    >>> np.random.seed(42)
    >>> x = np.random.normal(size=(4, 3000))
    >>> mob, com = ent.hjorth_params(x)
    >>> print(mob)
    [1.42145064 1.4339572  1.42186993 1.40587512]
    >>> print(com)
    [1.21877527 1.21092261 1.217278   1.22623163]

    Fractional Gaussian noise with H = 0.5

    >>> import stochastic.processes.noise as sn
    >>> rng = np.random.default_rng(seed=42)
    >>> x = sn.FractionalGaussianNoise(hurst=0.5, rng=rng).sample(10000)
    >>> np.round(ent.hjorth_params(x), 4)
    array([1.4073, 1.2283])

    Fractional Gaussian noise with H = 0.9

    >>> rng = np.random.default_rng(seed=42)
    >>> x = sn.FractionalGaussianNoise(hurst=0.9, rng=rng).sample(10000)
    >>> np.round(ent.hjorth_params(x), 4)
    array([0.8395, 1.9143])

    Fractional Gaussian noise with H = 0.1

    >>> rng = np.random.default_rng(seed=42)
    >>> x = sn.FractionalGaussianNoise(hurst=0.1, rng=rng).sample(10000)
    >>> np.round(ent.hjorth_params(x), 4)
    array([1.6917, 1.0717])
    """
    x = np.asarray(x)
    # Calculate derivatives
    dx = np.diff(x, axis=axis)
    ddx = np.diff(dx, axis=axis)
    # Calculate variance
    x_var = np.var(x, axis=axis)  # = activity
    dx_var = np.var(dx, axis=axis)
    ddx_var = np.var(ddx, axis=axis)
    # Mobility and complexity
    mob = np.sqrt(dx_var / x_var)
    com = np.sqrt(ddx_var / dx_var) / mob
    return mob, com

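# Editor's sketch (not part of the library): the third Hjorth parameter,
# activity, is simply ``np.var(x)``, so a full triplet can be obtained by
# combining it with ``hjorth_params``. The helper name below is ours.
def _hjorth_all_params(x, axis=-1):
    x = np.asarray(x)
    activity = np.var(x, axis=axis)  # signal power
    mobility, complexity = hjorth_params(x, axis=axis)
    return activity, mobility, complexity

# For the pure-sine doctest above, activity is np.var(x) ~= 0.5 (a unit sine
# has power 1/2), mobility ~= 0.0627 and complexity ~= 1.005.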