# Source code for yasa.spectral

```
"""
This file contains several helper functions to calculate spectral power from
1D and 2D EEG data.
"""
import mne
import numpy as np
import pandas as pd
from scipy import signal
from scipy.integrate import simps
from scipy.interpolate import RectBivariateSpline
__all__ = ['bandpower', 'bandpower_from_psd', 'irasa', 'stft_power']
def bandpower(data, sf=None, ch_names=None, hypno=None, include=(2, 3),
              win_sec=4, relative=True,
              bands=[(0.5, 4, 'Delta'), (4, 8, 'Theta'), (8, 12, 'Alpha'),
                     (12, 30, 'Beta'), (30, 40, 'Gamma')],
              kwargs_welch=dict(average='median', window='hamming')):
    """
    Calculate the Welch bandpower for each channel and, if specified,
    for each sleep stage.

    .. versionadded:: 0.1.6

    Parameters
    ----------
    data : np.array_like or :py:class:`mne.io.BaseRaw`
        1D or 2D EEG data. Can also be a :py:class:`mne.io.BaseRaw`, in which
        case ``data``, ``sf``, and ``ch_names`` will be automatically
        extracted, and ``data`` will also be converted from Volts (MNE default)
        to micro-Volts (YASA).
    sf : float
        The sampling frequency of data AND the hypnogram.
        Can be omitted if ``data`` is a :py:class:`mne.io.BaseRaw`.
    ch_names : list
        List of channel names, e.g. ['Cz', 'F3', 'F4', ...]. If None,
        channels will be labelled ['CHAN001', 'CHAN002', ...].
        Can be omitted if ``data`` is a :py:class:`mne.io.BaseRaw`.
    hypno : array_like
        Sleep stage vector (hypnogram). If the hypnogram is loaded, the
        bandpower will be extracted for each sleep stage defined in
        ``include``.

        The hypnogram must have the exact same number of samples as ``data``.
        To upsample your hypnogram, please refer to
        :py:func:`yasa.hypno_upsample_to_data`.

        .. note::
            The default hypnogram format in YASA is a 1D integer
            vector where:

            - -1 = Artefact / Movement
            - 0 = Wake
            - 1 = N1 sleep
            - 2 = N2 sleep
            - 3 = N3 sleep
            - 4 = REM
    include : tuple, list or int
        Values in ``hypno`` that will be included in the mask. The default is
        (2, 3), meaning that the bandpower are sequentially calculated
        for N2 and N3 sleep. This has no effect when ``hypno`` is None.
    win_sec : int or float
        The length of the sliding window, in seconds, used for the Welch PSD
        calculation. Ideally, this should be at least two times the inverse of
        the lower frequency of interest (e.g. for a lower frequency of interest
        of 0.5 Hz, the window length should be at least 2 * 1 / 0.5 =
        4 seconds).
    relative : boolean
        If True, bandpower is divided by the total power between the min and
        max frequencies defined in ``bands``.
    bands : list of tuples
        List of frequency bands of interests. Each tuple must contain the
        lower and upper frequencies, as well as the band name
        (e.g. (0.5, 4, 'Delta')).
    kwargs_welch : dict
        Optional keywords arguments that are passed to the
        :py:func:`scipy.signal.welch` function.

    Returns
    -------
    bandpowers : :py:class:`pandas.DataFrame`
        Bandpower dataframe, in which each column is a spectral band.
        Rows are indexed by ``'Chan'`` when ``hypno`` is None, and by
        ``('Stage', 'Chan')`` otherwise.

    Notes
    -----
    For an example of how to use this function, please refer to
    https://github.com/raphaelvallat/yasa/blob/master/notebooks/10_bandpower.ipynb
    """
    # Check if input data is a MNE Raw object
    if isinstance(data, mne.io.BaseRaw):
        sf = data.info['sfreq']  # Extract sampling frequency
        ch_names = data.ch_names  # Extract channel names
        data = data.get_data() * 1e6  # Convert from V to uV
        _, npts = data.shape
    else:
        # Safety checks
        assert isinstance(data, np.ndarray), 'Data must be a numpy array.'
        data = np.atleast_2d(data)
        assert data.ndim == 2, 'Data must be of shape (nchan, n_samples).'
        nchan, npts = data.shape
        assert nchan < npts, 'Data must be of shape (nchan, n_samples).'
        assert sf is not None, 'sf must be specified if passing a numpy array.'
        assert isinstance(sf, (int, float))
        if ch_names is None:
            ch_names = ['CHAN' + str(i + 1).zfill(3) for i in range(nchan)]
        else:
            ch_names = np.atleast_1d(np.asarray(ch_names, dtype=str))
            assert ch_names.ndim == 1, 'ch_names must be 1D.'
            assert len(ch_names) == nchan, 'ch_names must match data.shape[0].'

    win = int(win_sec * sf)  # nperseg

    if hypno is None:
        # Calculate the PSD over the whole data
        freqs, psd = signal.welch(data, sf, nperseg=win, **kwargs_welch)
        return bandpower_from_psd(psd, freqs, ch_names,
                                  bands=bands,
                                  relative=relative).set_index('Chan')

    # Per each sleep stage defined in ``include``.
    hypno = np.asarray(hypno)
    assert include is not None, 'include cannot be None if hypno is given'
    include = np.atleast_1d(np.asarray(include))
    assert hypno.ndim == 1, 'Hypno must be a 1D array.'
    assert hypno.size == npts, 'Hypno must have same size as data.shape[1]'
    assert include.size >= 1, '`include` must have at least one element.'
    assert hypno.dtype.kind == include.dtype.kind, ('hypno and include '
                                                    'must have same dtype')
    # np.isin replaces the deprecated np.in1d (hypno is 1D, same result).
    assert np.isin(hypno, include).any(), ('None of the stages '
                                           'specified in `include` '
                                           'are present in hypno.')

    # Collect one dataframe per stage and concatenate once at the end.
    # DataFrame.append was removed in pandas 2.0; the assertion above
    # guarantees that at least one stage frame is produced.
    stage_frames = []
    for stage in include:
        if stage not in hypno:
            continue
        data_stage = data[:, hypno == stage]
        freqs, psd = signal.welch(data_stage, sf, nperseg=win,
                                  **kwargs_welch)
        bp_stage = bandpower_from_psd(psd, freqs, ch_names, bands=bands,
                                      relative=relative)
        bp_stage['Stage'] = stage
        stage_frames.append(bp_stage)
    return pd.concat(stage_frames).set_index(['Stage', 'Chan'])
def bandpower_from_psd(psd, freqs, ch_names=None, bands=[(0.5, 4, 'Delta'),
                       (4, 8, 'Theta'), (8, 12, 'Alpha'), (12, 30, 'Beta'),
                       (30, 40, 'Gamma')], relative=True):
    """Compute the average power of the EEG in specified frequency band(s)
    given a pre-computed PSD.

    .. versionadded:: 0.1.5

    Parameters
    ----------
    psd : array_like
        Power spectral density of data, in uV^2/Hz.
        Must be of shape (n_channels, n_freqs).
        See :py:func:`scipy.signal.welch` for more details.
    freqs : array_like
        Array of frequencies.
    ch_names : list
        List of channel names, e.g. ['Cz', 'F3', 'F4', ...]. If None,
        channels will be labelled ['CHAN001', 'CHAN002', ...].
    bands : list of tuples
        List of frequency bands of interests. Each tuple must contain the
        lower and upper frequencies, as well as the band name
        (e.g. (0.5, 4, 'Delta')).
    relative : boolean
        If True, bandpower is divided by the total power between the min and
        max frequencies defined in ``bands`` (default 0.5 to 40 Hz).

    Returns
    -------
    bandpowers : :py:class:`pandas.DataFrame`
        Bandpower dataframe, in which each row is a channel and each column
        a spectral band. The additional columns ``'Chan'``, ``'FreqRes'``
        and ``'Relative'`` hold the channel name, the frequency resolution
        and the value of ``relative``. ``str(bands)`` is stored in the
        ``bands_`` attribute of the returned dataframe.
    """
    # ``scipy.integrate.simps`` was removed in SciPy 1.14; ``simpson`` is its
    # replacement (available since SciPy 1.6). Imported locally so that this
    # function does not depend on the legacy module-level name.
    try:
        from scipy.integrate import simpson
    except ImportError:  # pragma: no cover - SciPy < 1.6
        from scipy.integrate import simps as simpson

    # Safety checks
    freqs = np.asarray(freqs)
    assert freqs.ndim == 1
    psd = np.atleast_2d(psd)
    assert psd.ndim == 2, 'PSD must be of shape (n_channels, n_freqs).'

    # Restrict the spectrum to the range spanned by all requested bands.
    all_freqs = np.hstack([[b[0], b[1]] for b in bands])
    fmin, fmax = min(all_freqs), max(all_freqs)
    idx_good_freq = np.logical_and(freqs >= fmin, freqs <= fmax)
    freqs = freqs[idx_good_freq]
    res = freqs[1] - freqs[0]  # frequency resolution, used as dx below

    nchan = psd.shape[0]
    assert nchan < psd.shape[1], 'PSD must be of shape (n_channels, n_freqs).'
    if ch_names is not None:
        ch_names = np.atleast_1d(np.asarray(ch_names, dtype=str))
        assert ch_names.ndim == 1, 'ch_names must be 1D.'
        assert len(ch_names) == nchan, 'ch_names must match psd.shape[0].'
    else:
        ch_names = ['CHAN' + str(i + 1).zfill(3) for i in range(nchan)]

    # ``np.float`` was removed in NumPy 1.24; the builtin ``float`` is the
    # same dtype (float64).
    bp = np.zeros((nchan, len(bands)), dtype=float)
    psd = psd[:, idx_good_freq]
    total_power = simpson(psd, dx=res)
    total_power = total_power[..., np.newaxis]

    # Enumerate over the frequency bands
    labels = []
    for i, band in enumerate(bands):
        b0, b1, la = band
        labels.append(la)
        idx_band = np.logical_and(freqs >= b0, freqs <= b1)
        bp[:, i] = simpson(psd[:, idx_band], dx=res)

    if relative:
        bp /= total_power

    # Convert to DataFrame
    bp = pd.DataFrame(bp, columns=labels)
    bp['FreqRes'] = res
    bp['Relative'] = relative
    bp['Chan'] = ch_names
    # Round-trip through the index to move 'Chan' to the first column.
    bp = bp.set_index('Chan').reset_index()
    # Add hidden attributes
    bp.bands_ = str(bands)
    return bp
def irasa(data, sf=None, ch_names=None, band=(1, 30),
          hset=np.arange(1.1, 1.95, 0.05), return_fit=True, win_sec=4,
          kwargs_welch=dict(average='median', window='hamming')):
    """
    Split the power spectrum of EEG data into its aperiodic (= fractal, 1/f)
    and oscillatory components with the IRASA method.

    .. versionadded:: 0.1.7

    Parameters
    ----------
    data : :py:class:`numpy.ndarray` or :py:class:`mne.io.BaseRaw`
        1D or 2D EEG data. When a :py:class:`mne.io.BaseRaw` is given,
        ``sf`` and ``ch_names`` are read from it and the data are converted
        from Volts (MNE default) to micro-Volts (YASA).
    sf : float
        Sampling frequency of ``data``.
        Can be omitted if ``data`` is a :py:class:`mne.io.BaseRaw`.
    ch_names : list
        List of channel names, e.g. ['Cz', 'F3', 'F4', ...]. If None,
        channels will be labelled ['CHAN001', 'CHAN002', ...].
        Can be omitted if ``data`` is a :py:class:`mne.io.BaseRaw`.
    band : tuple
        Broad band frequency range (two values; they are sorted internally).
        The lower bound must be > 0 and the upper bound < ``sf / 2``.
        Default is 1 to 30 Hz.
    hset : :py:class:`numpy.ndarray`
        Resampling factors used in the IRASA calculation (1D, at least two
        values). Default is a range of values from 1.1 to 1.9 with an
        increment of 0.05.
    return_fit : boolean
        If True (default), fit an exponential function to the aperiodic PSD
        and return the fit parameters (intercept, slope) and :math:`R^2` of
        the fit.

        The aperiodic signal, :math:`L`, is modeled using an exponential
        function in semilog-power space (linear frequencies and log PSD) as:

        .. math:: L = a + \\text{log}(F^b)

        where :math:`a` is the intercept, :math:`b` is the slope, and
        :math:`F` the vector of input frequencies.
    win_sec : int or float
        The length of the sliding window, in seconds, used for the Welch PSD
        calculation. Ideally, this should be at least two times the inverse of
        the lower frequency of interest (e.g. for a lower frequency of interest
        of 0.5 Hz, the window length should be at least 2 * 1 / 0.5 =
        4 seconds).
    kwargs_welch : dict
        Optional keywords arguments that are passed to the
        :py:func:`scipy.signal.welch` function.

    Returns
    -------
    freqs : :py:class:`numpy.ndarray`
        Frequency vector, cropped to ``band``.
    psd_aperiodic : :py:class:`numpy.ndarray`
        The fractal (= aperiodic) component of the PSD.
    psd_oscillatory : :py:class:`numpy.ndarray`
        The oscillatory (= periodic) component of the PSD.
    fit_params : :py:class:`pandas.DataFrame` (optional)
        Dataframe of fit parameters (columns ``Chan``, ``Intercept``,
        ``Slope``, ``R^2`` and ``std(osc)``). Only if ``return_fit=True``.

    Notes
    -----
    The Irregular-Resampling Auto-Spectral Analysis (IRASA) method is
    described in Wen & Liu (2016). In a nutshell, the goal is to separate the
    fractal and oscillatory components in the power spectrum of EEG signals.
    The steps are:

    1. Compute the original power spectral density (PSD) using Welch's method.
    2. Resample the EEG data by multiple non-integer factors and their
       reciprocals (:math:`h` and :math:`1/h`).
    3. For every pair of resampled signals, calculate the PSD and take the
       geometric mean of both. In the resulting PSD, the power associated with
       the oscillatory component is redistributed away from its original
       (fundamental and harmonic) frequencies by a frequency offset that varies
       with the resampling factor, whereas the power solely attributed to the
       fractal component remains the same power-law statistical distribution
       independent of the resampling factor.
    4. It follows that taking the median of the PSD of the variously
       resampled signals can extract the power spectrum of the fractal
       component, and the difference between the original power spectrum and
       the extracted fractal spectrum offers an approximate estimate of the
       power spectrum of the oscillatory component.

    Note that an estimate of the original PSD can be calculated by simply
    adding ``psd = psd_aperiodic + psd_oscillatory``.

    For an example of how to use this function, please refer to
    https://github.com/raphaelvallat/yasa/blob/master/notebooks/11_IRASA.ipynb

    References
    ----------
    .. [1] Wen, H., & Liu, Z. (2016). Separating Fractal and Oscillatory
           Components in the Power Spectrum of Neurophysiological Signal.
           Brain Topography, 29(1), 13–26.
           https://doi.org/10.1007/s10548-015-0448-0
    .. [2] https://github.com/fieldtrip/fieldtrip/blob/master/specest/
    .. [3] https://github.com/fooof-tools/fooof
    .. [4] https://www.biorxiv.org/content/10.1101/299859v1
    """
    from fractions import Fraction

    # ---- Input handling -------------------------------------------------
    if isinstance(data, mne.io.BaseRaw):
        # Pull everything we need out of the Raw object before overwriting
        # ``data`` with the (micro-Volt) array.
        sf = data.info['sfreq']
        ch_names = data.ch_names
        data = data.get_data() * 1e6
    else:
        assert isinstance(data, np.ndarray), 'Data must be a numpy array.'
        data = np.atleast_2d(data)
        assert data.ndim == 2, 'Data must be of shape (nchan, n_samples).'
        n_channels, n_samples = data.shape
        assert n_channels < n_samples, ('Data must be of shape '
                                        '(nchan, n_samples).')
        assert sf is not None, 'sf must be specified if passing a numpy array.'
        assert isinstance(sf, (int, float))
        if ch_names is not None:
            ch_names = np.atleast_1d(np.asarray(ch_names, dtype=str))
            assert ch_names.ndim == 1, 'ch_names must be 1D.'
            assert len(ch_names) == n_channels, ('ch_names must match '
                                                 'data.shape[0].')
        else:
            ch_names = ['CHAN' + str(num).zfill(3)
                        for num in range(1, n_channels + 1)]

    hset = np.asarray(hset)
    assert hset.ndim == 1, 'hset must be 1D.'
    assert hset.size > 1, '2 or more resampling fators are required.'
    # Rounding avoids float precision errors coming from np.arange.
    hset = np.round(hset, 4)
    band = sorted(band)
    assert band[0] > 0, 'first element of band must be > 0.'
    assert band[1] < (sf / 2), 'second element of band must be < (sf / 2).'

    nperseg = int(win_sec * sf)

    # ---- Step 1: PSD of the original signal -----------------------------
    freqs, psd = signal.welch(data, sf, nperseg=nperseg, **kwargs_welch)

    # ---- Steps 2-3: PSD of each (h, 1/h) pair of resampled signals ------
    psds = np.zeros((len(hset), *psd.shape))
    for idx, factor in enumerate(hset):
        # Express the factor as an integer up/down ratio for polyphase
        # resampling (much faster than FFT-based resampling).
        ratio = Fraction(str(factor))
        up, down = ratio.numerator, ratio.denominator
        stretched = signal.resample_poly(data, up, down, axis=-1)
        squeezed = signal.resample_poly(data, down, up, axis=-1)
        # Same Welch parameters as for the original PSD
        _, psd_up = signal.welch(stretched, factor * sf, nperseg=nperseg,
                                 **kwargs_welch)
        _, psd_dw = signal.welch(squeezed, sf / factor, nperseg=nperseg,
                                 **kwargs_welch)
        # Geometric mean of the h and 1/h spectra
        psds[idx] = np.sqrt(psd_up * psd_dw)

    # ---- Step 4: median across factors = aperiodic component ------------
    psd_aperiodic = np.median(psds, axis=0)
    psd_osc = psd - psd_aperiodic

    # Crop everything to the requested band (bounds included). The 0 Hz bin
    # is always dropped because band[0] > 0.
    in_band = np.logical_and(freqs >= band[0], freqs <= band[1])
    freqs = freqs[in_band]
    psd_aperiodic = psd_aperiodic[..., in_band]
    psd_osc = psd_osc[..., in_band]

    if not return_fit:
        return freqs, psd_aperiodic, psd_osc

    # ---- Aperiodic fit in semilog space, one channel at a time ----------
    from scipy.optimize import curve_fit

    def func(t, a, b):
        # See https://github.com/fooof-tools/fooof
        return a + np.log(t**b)

    intercepts, slopes, r_squared = [], [], []
    for chan_psd in np.atleast_2d(psd_aperiodic):
        log_psd = np.log(chan_psd)
        # The slope is bounded to [-10, 2]; the intercept is unbounded.
        params, _ = curve_fit(func, freqs, log_psd, p0=(2, -1),
                              bounds=((-np.inf, -10), (np.inf, 2)))
        intercepts.append(params[0])
        slopes.append(params[1])
        # Calculate R^2: https://stackoverflow.com/q/19189362/10581531
        residuals = log_psd - func(freqs, *params)
        ss_res = np.sum(residuals**2)
        ss_tot = np.sum((log_psd - np.mean(log_psd))**2)
        r_squared.append(1 - (ss_res / ss_tot))

    fit_params = pd.DataFrame({
        'Chan': ch_names,
        'Intercept': intercepts,
        'Slope': slopes,
        'R^2': r_squared,
        'std(osc)': np.std(psd_osc, axis=-1, ddof=1),
    })
    return freqs, psd_aperiodic, psd_osc, fit_params
def stft_power(data, sf, window=2, step=.2, band=(1, 30), interp=True,
               norm=False):
    """Compute the pointwise power via STFT and interpolation.

    Parameters
    ----------
    data : array_like
        Single-channel data.
    sf : float
        Sampling frequency of the data.
    window : int
        Window size in seconds for STFT.
        2 or 4 seconds are usually a good default.
        Higher values = higher frequency resolution = lower time resolution.
    step : int
        Step in seconds for the STFT. Must be lower than or equal to
        ``window``.
        A step of 0.2 second (200 ms) is usually a good default.

        * If ``step`` == 0, it is replaced by one sample (``1 / sf``),
          i.e. overlap at every sample (slowest)
        * If ``step`` == ``window``, no overlap (fastest)

        Smaller steps mean more overlapping windows to compute.
    band : tuple or None
        Broad band frequency range (bounds included).
        Default is 1 to 30 Hz. If None, all the frequencies are kept.
    interp : boolean
        If True, a cubic interpolation is performed to ensure that the output
        is the same size as the input (= pointwise power).
    norm : bool
        If True, return bandwise normalized band power, i.e. for each time
        point, the sum of power in all the frequency bins equals 1.

    Returns
    -------
    f : :py:class:`numpy.ndarray`
        Frequency vector
    t : :py:class:`numpy.ndarray`
        Time vector
    Sxx : :py:class:`numpy.ndarray`
        Power in the specified frequency bins of shape (f, t)

    Notes
    -----
    2D Interpolation is done using
    :py:class:`scipy.interpolate.RectBivariateSpline`
    which is much faster than :py:class:`scipy.interpolate.interp2d`
    for a rectangular grid. The default is to use a bivariate spline with
    3 degrees.
    """
    samples = np.asarray(data)
    assert step <= window
    if step == 0:
        # A zero step means "advance by a single sample".
        step = 1 / sf

    # Translate window / step (seconds) into STFT segment parameters.
    seg_len = int(window * sf)
    seg_overlap = int(seg_len - (step * sf))

    f, t, Sxx = signal.stft(samples, sf, nperseg=seg_len,
                            noverlap=seg_overlap, detrend=False, padded=True)

    # Keep only the frequency bins of interest.
    if band is not None:
        in_band = (f >= band[0]) & (f <= band[1])
        f, Sxx = f[in_band], Sxx[in_band, :]

    # Squared magnitude of the complex STFT coefficients = power.
    Sxx = np.square(np.abs(Sxx))

    if interp:
        # Resample the time axis so there is one power value per sample.
        spline = RectBivariateSpline(f, t, Sxx)
        t = np.arange(samples.size) / sf
        Sxx = spline(f, t)

    if norm:
        # In-place division by the total power at each time point.
        total = Sxx.sum(0).reshape(1, -1)
        np.divide(Sxx, total, out=Sxx)
    return f, t, Sxx
```