Source code for pyrato.parameters

"""
Sub-module implementing calculations of room acoustic parameters from
simulated or experimental data.
"""
import re
import numpy as np
import pyfar as pf
import warnings


[docs]
def reverberation_time_linear_regression(
        energy_decay_curve, T='T20', return_intercept=False):
    """Estimate the reverberation time from a given energy decay curve.

    The linear regression is performed using least squares error minimization
    according to the ISO standard 3382 [#]_.

    Parameters
    ----------
    energy_decay_curve : pyfar.TimeData
        Energy decay curve.
    T : 'T15', 'T20', 'T30', 'T40', 'T50', 'T60', 'EDT', 'LDT'
        Decay interval to be used for the reverberation time extrapolation. EDT
        corresponds to the early decay time extrapolated from the interval
        ``[0, -10]`` dB, LDT corresponds to the late decay time extrapolated
        from the interval ``[-25, -35]`` dB.
    return_intercept : bool
        If `True`, the function returns the intercept of the linear regression,
        which corresponds to the amplitude of the energy decay curve on a
        linear scale. The default is False.

    Returns
    -------
    reverberation_time : double
        The reverberation time

    References
    ----------

    .. [#] ISO 3382, Acoustics - Measurement of the reverberation time of
           rooms with reference to other acoustical parameters.

    Examples
    --------
    Estimate the reverberation time from an energy decay curve.

    >>> import numpy as np
    >>> import pyfar as pf
    >>> import pyrato as ra
    >>> from pyrato.analytic import rectangular_room_rigid_walls
    ...
    >>> L = np.array([8, 5, 3])/10
    >>> source_pos = np.array([5, 3, 1.2])/10
    >>> receiver_pos = np.array([1, 1, 1.2])/10
    >>> rir, _ = rectangular_room_rigid_walls(
    ...     L, source_pos, receiver_pos,
    ...     reverberation_time=1, max_freq=1.5e3, n_samples=2**12,
    ...     speed_of_sound=343.9, samplingrate=3e3)
    >>> rir = rir/rir.time.max()
    ...
    >>> awgn = pf.signals.noise(
    ...     rir.n_samples, rms=10**(-50/20),
    ...     sampling_rate=rir.sampling_rate)
    >>> rir = rir + awgn
    ...
    >>> edc = ra.energy_decay_curve_chu_lundeby(rir)
    >>> t_20 = ra.parameters.reverberation_time_linear_regression(edc, 'T20')
    >>> t_20
    ...     array([0.99526253])

    """

    if T == 'EDT':
        upper = -0.1
        lower = -10.1
    elif T == 'LDT':
        upper = -25.
        lower = -35.
    else:
        if T not in ['T15', 'T20', 'T30', 'T40', 'T50', 'T60']:
            raise ValueError(
                f"{T} is not a valid interval for the regression.")
        upper = -5
        lower = -np.double(re.findall(r'\d+', T)) + upper

    edcs_db = 10*np.log10(np.abs(energy_decay_curve.time))
    times = energy_decay_curve.times

    reverberation_times = np.zeros(energy_decay_curve.cshape, dtype=float)
    intercepts = np.zeros(energy_decay_curve.cshape, dtype=float)

    for ch in np.ndindex(energy_decay_curve.cshape):
        edc_db = edcs_db[ch]
        idx_upper = np.nanargmin(np.abs(upper - edc_db))
        idx_lower = np.nanargmin(np.abs(lower - edc_db))

        A = np.vstack(
            [times[idx_upper:idx_lower], np.ones(idx_lower - idx_upper)]).T
        gradient, const = np.linalg.lstsq(
            A, edc_db[..., idx_upper:idx_lower], rcond=None)[0]

        reverberation_times[ch] = -60 / gradient
        intercepts[ch] = 10**(const/10)

    if return_intercept is True:
        return reverberation_times, intercepts
    else:
        return reverberation_times



[docs]
def clarity(energy_decay_curve, early_time_limit=80):
    r"""
    Calculate the clarity from the energy decay curve (EDC).

    The clarity parameter (C50 or C80) is defined as the ratio of early-to-late
    arriving energy in an impulse response and is a measure for how clearly
    speech or music can be perceived in a room. The early-to-late boundary is
    typically set at 50 ms (C50) or 80 ms (C80) [#isoClarity]_.

    Clarity is calculated as:

    .. math::

        C_{t_e} = 10 \log_{10} \frac{
            \displaystyle \int_0^{t_e} p^2(t) \, dt
        }{
            \displaystyle \int_{t_e}^{\infty} p^2(t) \, dt
        }

    where :math:`t_e` is the early time limit and :math:`p(t)` is the pressure
    of a room impulse response. Here, the clarity is efficiently computed
    from the EDC :math:`e(t)`

    .. math::

        C_{t_e} = 10 \log_{10} \frac{e(0) - e(t_e)}{e(t_e) - e(\infty)}
                = 10 \log_{10} \left( \frac{e(0)}{e(t_e)} - 1 \right),

    where :math:`e(\infty) = 0` by definition of the EDC.

    Parameters
    ----------
    energy_decay_curve : pyfar.TimeData
        Energy decay curve (EDC) of the room impulse response
        (time-domain signal). The EDC must start at time zero.
    early_time_limit : float, optional
        Early time limit (:math:`t_e`) in milliseconds. Defaults to 80 (C80).
        Typical values are 50 ms (C50) or 80 ms (C80) [#isoClarity]_.

    Returns
    -------
    clarity : numpy.ndarray[float]
        Clarity index (early-to-late energy ratio) in decibels,
        shaped according to the channel shape of the input EDC.

    References
    ----------
    .. [#isoClarity] ISO 3382, Acoustics — Measurement of the reverberation
        time of rooms with reference to other acoustical parameters.

    Examples
    --------
    Estimate the clarity from a real room impulse response filtered in
    octave bands:

    >>> import pyfar as pf
    >>> import pyrato as ra
    ...
    >>> rir = pf.signals.files.room_impulse_response(sampling_rate=44100)
    >>> rir = pf.dsp.filter.fractional_octave_bands(rir, num_fractions=1)
    >>> edc = ra.edc.energy_decay_curve_lundeby(rir)
    ...
    >>> C80 = ra.parameters.clarity(edc, early_time_limit=80)
    >>> C80
    ...     [[-55.57140506]
    ...     [-11.75657677]
    ...     [ -3.21150787]
    ...     [  2.76276817]
    ...     [  4.70786211]
    ...     [  5.98148157]
    ...     [  9.66764094]
    ...     [  9.08687417]
    ...     [ 14.14550646]
    ...     [ 21.60048332]]
    """

    if not isinstance(early_time_limit, (int, float)):
        raise TypeError('early_time_limit must be a number.')

    # Convert milliseconds to seconds
    early_time_limit_sec = early_time_limit / 1000

    limits = np.array([early_time_limit_sec,
                       np.inf,
                       0.0,
                       early_time_limit_sec])

    return 10*np.log10(_energy_ratio(limits,
                                     energy_decay_curve,
                                     energy_decay_curve))




[docs]
def early_lateral_energy_fraction(energy_decay_curve_omni,
                                  energy_decay_curve_lateral):
    r"""
    Calculate the early lateral energy fraction.

    The early lateral energy fraction :math:`J_\mathrm{LF}`
    according to [#isoEarlyLat]_ is defined as the ratio between the
    lateral sound energy captured with a figure of eight microphone
    arriving between 5 ms and 80 ms and the total sound energy
    captured with an omnidirectional microphone arriving within
    the first 80 ms after the direct sound. It is a measure of the
    apparent source width.

    The parameter is defined as

    .. math::

        J_\mathrm{LF} =
        \frac{
            \displaystyle \int_{0.005}^{0.08} p_\mathrm{L}^2(t)\,\mathrm{d}t
        }{
            \displaystyle \int_{0}^{0.08} p^2(t)\,\mathrm{d}t
        }

    where :math:`p_\mathrm{L}(t)` is the lateral sound pressure measured with a
    figure-eight microphone whose zero axis is oriented towards the source,
    and :math:`p(t)` is the sound pressure measured at the same position
    with an omnidirectional microphone.

    Using the energy decay curves of the omnidirectional response
    :math:`e(t)` and the lateral response :math:`e_\mathrm{L}(t)`, the
    parameter can be computed efficiently as

    .. math::

        J_\mathrm{LF} =
        \frac{
            e_\mathrm{L}(0.005) - e_\mathrm{L}(0.08)
        }{
            e(0) - e(0.08)
        }.

    Parameters
    ----------
    energy_decay_curve_omni : pyfar.TimeData
        Energy decay curve of the room impulse response measured with an
        omnidirectional microphone. The EDC must start at time zero.

    energy_decay_curve_lateral : pyfar.TimeData
        Energy decay curve of the room impulse response measured with a
        figure-eight microphone oriented according to [#isoEarlyLat]_
        (zero axis pointing towards the source). The EDC must start at
        time zero.
        Both EDCs must have identical ``signal.cshape``.

    Returns
    -------
    Early Lateral Energy Fraction: numpy.ndarray
        Early lateral energy fraction (:math:`J_\mathrm{LF}`),
        shaped according to the channel shape of the input EDCs.

    References
    ----------
    .. [#isoEarlyLat] ISO 3382, Acoustics — Measurement of the reverberation
        time of rooms with reference to other acoustical parameters.
    """

    limits = np.array([0.0, 0.08, 0.005, 0.08])

    return _energy_ratio(limits,
                         energy_decay_curve_omni,
                         energy_decay_curve_lateral)





[docs]
def definition(energy_decay_curve, early_time_limit=50):
    r"""
    Calculate the definition from the energy decay curve (EDC).

    The definition parameter (D50) is defined as the ratio of early-to-total
    arriving energy in an impulse response and is a measure for how defined
    speech or music can be perceived in a room. The early-to-total boundary is
    typically set at 50 ms (D50) [#isoDefinition]_.

    Definition is calculated as:

    .. math::

        D_{t_\mathrm{e}} = \frac{
            \displaystyle \int_0^{t_\mathrm{e}} p^2(t) \, dt
        }{
            \displaystyle \int_{0}^{\infty} p^2(t) \, dt
        }

    where :math:`t_e` is the early time limit and :math:`p(t)` is the pressure
    of a room impulse response. Here, the definition is efficiently computed
    from the EDC :math:`e(t)` directly by:

    .. math::

        D_{t_\mathrm{e}} = \frac{e(0) - e(t_\mathrm{e})}{e(0) - e(\infty)}
                = 1 - \left( \frac{e(t_\mathrm{e})}{e(0)} \right),

    where :math:`e(\infty) = 0` by definition of the EDC.

    Parameters
    ----------
    energy_decay_curve : pyfar.TimeData
        Energy decay curve (EDC) of the room impulse response
        (time-domain signal). The EDC must start at time zero.
    early_time_limit : float, optional
        Early time limit (:math:`t_\mathrm{e}`) in milliseconds. Defaults to
        typical value 50 (D50) [#isoDefinition]_.

    Returns
    -------
    definition : numpy.ndarray[float]
        Definition index (early-to-total energy ratio),
        shaped according to the channel shape of the input EDC.

    References
    ----------
    .. [#isoDefinition] ISO 3382, Acoustics — Measurement of the reverberation
        time of rooms with reference to other acoustical parameters.

    Examples
    --------
    Estimate the defintion from a real room impulse response filtered in
    octave bands:

    >>> import pyfar as pf
    >>> import pyrato
    ...
    >>> rir = pf.signals.files.room_impulse_response(sampling_rate=44100)
    >>> rir = pf.dsp.filter.fractional_octave_bands(
    >>>     rir, num_fractions=1, frequency_range=(125, 20e3))
    >>> edc = pyrato.edc.energy_decay_curve_lundeby(rir)
    ...
    >>> D50 = pyrato.parameters.definition(edc, early_time_limit=50)
    >>> D50
    ...     [[0.25984852]
    ...     [0.50208742]
    ...     [0.66722359]
    ...     [0.73528532]
    ...     [0.87801455]
    ...     [0.82757594]
    ...     [0.86536142]
    ...     [0.87374988]]
    """

    if not isinstance(early_time_limit, (int, float)):
        raise TypeError('early_time_limit must be a number.')

    # Convert milliseconds to seconds
    early_time_limit_sec = early_time_limit / 1000

    limits = np.array([0.0, np.inf, 0.0, early_time_limit_sec])

    return _energy_ratio(limits,
                         energy_decay_curve,
                         energy_decay_curve)


def _energy_ratio(limits, energy_decay_curve1, energy_decay_curve2):
    r"""
    Calculate the energy ratio for the time limits from two energy
    decay curves (EDC).

    A variety of room-acoustic parameters are defined by energy ratios derived
    from one or two time-domain Energy Decay Curves (EDCs). These parameters
    distinguish between time regions using the four provided limits, and some,
    such as Strength (:math:`G`), Early lateral sound (:math:`J_\mathrm{LF}`),
    and Late lateral sound (:math:`L_J`), require EDCs obtained from different
    impulse-response measurements [#iso2]_.

    Energy-Ratio is calculated as:

    .. math::

        ER = \frac{
            \displaystyle \int_{lim3}^{lim4} p_2^2(t) \, dt
        }{
            \displaystyle \int_{lim1}^{lim2} p_1^2(t) \, dt
        }
    where :math:`[lim1, ..., lim4]` are the time limits and :math:`p(t)` is the
    pressure of a room impulse response. Here, the energy ratio is
    efficiently computed from the EDC :math:`e(t)` directly by:

    .. math::

        ER = \frac{
            \displaystyle e_2(lim3) - e_2(lim4)
        }{
            \displaystyle e_1(lim1) - e_1(lim2)
        }.

    By definition, the EDC represents the remaining energy up to
    :math:`\infty` and converges to zero, i.e., :math:`e(\infty)=0`.
    Thus, ``np.inf`` may be used as a limit to select the full
    remaining energy of an EDC.

    Parameters
    ----------
    limits : np.ndarray, list or tuple
        Four time limits (:math:`t_e`) in seconds, shape (4,)
        in ascending order. Limits must be either numerical or np.inf.
    energy_decay_curve1 : pyfar.TimeData
        Energy decay curve 1 (EDC1) of the room impulse response
        (time-domain signal). The EDC must start at time zero.
    energy_decay_curve2 : pyfar.TimeData
        Energy decay curve 2 (EDC2) of the room impulse response
        (time-domain signal). The EDC must start at time zero.

    Returns
    -------
    energy ratio : numpy.ndarray[float]
        energy-ratio index (early-to-late energy ratio),
        shaped according to the channel shape of the input EDC.

    References
    ----------
    .. [#iso2] ISO 3382, Acoustics — Measurement of the reverberation time of
        rooms with reference to other acoustical parameters.
    """

    # Check input type
    if not isinstance(energy_decay_curve1, pf.TimeData):
        raise TypeError(
            "energy_decay_curve1 must be a pyfar.TimeData or derived object.")
    if not isinstance(energy_decay_curve2, pf.TimeData):
        raise TypeError(
            "energy_decay_curve2 must be a pyfar.TimeData or derived object.")

    # Check that both EDCs have the same channel shape
    if energy_decay_curve1.cshape != energy_decay_curve2.cshape:
        raise ValueError(
            f"energy_decay_curve1 and energy_decay_curve2 must have the same "
            f"channel shape. Got cshape={energy_decay_curve1.cshape} and "
            f"cshape={energy_decay_curve2.cshape}.")

    if isinstance(limits, (list, tuple)):
        limits = np.asarray(limits)

    if not isinstance(limits, np.ndarray):
        raise TypeError("limits must be a numpy ndarray, list, or tuple.")

    # Check shape
    if limits.shape != (4,):
        raise ValueError(
            "limits must have shape (4,), " \
            "containing [lim1, lim2, lim3, lim4].",
            )

    # Check if limits are within valid time range
    if (
        np.any(limits[0:2] < 0) or
        np.any((limits[0:2] > energy_decay_curve1.signal_length) &
               (np.isfinite(limits[0:2])))
    ):
        raise ValueError(
            f"limits[0:2] must be between 0 and "
            f"{energy_decay_curve1.signal_length} seconds or np.inf.",
        )
    if (
        np.any(limits[2:4] < 0) or
        np.any((limits[2:4] > energy_decay_curve2.signal_length) &
               (np.isfinite(limits[2:4])))
    ):
        raise ValueError(
            f"limits[2:4] must be between 0 and "
            f"{energy_decay_curve2.signal_length} seconds or np.inf.",
        )

    limits_denominator = limits[0:2]
    limits_numerator = limits[2:4]

    # finite limits mask
    finite_limits_denominator = np.isfinite(limits_denominator)
    finite_limits_numerator = np.isfinite(limits_numerator)

    # Denominator values (EDC1, limits 0:2)
    values_shape1 = energy_decay_curve1.cshape + (2,)
    energy_decay_curve1_values = np.zeros(values_shape1)
    if np.any(finite_limits_denominator):
        limits_energy_decay_curve1_idx = np.atleast_1d(
            energy_decay_curve1.find_nearest_time(
                limits_denominator[finite_limits_denominator],
            ),
        )

        energy_decay_curve1_values[..., finite_limits_denominator] = \
            energy_decay_curve1.time[..., limits_energy_decay_curve1_idx]

    # Numerator values (EDC2, limits 2:4)
    values_shape2 = energy_decay_curve2.cshape + (2,)
    energy_decay_curve2_values = np.zeros(values_shape2)
    if np.any(finite_limits_numerator):
        limits_energy_decay_curve2_idx = np.atleast_1d(
            energy_decay_curve2.find_nearest_time(
                limits_numerator[finite_limits_numerator],
            ),
        )

        energy_decay_curve2_values[..., finite_limits_numerator] = \
            energy_decay_curve2.time[..., limits_energy_decay_curve2_idx]

    # using 'minus' because np.diff yields negative result
    numerator = -np.diff(energy_decay_curve2_values, axis=-1)[..., 0]
    denominator = -np.diff(energy_decay_curve1_values, axis=-1)[..., 0]

    energy_ratio = numerator / denominator

    return energy_ratio


[docs]
def late_lateral_sound_level(energy_decay_curve_free_field,
                       energy_decay_curve_room_lateral):
    r"""
    Calculate the late lateral sound level.

    The late lateral sound level :math:`L_\mathrm{J}` quantifies the strength
    of late-arriving lateral sound energy. According to ISO 3382-1 [#isoLLJ]_,
    it is defined as the level ratio between the late lateral sound energy
    captured with a figure-of-eight microphone and the total sound energy of a
    reference impulse response measured with an omnidirectional microphone at
    a distance of 10 m in the free field. It is a measure of listener
    envelopment.

    The parameter is defined as

    .. math::

        L_\mathrm{J} =
        10 \log_{10}
        \frac{
            \displaystyle \int_{0.08}^{\infty} p_\mathrm{L}^2(t)\,\mathrm{d}t
        }{
            \displaystyle \int_{0}^{\infty} p_{10}^2(t)\,\mathrm{d}t
        }

    where :math:`p_\mathrm{L}(t)` is the lateral sound pressure measured with a
    figure-eight microphone whose zero axis is oriented towards the source,
    and :math:`p_{10}(t)` is the instantaneous sound pressure of the
    impulse response measured with an omnidirectional microphone
    at 10 m distance in the free field.

    Using the energy decay curves of the reference response
    :math:`e_{10}(t)` and the lateral response :math:`e_\mathrm{L}(t)`,
    the parameter can be computed efficiently as

    .. math::

        L_\mathrm{J} =
        10 \log_{10}
        \frac{
            e_\mathrm{L}(0.08)
        }{
            e_{10}(0)
        }.

    Parameters
    ----------
    energy_decay_curve_free_field : pyfar.TimeData
        Energy decay curve of the reference free field impulse response
        measured vwith an omnidirectional microphone at 10 m distance in the
        free field. The EDC must start at time zero.

    energy_decay_curve_room_lateral : pyfar.TimeData
        Energy decay curve of the room impulse response measured with a
        figure-eight microphone oriented according to [#isoLLJ]_
        (zero axis pointing towards the source). The EDC must start at
        time zero.

        Both EDCs must have identical ``signal.cshape``.

    Returns
    -------
    Late Lateral Sound Level : numpy.ndarray
        Late lateral sound level (:math:`L_\mathrm{J}`) in decibels,
        shaped according to the channel shape of the input EDCs.

    References
    ----------
    .. [#isoLLJ] ISO 3382-1, ISO 3382, Acoustics — Measurement of the
        reverberation time of rooms with reference to other acoustical
        parameters.
    """

    limits = np.array([0.0, np.inf, 0.08, np.inf])

    return 10 * np.log10(_energy_ratio(limits,
                                       energy_decay_curve_free_field,
                                       energy_decay_curve_room_lateral))



[docs]
def sound_strength(energy_decay_curve_room,
             energy_decay_curve_free_field):
    r"""
    Calculate the room-acoustic strength parameter (:math:`G`).

    The strength parameter (:math:`G`) is defined as the ratio between the
    total arriving sound energy and the total arriving sound energy
    of a reference free-field response measured at 10 m with the same
    source. It is a measure of the room-induced level amplification
    at the receiver position [#isoStrength]_.

    The parameter is defined as

    .. math::

        G =
        10 \log_{10}
        \frac{
            \displaystyle \int_{0}^{\infty} p^2(t)\,dt
        }{
            \displaystyle \int_{0}^{\infty} p_\mathrm{10}^2(t)\,dt
        }

    where :math:`p(t)` is the room sound pressure and
    :math:`p_\mathrm{10}(t)` is the reference free-field sound pressure
    at 10 m measured with the same loudspeaker.

    Using the energy decay curves of the room response
    :math:`e(t)` and the reference response
    :math:`e_\mathrm{10}(t)`, the
    parameter can be computed efficiently as

    .. math::

        G =
        10 \log_{10}
        \frac{
            e(0) - e(\infty)
        }{
            e_\mathrm{10}(0) - e_\mathrm{10}(\infty)
        }.

    Parameters
    ----------
    energy_decay_curve_room : pyfar.TimeData
        Energy decay curve of the room impulse response. The EDC must
        start at time zero.

    energy_decay_curve_free_field : pyfar.TimeData
        Energy decay curve of the reference free-field impulse response
        at 10 m. The EDC must start at time zero.
        Both EDCs must have identical ``signal.cshape``.

    Returns
    -------
    strength : numpy.ndarray
        Strength parameter (:math:`G`) in decibels,
        shaped according to the channel shape of the input EDC.

    References
    ----------
    .. [#isoStrength] ISO 3382, Acoustics — Measurement of the reverberation
        time of rooms with reference to other acoustical parameters.
    """

    limits = np.array([0.0, np.inf, 0.0, np.inf])

    return 10*np.log10(_energy_ratio(limits,
                                     energy_decay_curve_free_field,
                                     energy_decay_curve_room))




[docs]
def speech_transmission_index_indirect(
        rir, rir_type="acoustical", level=None, snr=np.inf,
        ambient_noise_correction=True):
    """
    Computes the Speech Transmission Index (STI) according to
    IEC 60268-16:2020 using the indirect method.

    The STI is a scalar measure between 0 (bad) and 1 (excellent)
    describing speech intelligibility. It is computed from the
    :py:func:`~modulation_transfer_function`, optionally including auditory
    masking and ambient noise effects.

    STI considers 7 octave bands from 125 Hz to 8 kHz
    and 14 modulation frequencies between 0.63 Hz and
    12.5 Hz [#iec]_.

    Parameters
    ----------
    rir : pyfar.Signal
        Single or multi-channel room impulse response for which the
        STI is computed. The room impulse response must be at least
        1.6 seconds long. See [#iec]_, Section 6.2.
    rir_type : ``'electrical'``, ``'acoustical'``
        Determines whether input signals given by `rir` were obtained
        acoustically or electrically. Default is ``'acoustical'``.
        Auditory masking effects are only applied for acoustical
        signals [#iec]_, section A.3.1.
    level : numpy.ndarray or None, optional
        Test signal level without noise in dB SPL, given per octave band
        (125 Hz–8 kHz). Shape can be ``(7,)`` (7 octave bands: 125 Hz–8 kHz)
        to use the same values for all channels, or ``(rir.cshape, 7)``
        for channel-specific values.
        If ``None`` is provided, auditory and ambient noise corrections are
        omitted. See [#iec]_, section A.3.2.
    snr : numpy.ndarray or float, optional
        Signal-to-noise ratio in dB for each octave band (125 Hz–8 kHz).
        Shape can be ``(7,)`` (7 octave bands: 125 Hz–8 kHz)
        to use the same values for all channels, or ``(rir.cshape, 7)``
        for channel-specific values.
        Default is ``np.inf`` (no ambient noise). See [#iec]_, section 3.
    ambient_noise_correction: bool, optional
        Apply ambient noise correction according to [#iec]_,
        Annex A.2.3. Default is ``True``. Only applied when
        ``level is not None`` and ``ambient_noise_correction is True``.

    Returns
    -------
    sti : numpy.ndarray
        Channel-wise Speech Transmission Index with shape ``rir.cshape``.

    Notes
    -----
    pyfar uses octave-band filters of order 14 and the filter order
    influences the MTF. Higher filter orders produce steeper roll-off and
    a more ideal band separation, which affects the energy distribution
    within each octave band and thus the computed modulation depth.
    The influence on the broadband STI is negligible, since individual
    deviations in the MTF tend to average out in the weighted sum over
    octave bands and modulation frequencies.

    References
    ----------
    .. [#iec] IEC 60268-16:2020
     Sound system equipment - Part 16: Objective rating of speech
     intelligibility by speech transmission index.
    """
    # check if input data is a pyfar.Signal
    if not isinstance(rir, pf.Signal):
        raise TypeError("Input data must be a pyfar.Signal.")

    if np.ndim(snr) == 0:
        snr = np.full((7,), float(snr))
    snr = np.asarray(snr)
    # Check if SNR has valid shape: (7,) uses same values for all
    # channels, (cshape, 7) for channel-specific values
    if snr.shape != (7,) and snr.shape != rir.cshape + (7,):
        raise ValueError(
            f"Input 'snr' must have shape (7,) or "
            f"{rir.cshape + (7,)} (7 octave bands or matching "
            f"rir channels + 7 octave bands), but got {snr.shape}.",
        )
    if np.any(snr < 20):
        warnings.warn(
            "Input 'snr' should be at least 20 dB for every octave band.",
            stacklevel=2,
            )

    if level is not None:
        # check if input level is a numpy array
        if not isinstance(level, np.ndarray):
            raise TypeError("Input 'level' must be a numpy array.")
        level = np.asarray(level)
        # Check if level has valid shape: (7,) uses same values for
        # all channels, (cshape, 7) for channel-specific values
        if level.shape != (7,) and level.shape != rir.cshape + (7,):
            raise ValueError(
                f"Input 'level' must have shape (7,) or "
                f"{rir.cshape + (7,)} (7 octave bands or matching "
                f"rir channels + 7 octave bands), but got {level.shape}.",
            )

    # check rir_type
    if rir_type is None:
        rir_type = "acoustical"
    if rir_type not in ["electrical", "acoustical"]:
        raise ValueError(f"rir_type is '{rir_type}' but must be "
                         "'electrical' or 'acoustical'.")

    # Validate ambient_noise_correction parameter
    if not isinstance(ambient_noise_correction, bool):
        raise TypeError("ambient_noise_correction must be a boolean.")

    sti = np.zeros(rir.cshape)

    for ch in np.ndindex(rir.cshape):
        # Use snr/level directly if shape is (7,), otherwise index by channel
        current_snr = snr if snr.shape == (7,) else snr[ch]
        current_level = (
            None if level is None
            else (level if level.shape == (7,) else level[ch])
        )
        mtf = modulation_transfer_function(rir[ch],
                                           rir_type,
                                           current_level,
                                           current_snr,
                                           ambient_noise_correction)
        sti[ch] = _sti_calc(mtf)

    return sti




[docs]
def modulation_transfer_function(
        rir, rir_type="acoustical", level=None, snr=np.inf,
        ambient_noise_correction=True):
    """
    Compute the modulation transfer function (MTF) of an impulse response
    according to IEC 60268-16:2020.

    The MTF describes the reduction of modulation depth caused by the
    transmission path. It is evaluated for 7 octave bands (125 Hz–8 kHz)
    and 14 modulation frequencies (0.63 Hz–12.5 Hz) and forms the basis
    of the Speech Transmission Index (STI).

    Parameters
    ----------
    rir : pyfar.Signal
        Single-channel room impulse response with ``rir.cshape = (1, )``.
        The room impulse response must be at least 1.6 seconds long.
    rir_type : {'electrical', 'acoustical'}, optional
        Determines whether input signals given by `rir` were obtained
        acoustically or electrically. Default is ``'acoustical'``.
        Auditory masking effects are only applied for acoustical
        signals [#iecMTF]_, section A.3.1.
    level : numpy.ndarray or None, optional
        Test signal level without noise in dB SPL, given per octave band
        (125 Hz–8 kHz). Shape must be ``(7,)`` (7 octave bands: 125 Hz–8 kHz).
        If ``None`` is provided, auditory and ambient noise corrections are
        omitted. Default is ``None``. See [#iecMTF]_, section A.3.2.
    snr : numpy.ndarray or float, optional
        Signal-to-noise ratio in dB for each octave band (125 Hz–8 kHz).
        Shape must be ``(7,)`` (7 octave bands: 125 Hz–8 kHz).
        Default is ``np.inf`` (no ambient noise). See [#iecMTF]_, section 3.
    ambient_noise_correction : bool, optional
        Apply ambient noise correction according to [#iecMTF]_,
        Annex A.2.3. Default is ``True``. Only applied when
        ``level is not None`` and ``ambient_noise_correction is True``.

    Returns
    -------
    mtf : numpy.ndarray
        Modulation transfer function with shape ``(7, 14)``.

    Notes
    -----
    pyfar uses octave-band filters of order 14 and the filter order
    influences the MTF. Higher filter orders produce steeper roll-off and
    a more ideal band separation, which affects the energy distribution
    within each octave band and thus the computed modulation depth.
    The influence on the broadband STI is negligible, since individual
    deviations in the MTF tend to average out in the weighted sum over
    octave bands and modulation frequencies.

    References
    ----------
    .. [#iecMTF] IEC 60268-16:2020
       Sound system equipment - Part 16: Objective rating of speech
       intelligibility by speech transmission index.
    """
    # Check if input data is a pyfar.Signal
    if not isinstance(rir, pf.Signal):
        raise TypeError("Input data must be a pyfar.Signal.")

    # Check if data is single-channel
    if rir.cshape != (1,):
        raise ValueError(
            "Input must be a single-channel impulse response, "
            f"but got shape {rir.cshape}.",
        )

    # Check if signal is at least 1.6 seconds long
    # (IEC 60268-16:2020, Section 6.2)
    if rir.n_samples / rir.sampling_rate < 1.6:
        raise ValueError(
            "Input signal must be at least 1.6 seconds long "
            "(see IEC 60268-16:2020, Section 6.2).",
        )

    if np.ndim(snr) == 0:
        snr = np.full((7,), float(snr))
    if not isinstance(snr, np.ndarray):
        raise TypeError("snr must be a numpy array.")
    if snr.shape != (7,):
        raise ValueError(
            f"snr must have shape (7,) for 7 octave bands "
            f"(125 Hz - 8 kHz), but got {snr.shape}.",
        )
    if np.any(snr < 20):
        warnings.warn(
            "Input 'snr' should be at least 20 dB for every "
            "octave band.",
            stacklevel=2,
        )

    if level is not None:
        # check if input level is a numpy array
        if not isinstance(level, np.ndarray):
            raise TypeError("level must be a numpy array or None.")
        if level.shape != (7,):
            raise ValueError(
                f"Level must have shape (7,) for 7 octave bands "
                f"(125 Hz - 8 kHz), but got {level.shape}.",
            )
        if np.any(level < 1):
            warnings.warn(
                "Input 'level' should be at least 1 dB for every "
                "octave band.",
                stacklevel=2,
            )

    # check rir_type
    if rir_type is None:
        rir_type = "acoustical"
    if rir_type not in ["electrical", "acoustical"]:
        raise ValueError(f"Data_type is '{rir_type}' but must be "
                         "'electrical' or 'acoustical'.")

    # Validate ambient_noise_correction parameter
    if not isinstance(ambient_noise_correction, bool):
        raise TypeError("ambient_noise_correction must be a boolean.")

    rir_oct = pf.dsp.filter.fractional_octave_bands(
        rir, num_fractions=1, frequency_range=(125, 8000))

    # Modulation frequencies (fm) in Hz for MTF calculation
    # Defined in IEC 60268-16:2020, Section A.1.4
    modulation_frequencies = np.array(
        [0.63, 0.80, 1.0, 1.25, 1.60, 2.0, 2.5,
         3.15, 4.0, 5.0, 6.3, 8.0, 10.0, 12.5],
    )

    modulation_frequencies = np.tile(
        modulation_frequencies, (rir_oct.cshape[0], 1))
    energy = rir_oct.time ** 2

    # MTF calculation (IEC 60268-16:2020, Section 6.1)
    term_exp = np.exp(
        -2j * np.pi * modulation_frequencies[:, :, None] * rir_oct.times)
    numerator = np.abs(np.sum(energy * term_exp, axis=-1))
    denominator = np.sum(energy, axis=-1)
    mtf = numerator / denominator

    # Ambient noise correction (IEC 60268-16:2020, Annex A.2.3)
    mtf *= 1 / (1 + 10 ** (-snr[:, None] / 10))

    if level is not None and ambient_noise_correction is True:
        mtf = _ambient_noise_correction(mtf, level, snr, rir_type)

    # Clip MTF to valid range [0, 1] (IEC 60268-16:2020, Section A.2.4)
    return np.clip(mtf, 0, 1)



def _ambient_noise_correction(mtf, level, snr, rir_type):
    r"""
    Weight the MTF by auditory masking, ambient noise, and hearing threshold.

    The basic MTF (after SNR correction) only accounts for reverberation and
    the signal-to-noise ratio. This function applies an additional perceptual
    weighting defined in [#iecANC]_, Annex A.2.4:

    .. math::

        m''(f_m, k) = m'(f_m, k) \cdot
        \frac{I_k}{I_k + I_{\mathrm{am},k} + I_{\mathrm{rt},k}}

    where :math:`f_m` is the modulation frequency, :math:`k` is the octave band
    index, :math:`m'(f_m, k)` is the MTF after the ambient noise correction
    (Annex A.2.3), and the three intensity terms are:

    - :math:`I_k = I_{z,k} + I_{n,k}` — total acoustic intensity level
      for octave band :math:`k`, i.e. test signal (:math:`I_{z,k}`) plus
      background noise (:math:`I_{n,k}`).
    - :math:`I_{\mathrm{am},k}` — masking intensity due to the
      level-dependent auditory masking effect acting on octave band :math:`k`
      from band :math:`k-1`, according to [#iecANC]_, Annex A.4.2
      (acoustical signals only).
    - :math:`I_{\mathrm{rt},k}` — acoustic intensity level of the
      reception threshold for octave band :math:`k`, according to
      [#iecANC]_, Annex A.4.3 (acoustical signals only).

    For electrical signals the auditory terms are zero and this correction
    has no effect.

    Parameters
    ----------
    mtf : numpy.ndarray
        Modulation transfer function after SNR correction, shape ``(7, 14)``.
    level : numpy.ndarray
        Test signal level without noise in dB SPL per octave band,
        shape ``(7,)``.
    snr : numpy.ndarray
        Signal-to-noise ratio in dB per octave band, shape ``(7,)``.
    rir_type : {'electrical', 'acoustical'}
        Auditory masking and hearing threshold corrections are only applied
        for ``'acoustical'`` signals.

    Returns
    -------
    mtf : numpy.ndarray
        Corrected modulation transfer function, shape ``(7, 14)``.

    References
    ----------
    .. [#iecANC] IEC 60268-16:2020
       Sound system equipment - Part 16: Objective rating of speech
       intelligibility by speech transmission index.
    """
    # Total intensity per octave band: signal + noise
    # (IEC 60268-16:2020, Annex A.2.4)
    Ik = 10 ** (level / 10) + 10 ** ((level - snr) / 10)

    # Auditory masking and reception threshold terms are zero for
    # electrical signals (IEC 60268-16:2020, Annex A.4.2, A.4.3)
    I_amk = np.zeros(7)
    I_rt = np.zeros((7, 1))

    if rir_type == "acoustical":
        # Level-dependent auditory masking factor
        # (IEC 60268-16:2020, Annex A.4.2)
        amdb = level.copy()
        amdb[amdb < 63] = 0.5*amdb[amdb < 63] - 65
        amdb[(63 <= amdb) & (amdb < 67)] = 1.8 * amdb[(63 <= amdb) &
                                                    (amdb < 67)] - 146.9
        amdb[(67 <= amdb) & (amdb < 100)] = 0.5 * amdb[(67 <= amdb) &
                                                    (amdb < 100)] - 59.8
        amdb[100 <= amdb] = amdb[100 <= amdb] - 10
        a = 10**(amdb/10)
        # Masking intensity (IEC 60268-16:2020, Annex A.4.2)
        I_k1 = np.roll(Ik, 1)
        I_amk = I_k1 * a
        I_amk[0] = 0  # No masking for lowest band (125 Hz)
        # Absolute speech reception threshold
        # (IEC 60268-16:2020, Annex A.4.3)
        A_k = np.array([[46, 27, 12, 6.5, 7.5, 8, 12]]).T
        I_rt = 10 ** (A_k / 10)

    # Auditory masking + threshold correction (IEC 60268-16:2020, Annex A.2.4)
    mtf = mtf * Ik[:, None] / (Ik[:, None] + I_amk[:, None] + I_rt)
    return mtf


def _sti_calc(mtf):
    """
    Computes the Speech Transmission Index (STI) from the MTF.

    Parameters
    ----------
    mtf : numpy.ndarray
        Modulation transfer function with shape ``(7, 14)`` for which
        the STI is computed.

    Returns
    -------
    sti : float
        Speech Transmission Index.
    """
    # Check if mtf is a numpy array
    if not isinstance(mtf, np.ndarray):
        raise TypeError("mtf must be a numpy array.")

    # Check if mtf has the correct shape
    if mtf.shape != (7, 14):
        raise ValueError(
            f"mtf must have shape (7, 14) for 7 octave bands and "
            f"14 modulation frequencies, but got {mtf.shape}.",
        )

    # Effective SNR from MTF (IEC 60268-16:2020, Annex A.2.1)
    with np.errstate(divide='ignore'):
        snr_eff = 10 * np.log10(mtf / (1 - mtf))

    # Clip SNR to [-15, 15] dB range (IEC 60268-16:2020, Annex A.2.1)
    snr_eff = np.clip(snr_eff, -15, 15)

    # Transmission index (TI) for each octave band and modulation
    # frequency (IEC 60268-16:2020, Annex A.2.1)
    ti = (snr_eff + 15) / 30

    # Modulation transmission index per octave band: average over
    # modulation frequencies (IEC 60268-16:2020, Annex A.2.1)
    mti = np.mean(ti, axis=-1)

    # STI Octave evaluation factors (IEC 60268-16:2020, Table A.1)
    alpha = np.array([0.085, 0.127, 0.230, 0.233, 0.309, 0.224, 0.173])
    beta = np.array([0.085, 0.078, 0.065, 0.011, 0.047, 0.095])

    # STI (IEC 60268-16:2020, Annex A.2.1)
    sti = np.sum(alpha * mti) - np.sum(beta * np.sqrt(mti[:-1] * mti[1:]))

    # limit STI to 1 (IEC 60268-16:2020, Section A.2.1)
    return min(sti, 1.0)