"""
Sub-module implementing calculations of room acoustic parameters from
simulated or experimental data.
"""
import re
import numpy as np
import pyfar as pf
import warnings
[docs]
def reverberation_time_linear_regression(
energy_decay_curve, T='T20', return_intercept=False):
"""Estimate the reverberation time from a given energy decay curve.
The linear regression is performed using least squares error minimization
according to the ISO standard 3382 [#]_.
Parameters
----------
energy_decay_curve : pyfar.TimeData
Energy decay curve.
T : 'T15', 'T20', 'T30', 'T40', 'T50', 'T60', 'EDT', 'LDT'
Decay interval to be used for the reverberation time extrapolation. EDT
corresponds to the early decay time extrapolated from the interval
``[0, -10]`` dB, LDT corresponds to the late decay time extrapolated
from the interval ``[-25, -35]`` dB.
return_intercept : bool
If `True`, the function returns the intercept of the linear regression,
which corresponds to the amplitude of the energy decay curve on a
linear scale. The default is False.
Returns
-------
reverberation_time : double
The reverberation time
References
----------
.. [#] ISO 3382, Acoustics - Measurement of the reverberation time of
rooms with reference to other acoustical parameters.
Examples
--------
Estimate the reverberation time from an energy decay curve.
>>> import numpy as np
>>> import pyfar as pf
>>> import pyrato as ra
>>> from pyrato.analytic import rectangular_room_rigid_walls
...
>>> L = np.array([8, 5, 3])/10
>>> source_pos = np.array([5, 3, 1.2])/10
>>> receiver_pos = np.array([1, 1, 1.2])/10
>>> rir, _ = rectangular_room_rigid_walls(
... L, source_pos, receiver_pos,
... reverberation_time=1, max_freq=1.5e3, n_samples=2**12,
... speed_of_sound=343.9, samplingrate=3e3)
>>> rir = rir/rir.time.max()
...
>>> awgn = pf.signals.noise(
... rir.n_samples, rms=10**(-50/20),
... sampling_rate=rir.sampling_rate)
>>> rir = rir + awgn
...
>>> edc = ra.energy_decay_curve_chu_lundeby(rir)
>>> t_20 = ra.parameters.reverberation_time_linear_regression(edc, 'T20')
>>> t_20
... array([0.99526253])
"""
if T == 'EDT':
upper = -0.1
lower = -10.1
elif T == 'LDT':
upper = -25.
lower = -35.
else:
if T not in ['T15', 'T20', 'T30', 'T40', 'T50', 'T60']:
raise ValueError(
f"{T} is not a valid interval for the regression.")
upper = -5
lower = -np.double(re.findall(r'\d+', T)) + upper
edcs_db = 10*np.log10(np.abs(energy_decay_curve.time))
times = energy_decay_curve.times
reverberation_times = np.zeros(energy_decay_curve.cshape, dtype=float)
intercepts = np.zeros(energy_decay_curve.cshape, dtype=float)
for ch in np.ndindex(energy_decay_curve.cshape):
edc_db = edcs_db[ch]
idx_upper = np.nanargmin(np.abs(upper - edc_db))
idx_lower = np.nanargmin(np.abs(lower - edc_db))
A = np.vstack(
[times[idx_upper:idx_lower], np.ones(idx_lower - idx_upper)]).T
gradient, const = np.linalg.lstsq(
A, edc_db[..., idx_upper:idx_lower], rcond=None)[0]
reverberation_times[ch] = -60 / gradient
intercepts[ch] = 10**(const/10)
if return_intercept is True:
return reverberation_times, intercepts
else:
return reverberation_times
[docs]
def clarity(energy_decay_curve, early_time_limit=80):
r"""
Calculate the clarity from the energy decay curve (EDC).
The clarity parameter (C50 or C80) is defined as the ratio of early-to-late
arriving energy in an impulse response and is a measure for how clearly
speech or music can be perceived in a room. The early-to-late boundary is
typically set at 50 ms (C50) or 80 ms (C80) [#isoClarity]_.
Clarity is calculated as:
.. math::
C_{t_e} = 10 \log_{10} \frac{
\displaystyle \int_0^{t_e} p^2(t) \, dt
}{
\displaystyle \int_{t_e}^{\infty} p^2(t) \, dt
}
where :math:`t_e` is the early time limit and :math:`p(t)` is the pressure
of a room impulse response. Here, the clarity is efficiently computed
from the EDC :math:`e(t)`
.. math::
C_{t_e} = 10 \log_{10} \frac{e(0) - e(t_e)}{e(t_e) - e(\infty)}
= 10 \log_{10} \left( \frac{e(0)}{e(t_e)} - 1 \right),
where :math:`e(\infty) = 0` by definition of the EDC.
Parameters
----------
energy_decay_curve : pyfar.TimeData
Energy decay curve (EDC) of the room impulse response
(time-domain signal). The EDC must start at time zero.
early_time_limit : float, optional
Early time limit (:math:`t_e`) in milliseconds. Defaults to 80 (C80).
Typical values are 50 ms (C50) or 80 ms (C80) [#isoClarity]_.
Returns
-------
clarity : numpy.ndarray[float]
Clarity index (early-to-late energy ratio) in decibels,
shaped according to the channel shape of the input EDC.
References
----------
.. [#isoClarity] ISO 3382, Acoustics — Measurement of the reverberation
time of rooms with reference to other acoustical parameters.
Examples
--------
Estimate the clarity from a real room impulse response filtered in
octave bands:
>>> import pyfar as pf
>>> import pyrato as ra
...
>>> rir = pf.signals.files.room_impulse_response(sampling_rate=44100)
>>> rir = pf.dsp.filter.fractional_octave_bands(rir, num_fractions=1)
>>> edc = ra.edc.energy_decay_curve_lundeby(rir)
...
>>> C80 = ra.parameters.clarity(edc, early_time_limit=80)
>>> C80
... [[-55.57140506]
... [-11.75657677]
... [ -3.21150787]
... [ 2.76276817]
... [ 4.70786211]
... [ 5.98148157]
... [ 9.66764094]
... [ 9.08687417]
... [ 14.14550646]
... [ 21.60048332]]
"""
if not isinstance(early_time_limit, (int, float)):
raise TypeError('early_time_limit must be a number.')
# Convert milliseconds to seconds
early_time_limit_sec = early_time_limit / 1000
limits = np.array([early_time_limit_sec,
np.inf,
0.0,
early_time_limit_sec])
return 10*np.log10(_energy_ratio(limits,
energy_decay_curve,
energy_decay_curve))
[docs]
def early_lateral_energy_fraction(energy_decay_curve_omni,
energy_decay_curve_lateral):
r"""
Calculate the early lateral energy fraction.
The early lateral energy fraction :math:`J_\mathrm{LF}`
according to [#isoEarlyLat]_ is defined as the ratio between the
lateral sound energy captured with a figure of eight microphone
arriving between 5 ms and 80 ms and the total sound energy
captured with an omnidirectional microphone arriving within
the first 80 ms after the direct sound. It is a measure of the
apparent source width.
The parameter is defined as
.. math::
J_\mathrm{LF} =
\frac{
\displaystyle \int_{0.005}^{0.08} p_\mathrm{L}^2(t)\,\mathrm{d}t
}{
\displaystyle \int_{0}^{0.08} p^2(t)\,\mathrm{d}t
}
where :math:`p_\mathrm{L}(t)` is the lateral sound pressure measured with a
figure-eight microphone whose zero axis is oriented towards the source,
and :math:`p(t)` is the sound pressure measured at the same position
with an omnidirectional microphone.
Using the energy decay curves of the omnidirectional response
:math:`e(t)` and the lateral response :math:`e_\mathrm{L}(t)`, the
parameter can be computed efficiently as
.. math::
J_\mathrm{LF} =
\frac{
e_\mathrm{L}(0.005) - e_\mathrm{L}(0.08)
}{
e(0) - e(0.08)
}.
Parameters
----------
energy_decay_curve_omni : pyfar.TimeData
Energy decay curve of the room impulse response measured with an
omnidirectional microphone. The EDC must start at time zero.
energy_decay_curve_lateral : pyfar.TimeData
Energy decay curve of the room impulse response measured with a
figure-eight microphone oriented according to [#isoEarlyLat]_
(zero axis pointing towards the source). The EDC must start at
time zero.
Both EDCs must have identical ``signal.cshape``.
Returns
-------
Early Lateral Energy Fraction: numpy.ndarray
Early lateral energy fraction (:math:`J_\mathrm{LF}`),
shaped according to the channel shape of the input EDCs.
References
----------
.. [#isoEarlyLat] ISO 3382, Acoustics — Measurement of the reverberation
time of rooms with reference to other acoustical parameters.
"""
limits = np.array([0.0, 0.08, 0.005, 0.08])
return _energy_ratio(limits,
energy_decay_curve_omni,
energy_decay_curve_lateral)
[docs]
def definition(energy_decay_curve, early_time_limit=50):
r"""
Calculate the definition from the energy decay curve (EDC).
The definition parameter (D50) is defined as the ratio of early-to-total
arriving energy in an impulse response and is a measure for how defined
speech or music can be perceived in a room. The early-to-total boundary is
typically set at 50 ms (D50) [#isoDefinition]_.
Definition is calculated as:
.. math::
D_{t_\mathrm{e}} = \frac{
\displaystyle \int_0^{t_\mathrm{e}} p^2(t) \, dt
}{
\displaystyle \int_{0}^{\infty} p^2(t) \, dt
}
where :math:`t_e` is the early time limit and :math:`p(t)` is the pressure
of a room impulse response. Here, the definition is efficiently computed
from the EDC :math:`e(t)` directly by:
.. math::
D_{t_\mathrm{e}} = \frac{e(0) - e(t_\mathrm{e})}{e(0) - e(\infty)}
= 1 - \left( \frac{e(t_\mathrm{e})}{e(0)} \right),
where :math:`e(\infty) = 0` by definition of the EDC.
Parameters
----------
energy_decay_curve : pyfar.TimeData
Energy decay curve (EDC) of the room impulse response
(time-domain signal). The EDC must start at time zero.
early_time_limit : float, optional
Early time limit (:math:`t_\mathrm{e}`) in milliseconds. Defaults to
typical value 50 (D50) [#isoDefinition]_.
Returns
-------
definition : numpy.ndarray[float]
Definition index (early-to-total energy ratio),
shaped according to the channel shape of the input EDC.
References
----------
.. [#isoDefinition] ISO 3382, Acoustics — Measurement of the reverberation
time of rooms with reference to other acoustical parameters.
Examples
--------
Estimate the defintion from a real room impulse response filtered in
octave bands:
>>> import pyfar as pf
>>> import pyrato
...
>>> rir = pf.signals.files.room_impulse_response(sampling_rate=44100)
>>> rir = pf.dsp.filter.fractional_octave_bands(
>>> rir, num_fractions=1, frequency_range=(125, 20e3))
>>> edc = pyrato.edc.energy_decay_curve_lundeby(rir)
...
>>> D50 = pyrato.parameters.definition(edc, early_time_limit=50)
>>> D50
... [[0.25984852]
... [0.50208742]
... [0.66722359]
... [0.73528532]
... [0.87801455]
... [0.82757594]
... [0.86536142]
... [0.87374988]]
"""
if not isinstance(early_time_limit, (int, float)):
raise TypeError('early_time_limit must be a number.')
# Convert milliseconds to seconds
early_time_limit_sec = early_time_limit / 1000
limits = np.array([0.0, np.inf, 0.0, early_time_limit_sec])
return _energy_ratio(limits,
energy_decay_curve,
energy_decay_curve)
def _energy_ratio(limits, energy_decay_curve1, energy_decay_curve2):
r"""
Calculate the energy ratio for the time limits from two energy
decay curves (EDC).
A variety of room-acoustic parameters are defined by energy ratios derived
from one or two time-domain Energy Decay Curves (EDCs). These parameters
distinguish between time regions using the four provided limits, and some,
such as Strength (:math:`G`), Early lateral sound (:math:`J_\mathrm{LF}`),
and Late lateral sound (:math:`L_J`), require EDCs obtained from different
impulse-response measurements [#iso2]_.
Energy-Ratio is calculated as:
.. math::
ER = \frac{
\displaystyle \int_{lim3}^{lim4} p_2^2(t) \, dt
}{
\displaystyle \int_{lim1}^{lim2} p_1^2(t) \, dt
}
where :math:`[lim1, ..., lim4]` are the time limits and :math:`p(t)` is the
pressure of a room impulse response. Here, the energy ratio is
efficiently computed from the EDC :math:`e(t)` directly by:
.. math::
ER = \frac{
\displaystyle e_2(lim3) - e_2(lim4)
}{
\displaystyle e_1(lim1) - e_1(lim2)
}.
By definition, the EDC represents the remaining energy up to
:math:`\infty` and converges to zero, i.e., :math:`e(\infty)=0`.
Thus, ``np.inf`` may be used as a limit to select the full
remaining energy of an EDC.
Parameters
----------
limits : np.ndarray, list or tuple
Four time limits (:math:`t_e`) in seconds, shape (4,)
in ascending order. Limits must be either numerical or np.inf.
energy_decay_curve1 : pyfar.TimeData
Energy decay curve 1 (EDC1) of the room impulse response
(time-domain signal). The EDC must start at time zero.
energy_decay_curve2 : pyfar.TimeData
Energy decay curve 2 (EDC2) of the room impulse response
(time-domain signal). The EDC must start at time zero.
Returns
-------
energy ratio : numpy.ndarray[float]
energy-ratio index (early-to-late energy ratio),
shaped according to the channel shape of the input EDC.
References
----------
.. [#iso2] ISO 3382, Acoustics — Measurement of the reverberation time of
rooms with reference to other acoustical parameters.
"""
# Check input type
if not isinstance(energy_decay_curve1, pf.TimeData):
raise TypeError(
"energy_decay_curve1 must be a pyfar.TimeData or derived object.")
if not isinstance(energy_decay_curve2, pf.TimeData):
raise TypeError(
"energy_decay_curve2 must be a pyfar.TimeData or derived object.")
# Check that both EDCs have the same channel shape
if energy_decay_curve1.cshape != energy_decay_curve2.cshape:
raise ValueError(
f"energy_decay_curve1 and energy_decay_curve2 must have the same "
f"channel shape. Got cshape={energy_decay_curve1.cshape} and "
f"cshape={energy_decay_curve2.cshape}.")
if isinstance(limits, (list, tuple)):
limits = np.asarray(limits)
if not isinstance(limits, np.ndarray):
raise TypeError("limits must be a numpy ndarray, list, or tuple.")
# Check shape
if limits.shape != (4,):
raise ValueError(
"limits must have shape (4,), " \
"containing [lim1, lim2, lim3, lim4].",
)
# Check if limits are within valid time range
if (
np.any(limits[0:2] < 0) or
np.any((limits[0:2] > energy_decay_curve1.signal_length) &
(np.isfinite(limits[0:2])))
):
raise ValueError(
f"limits[0:2] must be between 0 and "
f"{energy_decay_curve1.signal_length} seconds or np.inf.",
)
if (
np.any(limits[2:4] < 0) or
np.any((limits[2:4] > energy_decay_curve2.signal_length) &
(np.isfinite(limits[2:4])))
):
raise ValueError(
f"limits[2:4] must be between 0 and "
f"{energy_decay_curve2.signal_length} seconds or np.inf.",
)
limits_denominator = limits[0:2]
limits_numerator = limits[2:4]
# finite limits mask
finite_limits_denominator = np.isfinite(limits_denominator)
finite_limits_numerator = np.isfinite(limits_numerator)
# Denominator values (EDC1, limits 0:2)
values_shape1 = energy_decay_curve1.cshape + (2,)
energy_decay_curve1_values = np.zeros(values_shape1)
if np.any(finite_limits_denominator):
limits_energy_decay_curve1_idx = np.atleast_1d(
energy_decay_curve1.find_nearest_time(
limits_denominator[finite_limits_denominator],
),
)
energy_decay_curve1_values[..., finite_limits_denominator] = \
energy_decay_curve1.time[..., limits_energy_decay_curve1_idx]
# Numerator values (EDC2, limits 2:4)
values_shape2 = energy_decay_curve2.cshape + (2,)
energy_decay_curve2_values = np.zeros(values_shape2)
if np.any(finite_limits_numerator):
limits_energy_decay_curve2_idx = np.atleast_1d(
energy_decay_curve2.find_nearest_time(
limits_numerator[finite_limits_numerator],
),
)
energy_decay_curve2_values[..., finite_limits_numerator] = \
energy_decay_curve2.time[..., limits_energy_decay_curve2_idx]
# using 'minus' because np.diff yields negative result
numerator = -np.diff(energy_decay_curve2_values, axis=-1)[..., 0]
denominator = -np.diff(energy_decay_curve1_values, axis=-1)[..., 0]
energy_ratio = numerator / denominator
return energy_ratio
[docs]
def late_lateral_sound_level(energy_decay_curve_free_field,
energy_decay_curve_room_lateral):
r"""
Calculate the late lateral sound level.
The late lateral sound level :math:`L_\mathrm{J}` quantifies the strength
of late-arriving lateral sound energy. According to ISO 3382-1 [#isoLLJ]_,
it is defined as the level ratio between the late lateral sound energy
captured with a figure-of-eight microphone and the total sound energy of a
reference impulse response measured with an omnidirectional microphone at
a distance of 10 m in the free field. It is a measure of listener
envelopment.
The parameter is defined as
.. math::
L_\mathrm{J} =
10 \log_{10}
\frac{
\displaystyle \int_{0.08}^{\infty} p_\mathrm{L}^2(t)\,\mathrm{d}t
}{
\displaystyle \int_{0}^{\infty} p_{10}^2(t)\,\mathrm{d}t
}
where :math:`p_\mathrm{L}(t)` is the lateral sound pressure measured with a
figure-eight microphone whose zero axis is oriented towards the source,
and :math:`p_{10}(t)` is the instantaneous sound pressure of the
impulse response measured with an omnidirectional microphone
at 10 m distance in the free field.
Using the energy decay curves of the reference response
:math:`e_{10}(t)` and the lateral response :math:`e_\mathrm{L}(t)`,
the parameter can be computed efficiently as
.. math::
L_\mathrm{J} =
10 \log_{10}
\frac{
e_\mathrm{L}(0.08)
}{
e_{10}(0)
}.
Parameters
----------
energy_decay_curve_free_field : pyfar.TimeData
Energy decay curve of the reference free field impulse response
measured vwith an omnidirectional microphone at 10 m distance in the
free field. The EDC must start at time zero.
energy_decay_curve_room_lateral : pyfar.TimeData
Energy decay curve of the room impulse response measured with a
figure-eight microphone oriented according to [#isoLLJ]_
(zero axis pointing towards the source). The EDC must start at
time zero.
Both EDCs must have identical ``signal.cshape``.
Returns
-------
Late Lateral Sound Level : numpy.ndarray
Late lateral sound level (:math:`L_\mathrm{J}`) in decibels,
shaped according to the channel shape of the input EDCs.
References
----------
.. [#isoLLJ] ISO 3382-1, ISO 3382, Acoustics — Measurement of the
reverberation time of rooms with reference to other acoustical
parameters.
"""
limits = np.array([0.0, np.inf, 0.08, np.inf])
return 10 * np.log10(_energy_ratio(limits,
energy_decay_curve_free_field,
energy_decay_curve_room_lateral))
[docs]
def sound_strength(energy_decay_curve_room,
energy_decay_curve_free_field):
r"""
Calculate the room-acoustic strength parameter (:math:`G`).
The strength parameter (:math:`G`) is defined as the ratio between the
total arriving sound energy and the total arriving sound energy
of a reference free-field response measured at 10 m with the same
source. It is a measure of the room-induced level amplification
at the receiver position [#isoStrength]_.
The parameter is defined as
.. math::
G =
10 \log_{10}
\frac{
\displaystyle \int_{0}^{\infty} p^2(t)\,dt
}{
\displaystyle \int_{0}^{\infty} p_\mathrm{10}^2(t)\,dt
}
where :math:`p(t)` is the room sound pressure and
:math:`p_\mathrm{10}(t)` is the reference free-field sound pressure
at 10 m measured with the same loudspeaker.
Using the energy decay curves of the room response
:math:`e(t)` and the reference response
:math:`e_\mathrm{10}(t)`, the
parameter can be computed efficiently as
.. math::
G =
10 \log_{10}
\frac{
e(0) - e(\infty)
}{
e_\mathrm{10}(0) - e_\mathrm{10}(\infty)
}.
Parameters
----------
energy_decay_curve_room : pyfar.TimeData
Energy decay curve of the room impulse response. The EDC must
start at time zero.
energy_decay_curve_free_field : pyfar.TimeData
Energy decay curve of the reference free-field impulse response
at 10 m. The EDC must start at time zero.
Both EDCs must have identical ``signal.cshape``.
Returns
-------
strength : numpy.ndarray
Strength parameter (:math:`G`) in decibels,
shaped according to the channel shape of the input EDC.
References
----------
.. [#isoStrength] ISO 3382, Acoustics — Measurement of the reverberation
time of rooms with reference to other acoustical parameters.
"""
limits = np.array([0.0, np.inf, 0.0, np.inf])
return 10*np.log10(_energy_ratio(limits,
energy_decay_curve_free_field,
energy_decay_curve_room))
[docs]
def speech_transmission_index_indirect(
rir, rir_type="acoustical", level=None, snr=np.inf,
ambient_noise_correction=True):
"""
Computes the Speech Transmission Index (STI) according to
IEC 60268-16:2020 using the indirect method.
The STI is a scalar measure between 0 (bad) and 1 (excellent)
describing speech intelligibility. It is computed from the
:py:func:`~modulation_transfer_function`, optionally including auditory
masking and ambient noise effects.
STI considers 7 octave bands from 125 Hz to 8 kHz
and 14 modulation frequencies between 0.63 Hz and
12.5 Hz [#iec]_.
Parameters
----------
rir : pyfar.Signal
Single or multi-channel room impulse response for which the
STI is computed. The room impulse response must be at least
1.6 seconds long. See [#iec]_, Section 6.2.
rir_type : ``'electrical'``, ``'acoustical'``
Determines whether input signals given by `rir` were obtained
acoustically or electrically. Default is ``'acoustical'``.
Auditory masking effects are only applied for acoustical
signals [#iec]_, section A.3.1.
level : numpy.ndarray or None, optional
Test signal level without noise in dB SPL, given per octave band
(125 Hz–8 kHz). Shape can be ``(7,)`` (7 octave bands: 125 Hz–8 kHz)
to use the same values for all channels, or ``(rir.cshape, 7)``
for channel-specific values.
If ``None`` is provided, auditory and ambient noise corrections are
omitted. See [#iec]_, section A.3.2.
snr : numpy.ndarray or float, optional
Signal-to-noise ratio in dB for each octave band (125 Hz–8 kHz).
Shape can be ``(7,)`` (7 octave bands: 125 Hz–8 kHz)
to use the same values for all channels, or ``(rir.cshape, 7)``
for channel-specific values.
Default is ``np.inf`` (no ambient noise). See [#iec]_, section 3.
ambient_noise_correction: bool, optional
Apply ambient noise correction according to [#iec]_,
Annex A.2.3. Default is ``True``. Only applied when
``level is not None`` and ``ambient_noise_correction is True``.
Returns
-------
sti : numpy.ndarray
Channel-wise Speech Transmission Index with shape ``rir.cshape``.
Notes
-----
pyfar uses octave-band filters of order 14 and the filter order
influences the MTF. Higher filter orders produce steeper roll-off and
a more ideal band separation, which affects the energy distribution
within each octave band and thus the computed modulation depth.
The influence on the broadband STI is negligible, since individual
deviations in the MTF tend to average out in the weighted sum over
octave bands and modulation frequencies.
References
----------
.. [#iec] IEC 60268-16:2020
Sound system equipment - Part 16: Objective rating of speech
intelligibility by speech transmission index.
"""
# check if input data is a pyfar.Signal
if not isinstance(rir, pf.Signal):
raise TypeError("Input data must be a pyfar.Signal.")
if np.ndim(snr) == 0:
snr = np.full((7,), float(snr))
snr = np.asarray(snr)
# Check if SNR has valid shape: (7,) uses same values for all
# channels, (cshape, 7) for channel-specific values
if snr.shape != (7,) and snr.shape != rir.cshape + (7,):
raise ValueError(
f"Input 'snr' must have shape (7,) or "
f"{rir.cshape + (7,)} (7 octave bands or matching "
f"rir channels + 7 octave bands), but got {snr.shape}.",
)
if np.any(snr < 20):
warnings.warn(
"Input 'snr' should be at least 20 dB for every octave band.",
stacklevel=2,
)
if level is not None:
# check if input level is a numpy array
if not isinstance(level, np.ndarray):
raise TypeError("Input 'level' must be a numpy array.")
level = np.asarray(level)
# Check if level has valid shape: (7,) uses same values for
# all channels, (cshape, 7) for channel-specific values
if level.shape != (7,) and level.shape != rir.cshape + (7,):
raise ValueError(
f"Input 'level' must have shape (7,) or "
f"{rir.cshape + (7,)} (7 octave bands or matching "
f"rir channels + 7 octave bands), but got {level.shape}.",
)
# check rir_type
if rir_type is None:
rir_type = "acoustical"
if rir_type not in ["electrical", "acoustical"]:
raise ValueError(f"rir_type is '{rir_type}' but must be "
"'electrical' or 'acoustical'.")
# Validate ambient_noise_correction parameter
if not isinstance(ambient_noise_correction, bool):
raise TypeError("ambient_noise_correction must be a boolean.")
sti = np.zeros(rir.cshape)
for ch in np.ndindex(rir.cshape):
# Use snr/level directly if shape is (7,), otherwise index by channel
current_snr = snr if snr.shape == (7,) else snr[ch]
current_level = (
None if level is None
else (level if level.shape == (7,) else level[ch])
)
mtf = modulation_transfer_function(rir[ch],
rir_type,
current_level,
current_snr,
ambient_noise_correction)
sti[ch] = _sti_calc(mtf)
return sti
[docs]
def modulation_transfer_function(
rir, rir_type="acoustical", level=None, snr=np.inf,
ambient_noise_correction=True):
"""
Compute the modulation transfer function (MTF) of an impulse response
according to IEC 60268-16:2020.
The MTF describes the reduction of modulation depth caused by the
transmission path. It is evaluated for 7 octave bands (125 Hz–8 kHz)
and 14 modulation frequencies (0.63 Hz–12.5 Hz) and forms the basis
of the Speech Transmission Index (STI).
Parameters
----------
rir : pyfar.Signal
Single-channel room impulse response with ``rir.cshape = (1, )``.
The room impulse response must be at least 1.6 seconds long.
rir_type : {'electrical', 'acoustical'}, optional
Determines whether input signals given by `rir` were obtained
acoustically or electrically. Default is ``'acoustical'``.
Auditory masking effects are only applied for acoustical
signals [#iecMTF]_, section A.3.1.
level : numpy.ndarray or None, optional
Test signal level without noise in dB SPL, given per octave band
(125 Hz–8 kHz). Shape must be ``(7,)`` (7 octave bands: 125 Hz–8 kHz).
If ``None`` is provided, auditory and ambient noise corrections are
omitted. Default is ``None``. See [#iecMTF]_, section A.3.2.
snr : numpy.ndarray or float, optional
Signal-to-noise ratio in dB for each octave band (125 Hz–8 kHz).
Shape must be ``(7,)`` (7 octave bands: 125 Hz–8 kHz).
Default is ``np.inf`` (no ambient noise). See [#iecMTF]_, section 3.
ambient_noise_correction : bool, optional
Apply ambient noise correction according to [#iecMTF]_,
Annex A.2.3. Default is ``True``. Only applied when
``level is not None`` and ``ambient_noise_correction is True``.
Returns
-------
mtf : numpy.ndarray
Modulation transfer function with shape ``(7, 14)``.
Notes
-----
pyfar uses octave-band filters of order 14 and the filter order
influences the MTF. Higher filter orders produce steeper roll-off and
a more ideal band separation, which affects the energy distribution
within each octave band and thus the computed modulation depth.
The influence on the broadband STI is negligible, since individual
deviations in the MTF tend to average out in the weighted sum over
octave bands and modulation frequencies.
References
----------
.. [#iecMTF] IEC 60268-16:2020
Sound system equipment - Part 16: Objective rating of speech
intelligibility by speech transmission index.
"""
# Check if input data is a pyfar.Signal
if not isinstance(rir, pf.Signal):
raise TypeError("Input data must be a pyfar.Signal.")
# Check if data is single-channel
if rir.cshape != (1,):
raise ValueError(
"Input must be a single-channel impulse response, "
f"but got shape {rir.cshape}.",
)
# Check if signal is at least 1.6 seconds long
# (IEC 60268-16:2020, Section 6.2)
if rir.n_samples / rir.sampling_rate < 1.6:
raise ValueError(
"Input signal must be at least 1.6 seconds long "
"(see IEC 60268-16:2020, Section 6.2).",
)
if np.ndim(snr) == 0:
snr = np.full((7,), float(snr))
if not isinstance(snr, np.ndarray):
raise TypeError("snr must be a numpy array.")
if snr.shape != (7,):
raise ValueError(
f"snr must have shape (7,) for 7 octave bands "
f"(125 Hz - 8 kHz), but got {snr.shape}.",
)
if np.any(snr < 20):
warnings.warn(
"Input 'snr' should be at least 20 dB for every "
"octave band.",
stacklevel=2,
)
if level is not None:
# check if input level is a numpy array
if not isinstance(level, np.ndarray):
raise TypeError("level must be a numpy array or None.")
if level.shape != (7,):
raise ValueError(
f"Level must have shape (7,) for 7 octave bands "
f"(125 Hz - 8 kHz), but got {level.shape}.",
)
if np.any(level < 1):
warnings.warn(
"Input 'level' should be at least 1 dB for every "
"octave band.",
stacklevel=2,
)
# check rir_type
if rir_type is None:
rir_type = "acoustical"
if rir_type not in ["electrical", "acoustical"]:
raise ValueError(f"Data_type is '{rir_type}' but must be "
"'electrical' or 'acoustical'.")
# Validate ambient_noise_correction parameter
if not isinstance(ambient_noise_correction, bool):
raise TypeError("ambient_noise_correction must be a boolean.")
rir_oct = pf.dsp.filter.fractional_octave_bands(
rir, num_fractions=1, frequency_range=(125, 8000))
# Modulation frequencies (fm) in Hz for MTF calculation
# Defined in IEC 60268-16:2020, Section A.1.4
modulation_frequencies = np.array(
[0.63, 0.80, 1.0, 1.25, 1.60, 2.0, 2.5,
3.15, 4.0, 5.0, 6.3, 8.0, 10.0, 12.5],
)
modulation_frequencies = np.tile(
modulation_frequencies, (rir_oct.cshape[0], 1))
energy = rir_oct.time ** 2
# MTF calculation (IEC 60268-16:2020, Section 6.1)
term_exp = np.exp(
-2j * np.pi * modulation_frequencies[:, :, None] * rir_oct.times)
numerator = np.abs(np.sum(energy * term_exp, axis=-1))
denominator = np.sum(energy, axis=-1)
mtf = numerator / denominator
# Ambient noise correction (IEC 60268-16:2020, Annex A.2.3)
mtf *= 1 / (1 + 10 ** (-snr[:, None] / 10))
if level is not None and ambient_noise_correction is True:
mtf = _ambient_noise_correction(mtf, level, snr, rir_type)
# Clip MTF to valid range [0, 1] (IEC 60268-16:2020, Section A.2.4)
return np.clip(mtf, 0, 1)
def _ambient_noise_correction(mtf, level, snr, rir_type):
r"""
Weight the MTF by auditory masking, ambient noise, and hearing threshold.
The basic MTF (after SNR correction) only accounts for reverberation and
the signal-to-noise ratio. This function applies an additional perceptual
weighting defined in [#iecANC]_, Annex A.2.4:
.. math::
m''(f_m, k) = m'(f_m, k) \cdot
\frac{I_k}{I_k + I_{\mathrm{am},k} + I_{\mathrm{rt},k}}
where :math:`f_m` is the modulation frequency, :math:`k` is the octave band
index, :math:`m'(f_m, k)` is the MTF after the ambient noise correction
(Annex A.2.3), and the three intensity terms are:
- :math:`I_k = I_{z,k} + I_{n,k}` — total acoustic intensity level
for octave band :math:`k`, i.e. test signal (:math:`I_{z,k}`) plus
background noise (:math:`I_{n,k}`).
- :math:`I_{\mathrm{am},k}` — masking intensity due to the
level-dependent auditory masking effect acting on octave band :math:`k`
from band :math:`k-1`, according to [#iecANC]_, Annex A.4.2
(acoustical signals only).
- :math:`I_{\mathrm{rt},k}` — acoustic intensity level of the
reception threshold for octave band :math:`k`, according to
[#iecANC]_, Annex A.4.3 (acoustical signals only).
For electrical signals the auditory terms are zero and this correction
has no effect.
Parameters
----------
mtf : numpy.ndarray
Modulation transfer function after SNR correction, shape ``(7, 14)``.
level : numpy.ndarray
Test signal level without noise in dB SPL per octave band,
shape ``(7,)``.
snr : numpy.ndarray
Signal-to-noise ratio in dB per octave band, shape ``(7,)``.
rir_type : {'electrical', 'acoustical'}
Auditory masking and hearing threshold corrections are only applied
for ``'acoustical'`` signals.
Returns
-------
mtf : numpy.ndarray
Corrected modulation transfer function, shape ``(7, 14)``.
References
----------
.. [#iecANC] IEC 60268-16:2020
Sound system equipment - Part 16: Objective rating of speech
intelligibility by speech transmission index.
"""
# Total intensity per octave band: signal + noise
# (IEC 60268-16:2020, Annex A.2.4)
Ik = 10 ** (level / 10) + 10 ** ((level - snr) / 10)
# Auditory masking and reception threshold terms are zero for
# electrical signals (IEC 60268-16:2020, Annex A.4.2, A.4.3)
I_amk = np.zeros(7)
I_rt = np.zeros((7, 1))
if rir_type == "acoustical":
# Level-dependent auditory masking factor
# (IEC 60268-16:2020, Annex A.4.2)
amdb = level.copy()
amdb[amdb < 63] = 0.5*amdb[amdb < 63] - 65
amdb[(63 <= amdb) & (amdb < 67)] = 1.8 * amdb[(63 <= amdb) &
(amdb < 67)] - 146.9
amdb[(67 <= amdb) & (amdb < 100)] = 0.5 * amdb[(67 <= amdb) &
(amdb < 100)] - 59.8
amdb[100 <= amdb] = amdb[100 <= amdb] - 10
a = 10**(amdb/10)
# Masking intensity (IEC 60268-16:2020, Annex A.4.2)
I_k1 = np.roll(Ik, 1)
I_amk = I_k1 * a
I_amk[0] = 0 # No masking for lowest band (125 Hz)
# Absolute speech reception threshold
# (IEC 60268-16:2020, Annex A.4.3)
A_k = np.array([[46, 27, 12, 6.5, 7.5, 8, 12]]).T
I_rt = 10 ** (A_k / 10)
# Auditory masking + threshold correction (IEC 60268-16:2020, Annex A.2.4)
mtf = mtf * Ik[:, None] / (Ik[:, None] + I_amk[:, None] + I_rt)
return mtf
def _sti_calc(mtf):
"""
Computes the Speech Transmission Index (STI) from the MTF.
Parameters
----------
mtf : numpy.ndarray
Modulation transfer function with shape ``(7, 14)`` for which
the STI is computed.
Returns
-------
sti : float
Speech Transmission Index.
"""
# Check if mtf is a numpy array
if not isinstance(mtf, np.ndarray):
raise TypeError("mtf must be a numpy array.")
# Check if mtf has the correct shape
if mtf.shape != (7, 14):
raise ValueError(
f"mtf must have shape (7, 14) for 7 octave bands and "
f"14 modulation frequencies, but got {mtf.shape}.",
)
# Effective SNR from MTF (IEC 60268-16:2020, Annex A.2.1)
with np.errstate(divide='ignore'):
snr_eff = 10 * np.log10(mtf / (1 - mtf))
# Clip SNR to [-15, 15] dB range (IEC 60268-16:2020, Annex A.2.1)
snr_eff = np.clip(snr_eff, -15, 15)
# Transmission index (TI) for each octave band and modulation
# frequency (IEC 60268-16:2020, Annex A.2.1)
ti = (snr_eff + 15) / 30
# Modulation transmission index per octave band: average over
# modulation frequencies (IEC 60268-16:2020, Annex A.2.1)
mti = np.mean(ti, axis=-1)
# STI Octave evaluation factors (IEC 60268-16:2020, Table A.1)
alpha = np.array([0.085, 0.127, 0.230, 0.233, 0.309, 0.224, 0.173])
beta = np.array([0.085, 0.078, 0.065, 0.011, 0.047, 0.095])
# STI (IEC 60268-16:2020, Annex A.2.1)
sti = np.sum(alpha * mti) - np.sum(beta * np.sqrt(mti[:-1] * mti[1:]))
# limit STI to 1 (IEC 60268-16:2020, Section A.2.1)
return min(sti, 1.0)