1 /*
2 * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SPECTRAL_FEATURES_INTERNAL_H_
12 #define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SPECTRAL_FEATURES_INTERNAL_H_
13
14 #include <stddef.h>
15
16 #include <array>
17 #include <vector>
18
19 #include "api/array_view.h"
20 #include "modules/audio_processing/agc2/rnn_vad/common.h"
21
22 namespace webrtc {
23 namespace rnn_vad {
24
25 // At a sample rate of 24 kHz, the last 3 Opus bands are beyond the Nyquist
26 // frequency. However, band #19 gets the contributions from band #18 because
27 // of the symmetric triangular filter with peak response at 12 kHz.
28 constexpr int kOpusBands24kHz = 20;
29 static_assert(kOpusBands24kHz < kNumBands,
30 "The number of bands at 24 kHz must be less than those defined "
31 "in the Opus scale at 48 kHz.");
32
33 // Number of FFT frequency bins covered by each band in the Opus scale at a
34 // sample rate of 24 kHz for 20 ms frames.
35 // Declared here for unit testing.
GetOpusScaleNumBins24kHz20ms()36 constexpr std::array<int, kOpusBands24kHz - 1> GetOpusScaleNumBins24kHz20ms() {
37 return {4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 16, 16, 16, 24, 24, 32, 48};
38 }
39
40 // TODO(bugs.webrtc.org/10480): Move to a separate file.
41 // Class to compute band-wise spectral features in the Opus perceptual scale
42 // for 20 ms frames sampled at 24 kHz. The analysis methods apply triangular
43 // filters with peak response at the each band boundary.
44 class SpectralCorrelator {
45 public:
46 // Ctor.
47 SpectralCorrelator();
48 SpectralCorrelator(const SpectralCorrelator&) = delete;
49 SpectralCorrelator& operator=(const SpectralCorrelator&) = delete;
50 ~SpectralCorrelator();
51
52 // Computes the band-wise spectral auto-correlations.
53 // |x| must:
54 // - have size equal to |kFrameSize20ms24kHz|;
55 // - be encoded as vectors of interleaved real-complex FFT coefficients
56 // where x[1] = y[1] = 0 (the Nyquist frequency coefficient is omitted).
57 void ComputeAutoCorrelation(
58 rtc::ArrayView<const float> x,
59 rtc::ArrayView<float, kOpusBands24kHz> auto_corr) const;
60
61 // Computes the band-wise spectral cross-correlations.
62 // |x| and |y| must:
63 // - have size equal to |kFrameSize20ms24kHz|;
64 // - be encoded as vectors of interleaved real-complex FFT coefficients where
65 // x[1] = y[1] = 0 (the Nyquist frequency coefficient is omitted).
66 void ComputeCrossCorrelation(
67 rtc::ArrayView<const float> x,
68 rtc::ArrayView<const float> y,
69 rtc::ArrayView<float, kOpusBands24kHz> cross_corr) const;
70
71 private:
72 const std::vector<float> weights_; // Weights for each Fourier coefficient.
73 };
74
75 // TODO(bugs.webrtc.org/10480): Move to anonymous namespace in
76 // spectral_features.cc. Given a vector of Opus-bands energy coefficients,
77 // computes the log magnitude spectrum applying smoothing both over time and
78 // over frequency. Declared here for unit testing.
79 void ComputeSmoothedLogMagnitudeSpectrum(
80 rtc::ArrayView<const float> bands_energy,
81 rtc::ArrayView<float, kNumBands> log_bands_energy);
82
83 // TODO(bugs.webrtc.org/10480): Move to anonymous namespace in
84 // spectral_features.cc. Creates a DCT table for arrays having size equal to
85 // |kNumBands|. Declared here for unit testing.
86 std::array<float, kNumBands * kNumBands> ComputeDctTable();
87
88 // TODO(bugs.webrtc.org/10480): Move to anonymous namespace in
89 // spectral_features.cc. Computes DCT for |in| given a pre-computed DCT table.
90 // In-place computation is not allowed and |out| can be smaller than |in| in
91 // order to only compute the first DCT coefficients. Declared here for unit
92 // testing.
93 void ComputeDct(rtc::ArrayView<const float> in,
94 rtc::ArrayView<const float, kNumBands * kNumBands> dct_table,
95 rtc::ArrayView<float> out);
96
97 } // namespace rnn_vad
98 } // namespace webrtc
99
100 #endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SPECTRAL_FEATURES_INTERNAL_H_
101