1 /*
2  *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SPECTRAL_FEATURES_INTERNAL_H_
12 #define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SPECTRAL_FEATURES_INTERNAL_H_
13 
14 #include <stddef.h>
15 
16 #include <array>
17 #include <vector>
18 
19 #include "api/array_view.h"
20 #include "modules/audio_processing/agc2/rnn_vad/common.h"
21 
22 namespace webrtc {
23 namespace rnn_vad {
24 
25 // At a sample rate of 24 kHz, the last 3 Opus bands are beyond the Nyquist
26 // frequency. However, band #19 gets the contributions from band #18 because
27 // of the symmetric triangular filter with peak response at 12 kHz.
28 constexpr int kOpusBands24kHz = 20;
29 static_assert(kOpusBands24kHz < kNumBands,
30               "The number of bands at 24 kHz must be less than those defined "
31               "in the Opus scale at 48 kHz.");
32 
33 // Number of FFT frequency bins covered by each band in the Opus scale at a
34 // sample rate of 24 kHz for 20 ms frames.
35 // Declared here for unit testing.
GetOpusScaleNumBins24kHz20ms()36 constexpr std::array<int, kOpusBands24kHz - 1> GetOpusScaleNumBins24kHz20ms() {
37   return {4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 16, 16, 16, 24, 24, 32, 48};
38 }
39 
40 // TODO(bugs.webrtc.org/10480): Move to a separate file.
41 // Class to compute band-wise spectral features in the Opus perceptual scale
42 // for 20 ms frames sampled at 24 kHz. The analysis methods apply triangular
43 // filters with peak response at the each band boundary.
44 class SpectralCorrelator {
45  public:
46   // Ctor.
47   SpectralCorrelator();
48   SpectralCorrelator(const SpectralCorrelator&) = delete;
49   SpectralCorrelator& operator=(const SpectralCorrelator&) = delete;
50   ~SpectralCorrelator();
51 
52   // Computes the band-wise spectral auto-correlations.
53   // |x| must:
54   //  - have size equal to |kFrameSize20ms24kHz|;
55   //  - be encoded as vectors of interleaved real-complex FFT coefficients
56   //    where x[1] = y[1] = 0 (the Nyquist frequency coefficient is omitted).
57   void ComputeAutoCorrelation(
58       rtc::ArrayView<const float> x,
59       rtc::ArrayView<float, kOpusBands24kHz> auto_corr) const;
60 
61   // Computes the band-wise spectral cross-correlations.
62   // |x| and |y| must:
63   //  - have size equal to |kFrameSize20ms24kHz|;
64   //  - be encoded as vectors of interleaved real-complex FFT coefficients where
65   //    x[1] = y[1] = 0 (the Nyquist frequency coefficient is omitted).
66   void ComputeCrossCorrelation(
67       rtc::ArrayView<const float> x,
68       rtc::ArrayView<const float> y,
69       rtc::ArrayView<float, kOpusBands24kHz> cross_corr) const;
70 
71  private:
72   const std::vector<float> weights_;  // Weights for each Fourier coefficient.
73 };
74 
75 // TODO(bugs.webrtc.org/10480): Move to anonymous namespace in
76 // spectral_features.cc. Given a vector of Opus-bands energy coefficients,
77 // computes the log magnitude spectrum applying smoothing both over time and
78 // over frequency. Declared here for unit testing.
79 void ComputeSmoothedLogMagnitudeSpectrum(
80     rtc::ArrayView<const float> bands_energy,
81     rtc::ArrayView<float, kNumBands> log_bands_energy);
82 
83 // TODO(bugs.webrtc.org/10480): Move to anonymous namespace in
84 // spectral_features.cc. Creates a DCT table for arrays having size equal to
85 // |kNumBands|. Declared here for unit testing.
86 std::array<float, kNumBands * kNumBands> ComputeDctTable();
87 
88 // TODO(bugs.webrtc.org/10480): Move to anonymous namespace in
89 // spectral_features.cc. Computes DCT for |in| given a pre-computed DCT table.
90 // In-place computation is not allowed and |out| can be smaller than |in| in
91 // order to only compute the first DCT coefficients. Declared here for unit
92 // testing.
93 void ComputeDct(rtc::ArrayView<const float> in,
94                 rtc::ArrayView<const float, kNumBands * kNumBands> dct_table,
95                 rtc::ArrayView<float> out);
96 
97 }  // namespace rnn_vad
98 }  // namespace webrtc
99 
100 #endif  // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SPECTRAL_FEATURES_INTERNAL_H_
101