1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #ifndef MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
12 #define MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
13
14 #include <stddef.h>
15 #include <stdint.h>
16
17 #include <memory>
18
19 #include "modules/audio_processing/vad/common.h" // AudioFeatures, kSampleR...
20
~PoleZeroFilter()21 namespace webrtc {
22
23 class PoleZeroFilter;
24
25 class VadAudioProc {
26 public:
27 // Forward declare iSAC structs.
28 struct PitchAnalysisStruct;
29 struct PreFiltBankstr;
30
31 VadAudioProc();
32 ~VadAudioProc();
33
34 int ExtractFeatures(const int16_t* audio_frame,
35 size_t length,
36 AudioFeatures* audio_features);
37
38 static const size_t kDftSize = 512;
39
40 private:
41 void PitchAnalysis(double* pitch_gains, double* pitch_lags_hz, size_t length);
42 void SubframeCorrelation(double* corr,
43 size_t length_corr,
44 size_t subframe_index);
45 void GetLpcPolynomials(double* lpc, size_t length_lpc);
46 void FindFirstSpectralPeaks(double* f_peak, size_t length_f_peak);
47 void Rms(double* rms, size_t length_rms);
48 void ResetBuffer();
49
50 // To compute spectral peak we perform LPC analysis to get spectral envelope.
51 // For every 30 ms we compute 3 spectral peak there for 3 LPC analysis.
52 // LPC is computed over 15 ms of windowed audio. For every 10 ms sub-frame
53 // we need 5 ms of past signal to create the input of LPC analysis.
54 enum : size_t {
55 kNumPastSignalSamples = static_cast<size_t>(kSampleRateHz / 200)
56 };
57
58 // TODO(turajs): maybe defining this at a higher level (maybe enum) so that
59 // all the code recognize it as "no-error."
60 enum : int { kNoError = 0 };
61
62 enum : size_t { kNum10msSubframes = 3 };
63 enum : size_t {
64 kNumSubframeSamples = static_cast<size_t>(kSampleRateHz / 100)
65 };
66 enum : size_t {
67 // Samples in 30 ms @ given sampling rate.
68 kNumSamplesToProcess = kNum10msSubframes * kNumSubframeSamples
69 };
70 enum : size_t {
71 kBufferLength = kNumPastSignalSamples + kNumSamplesToProcess
72 };
73 enum : size_t { kIpLength = kDftSize >> 1 };
74 enum : size_t { kWLength = kDftSize >> 1 };
75 enum : size_t { kLpcOrder = 16 };
76
77 size_t ip_[kIpLength];
78 float w_fft_[kWLength];
79
80 // A buffer of 5 ms (past audio) + 30 ms (one iSAC frame ).
81 float audio_buffer_[kBufferLength];
82 size_t num_buffer_samples_;
83
84 double log_old_gain_;
85 double old_lag_;
86
87 std::unique_ptr<PitchAnalysisStruct> pitch_analysis_handle_;
88 std::unique_ptr<PreFiltBankstr> pre_filter_handle_;
89 std::unique_ptr<PoleZeroFilter> high_pass_filter_;
90 };
91
92 } // namespace webrtc
93
94 #endif // MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
95