1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #ifndef MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_ 12 #define MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_ 13 14 #include <stddef.h> 15 #include <stdint.h> 16 #include <memory> 17 18 #include "modules/audio_processing/vad/common.h" // AudioFeatures, kSampleR... 19 20 namespace webrtc { 21 22 class PoleZeroFilter; 23 24 class VadAudioProc { 25 public: 26 // Forward declare iSAC structs. 27 struct PitchAnalysisStruct; 28 struct PreFiltBankstr; 29 30 VadAudioProc(); 31 ~VadAudioProc(); 32 33 int ExtractFeatures(const int16_t* audio_frame, 34 size_t length, 35 AudioFeatures* audio_features); 36 37 static const size_t kDftSize = 512; 38 39 private: 40 void PitchAnalysis(double* pitch_gains, double* pitch_lags_hz, size_t length); 41 void SubframeCorrelation(double* corr, 42 size_t length_corr, 43 size_t subframe_index); 44 void GetLpcPolynomials(double* lpc, size_t length_lpc); 45 void FindFirstSpectralPeaks(double* f_peak, size_t length_f_peak); 46 void Rms(double* rms, size_t length_rms); 47 void ResetBuffer(); 48 49 // To compute spectral peak we perform LPC analysis to get spectral envelope. 50 // For every 30 ms we compute 3 spectral peak there for 3 LPC analysis. 51 // LPC is computed over 15 ms of windowed audio. For every 10 ms sub-frame 52 // we need 5 ms of past signal to create the input of LPC analysis. 53 enum : size_t { 54 kNumPastSignalSamples = static_cast<size_t>(kSampleRateHz / 200) 55 }; 56 57 // TODO(turajs): maybe defining this at a higher level (maybe enum) so that 58 // all the code recognize it as "no-error." 59 enum : int { kNoError = 0 }; 60 61 enum : size_t { kNum10msSubframes = 3 }; 62 enum : size_t { 63 kNumSubframeSamples = static_cast<size_t>(kSampleRateHz / 100) 64 }; 65 enum : size_t { 66 // Samples in 30 ms @ given sampling rate. 67 kNumSamplesToProcess = kNum10msSubframes * kNumSubframeSamples 68 }; 69 enum : size_t { 70 kBufferLength = kNumPastSignalSamples + kNumSamplesToProcess 71 }; 72 enum : size_t { kIpLength = kDftSize >> 1 }; 73 enum : size_t { kWLength = kDftSize >> 1 }; 74 enum : size_t { kLpcOrder = 16 }; 75 76 size_t ip_[kIpLength]; 77 float w_fft_[kWLength]; 78 79 // A buffer of 5 ms (past audio) + 30 ms (one iSAC frame ). 80 float audio_buffer_[kBufferLength]; 81 size_t num_buffer_samples_; 82 83 double log_old_gain_; 84 double old_lag_; 85 86 std::unique_ptr<PitchAnalysisStruct> pitch_analysis_handle_; 87 std::unique_ptr<PreFiltBankstr> pre_filter_handle_; 88 std::unique_ptr<PoleZeroFilter> high_pass_filter_; 89 }; 90 91 } // namespace webrtc 92 93 #endif // MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_ 94