1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
12 #define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
13 
14 #include <memory>
15 
16 #include "webrtc/modules/audio_processing/vad/common.h"
17 #include "webrtc/typedefs.h"
18 
19 namespace webrtc {
20 
// Forward declarations. PoleZeroFilter is only held through std::unique_ptr in
// VadAudioProc below; AudioFrame is not referenced elsewhere in this header.
21 class AudioFrame;
22 class PoleZeroFilter;
23 
// Audio feature extraction for the VAD module. The single public entry point,
// ExtractFeatures(), takes 16-bit samples together with an AudioFeatures
// output pointer. As described by the member comments below, the class keeps a
// buffer of 5 ms of past signal plus 30 ms of audio and declares helpers for
// pitch analysis, LPC-based spectral peaks and RMS.
// All member functions are only declared here; their definitions are outside
// this header.
24 class VadAudioProc {
25  public:
26   // Forward declare iSAC structs.
     // Only pointers to them (via std::unique_ptr members) appear in this class.
27   struct PitchAnalysisStruct;
28   struct PreFiltBankstr;
29 
     // Both are user-declared. The std::unique_ptr members point to types that
     // are incomplete in this header, so the definitions of these two functions
     // must live where those types are complete.
30   VadAudioProc();
31   ~VadAudioProc();
32 
     // audio_frame: pointer to the input samples (16-bit, read-only).
     // length: size associated with audio_frame.
     // audio_features: non-const destination object.
     // NOTE(review): the return convention is not visible in this header --
     // presumably kNoError (0) on success; confirm against the implementation.
     // NOTE(review): presumably length is a sample count and audio_features is
     // only filled once enough audio has been buffered -- confirm.
33   int ExtractFeatures(const int16_t* audio_frame,
34                       size_t length,
35                       AudioFeatures* audio_features);
36 
     // DFT size. kIpLength and kWLength below are both derived as half of it.
37   static const size_t kDftSize = 512;
38 
39  private:
     // Internal helpers. Each one takes one or more non-const double arrays
     // followed by a size_t size argument.
     // NOTE(review): presumably the arrays are outputs and the size is their
     // capacity in elements -- inferred from parameter names only; confirm.
40   void PitchAnalysis(double* pitch_gains, double* pitch_lags_hz, size_t length);
41   void SubframeCorrelation(double* corr,
42                            size_t length_corr,
43                            size_t subframe_index);
44   void GetLpcPolynomials(double* lpc, size_t length_lpc);
45   void FindFirstSpectralPeaks(double* f_peak, size_t length_f_peak);
46   void Rms(double* rms, size_t length_rms);
47   void ResetBuffer();
48 
49   // To compute spectral peak we perform LPC analysis to get spectral envelope.
50   // For every 30 ms we compute 3 spectral peaks, therefore 3 LPC analyses.
51   // LPC is computed over 15 ms of windowed audio. For every 10 ms sub-frame
52   // we need 5 ms of past signal to create the input of LPC analysis.
     // kSampleRateHz is not declared in this file (presumably it comes from
     // vad/common.h). 1/200 of a second of samples is 5 ms.
53   enum : size_t {
54     kNumPastSignalSamples = static_cast<size_t>(kSampleRateHz / 200)
55   };
56 
57   // TODO(turajs): maybe defining this at a higher level (maybe enum) so that
58   // all the code recognizes it as "no-error."
59   enum : int { kNoError = 0 };
60 
     // Number of 10 ms sub-frames handled together (3 x 10 ms = 30 ms).
61   enum : size_t { kNum10msSubframes = 3 };
     // 1/100 of a second of samples, i.e. one 10 ms sub-frame.
62   enum : size_t {
63     kNumSubframeSamples = static_cast<size_t>(kSampleRateHz / 100)
64   };
65   enum : size_t {
66     // Samples in 30 ms @ given sampling rate.
67     kNumSamplesToProcess = kNum10msSubframes * kNumSubframeSamples
68   };
     // Past signal (5 ms) plus the samples to process (30 ms).
69   enum : size_t {
70     kBufferLength = kNumPastSignalSamples + kNumSamplesToProcess
71   };
     // Both are kDftSize / 2 and size the ip_ and w_fft_ arrays below.
72   enum : size_t { kIpLength = kDftSize >> 1 };
73   enum : size_t { kWLength = kDftSize >> 1 };
74   enum : size_t { kLpcOrder = 16 };
75 
     // NOTE(review): presumably work areas for the FFT routine (index table
     // and trigonometric table) -- confirm against the implementation.
76   size_t ip_[kIpLength];
77   float w_fft_[kWLength];
78 
79   // A buffer of 5 ms (past audio) + 30 ms (one iSAC frame).
80   float audio_buffer_[kBufferLength];
     // NOTE(review): presumably the number of samples currently held in
     // audio_buffer_ -- confirm.
81   size_t num_buffer_samples_;
82 
     // NOTE(review): presumably pitch gain/lag state carried over between
     // calls -- inferred from the names only; confirm.
83   double log_old_gain_;
84   double old_lag_;
85 
     // Owned objects. PitchAnalysisStruct and PreFiltBankstr are forward
     // declared inside this class and PoleZeroFilter at namespace scope, so
     // none of them is a complete type within this header.
86   std::unique_ptr<PitchAnalysisStruct> pitch_analysis_handle_;
87   std::unique_ptr<PreFiltBankstr> pre_filter_handle_;
88   std::unique_ptr<PoleZeroFilter> high_pass_filter_;
89 };
90 
91 }  // namespace webrtc
92 
93 #endif  // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
94