/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef WEBRTC_MODULES_INCLUDE_MODULE_COMMON_TYPES_H_
#define WEBRTC_MODULES_INCLUDE_MODULE_COMMON_TYPES_H_

#include <assert.h>
#include <string.h>  // memcpy

#include <algorithm>
#include <limits>

#include "webrtc/base/constructormagic.h"
#include "webrtc/common_types.h"
#include "webrtc/common_video/rotation.h"
#include "webrtc/typedefs.h"

namespace webrtc {

struct RTPAudioHeader {
  uint8_t numEnergy;                  // Number of valid entries in arrOfEnergy.
  uint8_t arrOfEnergy[kRtpCsrcSize];  // One energy byte (0-9) per channel.
  bool isCNG;                         // True if this is comfort noise (CNG).
  uint8_t channel;                    // Number of channels; 2 = stereo.
};

const int16_t kNoPictureId = -1;
const int16_t kMaxOneBytePictureId = 0x7F;    // 7 bits
const int16_t kMaxTwoBytePictureId = 0x7FFF;  // 15 bits
const int16_t kNoTl0PicIdx = -1;
const uint8_t kNoTemporalIdx = 0xFF;
const uint8_t kNoSpatialIdx = 0xFF;
const uint8_t kNoGofIdx = 0xFF;
const uint8_t kNumVp9Buffers = 8;
const size_t kMaxVp9RefPics = 3;
const size_t kMaxVp9FramesInGof = 0xFF;  // 8 bits
const size_t kMaxVp9NumberOfSpatialLayers = 8;
const int kNoKeyIdx = -1;

struct RTPVideoHeaderVP8 {
  void InitRTPVideoHeaderVP8() {
    nonReference = false;
    pictureId = kNoPictureId;
    tl0PicIdx = kNoTl0PicIdx;
    temporalIdx = kNoTemporalIdx;
    layerSync = false;
    keyIdx = kNoKeyIdx;
    partitionId = 0;
    beginningOfPartition = false;
  }

  bool nonReference;          // Frame is discardable.
  int16_t pictureId;          // Picture ID index, 15 bits;
                              // kNoPictureId if PictureID does not exist.
  int16_t tl0PicIdx;          // TL0PIC_IDX, 8 bits;
                              // kNoTl0PicIdx means no value provided.
  uint8_t temporalIdx;        // Temporal layer index, or kNoTemporalIdx.
  bool layerSync;             // This frame is a layer sync frame.
                              // Disabled if temporalIdx == kNoTemporalIdx.
  int keyIdx;                 // 5 bits; kNoKeyIdx means not used.
  int partitionId;            // VP8 partition ID.
  bool beginningOfPartition;  // True if this packet is the first in a VP8
                              // partition; otherwise false.
};

enum TemporalStructureMode {
  kTemporalStructureMode1,    // 1 temporal layer structure - i.e., IPPP...
  kTemporalStructureMode2,    // 2 temporal layers 0-1-0-1...
  kTemporalStructureMode3     // 3 temporal layers 0-2-1-2-0-2-1-2...
};

struct GofInfoVP9 {
  void SetGofInfoVP9(TemporalStructureMode tm) {
    switch (tm) {
      case kTemporalStructureMode1:
        num_frames_in_gof = 1;
        temporal_idx[0] = 0;
        temporal_up_switch[0] = false;
        num_ref_pics[0] = 1;
        pid_diff[0][0] = 1;
        break;
      case kTemporalStructureMode2:
        num_frames_in_gof = 2;
        temporal_idx[0] = 0;
        temporal_up_switch[0] = false;
        num_ref_pics[0] = 1;
        pid_diff[0][0] = 2;

        temporal_idx[1] = 1;
        temporal_up_switch[1] = true;
        num_ref_pics[1] = 1;
        pid_diff[1][0] = 1;
        break;
      case kTemporalStructureMode3:
        num_frames_in_gof = 4;
        temporal_idx[0] = 0;
        temporal_up_switch[0] = false;
        num_ref_pics[0] = 1;
        pid_diff[0][0] = 4;

        temporal_idx[1] = 2;
        temporal_up_switch[1] = true;
        num_ref_pics[1] = 1;
        pid_diff[1][0] = 1;

        temporal_idx[2] = 1;
        temporal_up_switch[2] = true;
        num_ref_pics[2] = 1;
        pid_diff[2][0] = 2;

        temporal_idx[3] = 2;
        temporal_up_switch[3] = false;
        num_ref_pics[3] = 2;
        pid_diff[3][0] = 1;
        pid_diff[3][1] = 2;
        break;
      default:
        assert(false);
    }
  }

  void CopyGofInfoVP9(const GofInfoVP9& src) {
    num_frames_in_gof = src.num_frames_in_gof;
    for (size_t i = 0; i < num_frames_in_gof; ++i) {
      temporal_idx[i] = src.temporal_idx[i];
      temporal_up_switch[i] = src.temporal_up_switch[i];
      num_ref_pics[i] = src.num_ref_pics[i];
      for (uint8_t r = 0; r < num_ref_pics[i]; ++r) {
        pid_diff[i][r] = src.pid_diff[i][r];
      }
    }
  }

  size_t num_frames_in_gof;
  uint8_t temporal_idx[kMaxVp9FramesInGof];
  bool temporal_up_switch[kMaxVp9FramesInGof];
  uint8_t num_ref_pics[kMaxVp9FramesInGof];
  uint8_t pid_diff[kMaxVp9FramesInGof][kMaxVp9RefPics];
};
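// Example (illustrative, not part of the original header): populating a GOF
// with the predefined three-layer structure. In kTemporalStructureMode3 the
// group holds four frames in the repeating 0-2-1-2 pattern, and only the last
// frame of the group references two earlier pictures.
//
//   GofInfoVP9 gof;
//   gof.SetGofInfoVP9(kTemporalStructureMode3);
//   // gof.num_frames_in_gof == 4
//   // gof.temporal_idx[]    == {0, 2, 1, 2}
//   // gof.num_ref_pics[3] == 2; gof.pid_diff[3][0] == 1, gof.pid_diff[3][1] == 2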

struct RTPVideoHeaderVP9 {
  void InitRTPVideoHeaderVP9() {
    inter_pic_predicted = false;
    flexible_mode = false;
    beginning_of_frame = false;
    end_of_frame = false;
    ss_data_available = false;
    picture_id = kNoPictureId;
    max_picture_id = kMaxTwoBytePictureId;
    tl0_pic_idx = kNoTl0PicIdx;
    temporal_idx = kNoTemporalIdx;
    spatial_idx = kNoSpatialIdx;
    temporal_up_switch = false;
    inter_layer_predicted = false;
    gof_idx = kNoGofIdx;
    num_ref_pics = 0;
    num_spatial_layers = 1;
  }

  bool inter_pic_predicted;  // This layer frame is dependent on previously
                             // coded frame(s).
  bool flexible_mode;        // This frame is in flexible mode.
  bool beginning_of_frame;   // True if this packet is the first in a VP9 layer
                             // frame.
  bool end_of_frame;  // True if this packet is the last in a VP9 layer frame.
  bool ss_data_available;  // True if SS data is available in this payload
                           // descriptor.
  int16_t picture_id;      // PictureID index, 15 bits;
                           // kNoPictureId if PictureID does not exist.
  int16_t max_picture_id;  // Maximum picture ID index; either 0x7F or 0x7FFF.
  int16_t tl0_pic_idx;     // TL0PIC_IDX, 8 bits;
                           // kNoTl0PicIdx means no value provided.
  uint8_t temporal_idx;    // Temporal layer index, or kNoTemporalIdx.
  uint8_t spatial_idx;     // Spatial layer index, or kNoSpatialIdx.
  bool temporal_up_switch;  // True if upswitch to higher frame rate is possible
                            // starting from this frame.
  bool inter_layer_predicted;  // Frame is dependent on directly lower spatial
                               // layer frame.

  uint8_t gof_idx;  // Index to predefined temporal frame info in SS data.

  uint8_t num_ref_pics;  // Number of reference pictures used by this layer
                         // frame.
  uint8_t pid_diff[kMaxVp9RefPics];  // P_DIFF signaled to derive the PictureID
                                     // of the reference pictures.
  int16_t ref_picture_id[kMaxVp9RefPics];  // PictureID of reference pictures.

  // SS data.
  size_t num_spatial_layers;  // Always populated.
  bool spatial_layer_resolution_present;
  uint16_t width[kMaxVp9NumberOfSpatialLayers];
  uint16_t height[kMaxVp9NumberOfSpatialLayers];
  GofInfoVP9 gof;
};

// The packetization types that we support: single, aggregated, and fragmented.
enum H264PacketizationTypes {
  kH264SingleNalu,  // This packet contains a single NAL unit.
  kH264StapA,       // This packet contains STAP-A (single time
                    // aggregation) packets. If this packet has an
                    // associated NAL unit type, it'll be for the
                    // first such aggregated packet.
  kH264FuA,         // This packet contains a FU-A (fragmentation
                    // unit) packet, meaning it is a part of a frame
                    // that was too large to fit into a single packet.
};

struct RTPVideoHeaderH264 {
  uint8_t nalu_type;  // The NAL unit type. If this is a header for a
                      // fragmented packet, it's the NAL unit type of
                      // the original data. If this is the header for an
                      // aggregated packet, it's the NAL unit type of
                      // the first NAL unit in the packet.
  H264PacketizationTypes packetization_type;
};

union RTPVideoTypeHeader {
  RTPVideoHeaderVP8 VP8;
  RTPVideoHeaderVP9 VP9;
  RTPVideoHeaderH264 H264;
};

enum RtpVideoCodecTypes {
  kRtpVideoNone,
  kRtpVideoGeneric,
  kRtpVideoVp8,
  kRtpVideoVp9,
  kRtpVideoH264
};
// Since RTPVideoHeader is used as a member of a union, it can't have a
// non-trivial default constructor.
struct RTPVideoHeader {
  uint16_t width;   // Frame width in pixels.
  uint16_t height;  // Frame height in pixels.
  VideoRotation rotation;

  bool isFirstPacket;    // First packet in frame.
  uint8_t simulcastIdx;  // Index of the simulcast encoder creating
                         // this frame, 0 if not using simulcast.
  RtpVideoCodecTypes codec;
  RTPVideoTypeHeader codecHeader;
};
union RTPTypeHeader {
  RTPAudioHeader Audio;
  RTPVideoHeader Video;
};

struct WebRtcRTPHeader {
  RTPHeader header;
  FrameType frameType;
  RTPTypeHeader type;
  // NTP time of the capture time in local timebase in milliseconds.
  int64_t ntp_time_ms;
};

class RTPFragmentationHeader {
 public:
  RTPFragmentationHeader()
      : fragmentationVectorSize(0),
        fragmentationOffset(NULL),
        fragmentationLength(NULL),
        fragmentationTimeDiff(NULL),
        fragmentationPlType(NULL) {}

  ~RTPFragmentationHeader() {
    delete[] fragmentationOffset;
    delete[] fragmentationLength;
    delete[] fragmentationTimeDiff;
    delete[] fragmentationPlType;
  }

  void CopyFrom(const RTPFragmentationHeader& src) {
    if (this == &src) {
      return;
    }

    if (src.fragmentationVectorSize != fragmentationVectorSize) {
      // The vectors have a new size: delete the old arrays...
      delete[] fragmentationOffset;
      fragmentationOffset = NULL;
      delete[] fragmentationLength;
      fragmentationLength = NULL;
      delete[] fragmentationTimeDiff;
      fragmentationTimeDiff = NULL;
      delete[] fragmentationPlType;
      fragmentationPlType = NULL;

      if (src.fragmentationVectorSize > 0) {
        // ...and allocate new ones matching the source.
        if (src.fragmentationOffset) {
          fragmentationOffset = new size_t[src.fragmentationVectorSize];
        }
        if (src.fragmentationLength) {
          fragmentationLength = new size_t[src.fragmentationVectorSize];
        }
        if (src.fragmentationTimeDiff) {
          fragmentationTimeDiff = new uint16_t[src.fragmentationVectorSize];
        }
        if (src.fragmentationPlType) {
          fragmentationPlType = new uint8_t[src.fragmentationVectorSize];
        }
      }
      // Set the new size.
      fragmentationVectorSize = src.fragmentationVectorSize;
    }

    if (src.fragmentationVectorSize > 0) {
      // Copy the values.
      if (src.fragmentationOffset) {
        memcpy(fragmentationOffset, src.fragmentationOffset,
               src.fragmentationVectorSize * sizeof(size_t));
      }
      if (src.fragmentationLength) {
        memcpy(fragmentationLength, src.fragmentationLength,
               src.fragmentationVectorSize * sizeof(size_t));
      }
      if (src.fragmentationTimeDiff) {
        memcpy(fragmentationTimeDiff, src.fragmentationTimeDiff,
               src.fragmentationVectorSize * sizeof(uint16_t));
      }
      if (src.fragmentationPlType) {
        memcpy(fragmentationPlType, src.fragmentationPlType,
               src.fragmentationVectorSize * sizeof(uint8_t));
      }
    }
  }

  void VerifyAndAllocateFragmentationHeader(const size_t size) {
    assert(size <= std::numeric_limits<uint16_t>::max());
    const uint16_t size16 = static_cast<uint16_t>(size);
    if (fragmentationVectorSize < size16) {
      uint16_t oldVectorSize = fragmentationVectorSize;
      {
        // offset
        size_t* oldOffsets = fragmentationOffset;
        fragmentationOffset = new size_t[size16];
        memset(fragmentationOffset + oldVectorSize, 0,
               sizeof(size_t) * (size16 - oldVectorSize));
        // copy old values
        memcpy(fragmentationOffset, oldOffsets,
               sizeof(size_t) * oldVectorSize);
        delete[] oldOffsets;
      }
      // length
      {
        size_t* oldLengths = fragmentationLength;
        fragmentationLength = new size_t[size16];
        memset(fragmentationLength + oldVectorSize, 0,
               sizeof(size_t) * (size16 - oldVectorSize));
        memcpy(fragmentationLength, oldLengths,
               sizeof(size_t) * oldVectorSize);
        delete[] oldLengths;
      }
      // time diff
      {
        uint16_t* oldTimeDiffs = fragmentationTimeDiff;
        fragmentationTimeDiff = new uint16_t[size16];
        memset(fragmentationTimeDiff + oldVectorSize, 0,
               sizeof(uint16_t) * (size16 - oldVectorSize));
        memcpy(fragmentationTimeDiff, oldTimeDiffs,
               sizeof(uint16_t) * oldVectorSize);
        delete[] oldTimeDiffs;
      }
      // payload type
      {
        uint8_t* oldPlTypes = fragmentationPlType;
        fragmentationPlType = new uint8_t[size16];
        memset(fragmentationPlType + oldVectorSize, 0,
               sizeof(uint8_t) * (size16 - oldVectorSize));
        memcpy(fragmentationPlType, oldPlTypes,
               sizeof(uint8_t) * oldVectorSize);
        delete[] oldPlTypes;
      }
      fragmentationVectorSize = size16;
    }
  }

  uint16_t fragmentationVectorSize;  // Number of fragmentations.
  size_t* fragmentationOffset;       // Offset of pointer to data for each
                                     // fragmentation.
  size_t* fragmentationLength;       // Data size for each fragmentation.
  uint16_t* fragmentationTimeDiff;   // Timestamp difference relative "now" for
                                     // each fragmentation.
  uint8_t* fragmentationPlType;      // Payload type of each fragmentation.

 private:
  RTC_DISALLOW_COPY_AND_ASSIGN(RTPFragmentationHeader);
};
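// Example (illustrative, not part of the original header): allocating a
// two-fragment header and deep-copying it. Copy construction and assignment
// are disallowed, so CopyFrom() is the supported way to duplicate one.
//
//   RTPFragmentationHeader frag;
//   frag.VerifyAndAllocateFragmentationHeader(2);
//   frag.fragmentationOffset[0] = 0;
//   frag.fragmentationLength[0] = 100;
//   frag.fragmentationOffset[1] = 100;
//   frag.fragmentationLength[1] = 50;
//
//   RTPFragmentationHeader copy;
//   copy.CopyFrom(frag);  // Reallocates as needed, then copies the arrays.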

struct RTCPVoIPMetric {
  // RFC 3611 4.7
  uint8_t lossRate;
  uint8_t discardRate;
  uint8_t burstDensity;
  uint8_t gapDensity;
  uint16_t burstDuration;
  uint16_t gapDuration;
  uint16_t roundTripDelay;
  uint16_t endSystemDelay;
  uint8_t signalLevel;
  uint8_t noiseLevel;
  uint8_t RERL;
  uint8_t Gmin;
  uint8_t Rfactor;
  uint8_t extRfactor;
  uint8_t MOSLQ;
  uint8_t MOSCQ;
  uint8_t RXconfig;
  uint16_t JBnominal;
  uint16_t JBmax;
  uint16_t JBabsMax;
};

// Types for the FEC packet masks. The type |kFecMaskRandom| is based on a
// random loss model. The type |kFecMaskBursty| is based on a bursty/consecutive
// loss model. The packet masks are defined in
// modules/rtp_rtcp/fec_private_tables_random(bursty).h
enum FecMaskType {
  kFecMaskRandom,
  kFecMaskBursty,
};

// Struct containing forward error correction settings.
struct FecProtectionParams {
  int fec_rate;
  bool use_uep_protection;
  int max_fec_frames;
  FecMaskType fec_mask_type;
};

// Interface used by the CallStats class to distribute call statistics.
// Callbacks will be triggered as soon as the class has been registered to a
// CallStats object using RegisterStatsObserver.
class CallStatsObserver {
 public:
  virtual void OnRttUpdate(int64_t avg_rtt_ms, int64_t max_rtt_ms) = 0;

  virtual ~CallStatsObserver() {}
};
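// Sketch (illustrative; the class name is hypothetical) of a minimal
// implementation that could be registered with a CallStats object:
//
//   class RttLogger : public CallStatsObserver {
//    public:
//     void OnRttUpdate(int64_t avg_rtt_ms, int64_t max_rtt_ms) override {
//       // React to updated RTT estimates, e.g. adapt protection settings.
//     }
//   };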

struct VideoContentMetrics {
  VideoContentMetrics()
      : motion_magnitude(0.0f),
        spatial_pred_err(0.0f),
        spatial_pred_err_h(0.0f),
        spatial_pred_err_v(0.0f) {}

  void Reset() {
    motion_magnitude = 0.0f;
    spatial_pred_err = 0.0f;
    spatial_pred_err_h = 0.0f;
    spatial_pred_err_v = 0.0f;
  }
  float motion_magnitude;
  float spatial_pred_err;
  float spatial_pred_err_h;
  float spatial_pred_err_v;
};

/* This class holds up to 60 ms of super-wideband (32 kHz) stereo audio. It
 * allows for adding and subtracting frames while keeping track of the
 * resulting states.
 *
 * Notes
 * - The total number of samples in |data_| is
 *   samples_per_channel_ * num_channels_.
 *
 * - Stereo data is interleaved starting with the left channel.
 *
 * - The += operator assumes that the frames being added are never exact
 *   opposites when it decides the resulting state; to subtract a frame, use
 *   the -= operator instead.
 */
class AudioFrame {
 public:
  // Stereo, 32 kHz, 60 ms (2 * 32 * 60)
  static const size_t kMaxDataSizeSamples = 3840;

  enum VADActivity {
    kVadActive = 0,
    kVadPassive = 1,
    kVadUnknown = 2
  };
  enum SpeechType {
    kNormalSpeech = 0,
    kPLC = 1,
    kCNG = 2,
    kPLCCNG = 3,
    kUndefined = 4
  };

  AudioFrame();
  virtual ~AudioFrame() {}

  // Resets all members to their default state (except that the contents of
  // |data_| are not modified).
  void Reset();

  // |interleaved_| is not changed by this method.
  void UpdateFrame(int id, uint32_t timestamp, const int16_t* data,
                   size_t samples_per_channel, int sample_rate_hz,
                   SpeechType speech_type, VADActivity vad_activity,
                   int num_channels = 1, uint32_t energy = 0xffffffff);

  AudioFrame& Append(const AudioFrame& rhs);

  void CopyFrom(const AudioFrame& src);

  void Mute();

  AudioFrame& operator>>=(const int rhs);
  AudioFrame& operator+=(const AudioFrame& rhs);
  AudioFrame& operator-=(const AudioFrame& rhs);

  int id_;
  // RTP timestamp of the first sample in the AudioFrame.
  uint32_t timestamp_;
  // Time since the first frame in milliseconds.
  // -1 represents an uninitialized value.
  int64_t elapsed_time_ms_;
  // NTP time of the estimated capture time in local timebase in milliseconds.
  // -1 represents an uninitialized value.
  int64_t ntp_time_ms_;
  int16_t data_[kMaxDataSizeSamples];
  size_t samples_per_channel_;
  int sample_rate_hz_;
  int num_channels_;
  SpeechType speech_type_;
  VADActivity vad_activity_;
  // Note that there is no guarantee that |energy_| is correct. Any user of
  // this member must verify that the value is correct.
  // TODO(henrike) Remove |energy_|.
  // See https://code.google.com/p/webrtc/issues/detail?id=3315.
  uint32_t energy_;
  bool interleaved_;

 private:
  RTC_DISALLOW_COPY_AND_ASSIGN(AudioFrame);
};
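// Example (illustrative, not part of the original header): filling a frame
// with 10 ms of silent mono audio at 16 kHz. The sample count and rate are
// arbitrary choices for this sketch; defaults cover num_channels and energy.
//
//   AudioFrame frame;
//   int16_t samples[160] = {0};  // 16000 Hz * 0.010 s = 160 samples.
//   frame.UpdateFrame(0 /* id */, 0 /* timestamp */, samples,
//                     160 /* samples_per_channel */, 16000 /* sample_rate_hz */,
//                     AudioFrame::kNormalSpeech, AudioFrame::kVadPassive);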

inline AudioFrame::AudioFrame()
    : data_() {
  Reset();
}

inline void AudioFrame::Reset() {
  id_ = -1;
  // TODO(wu): Zero is a valid value for |timestamp_|. We should initialize
  // to an invalid value, or add a new member to indicate invalidity.
  timestamp_ = 0;
  elapsed_time_ms_ = -1;
  ntp_time_ms_ = -1;
  samples_per_channel_ = 0;
  sample_rate_hz_ = 0;
  num_channels_ = 0;
  speech_type_ = kUndefined;
  vad_activity_ = kVadUnknown;
  energy_ = 0xffffffff;
  interleaved_ = true;
}

inline void AudioFrame::UpdateFrame(int id,
                                    uint32_t timestamp,
                                    const int16_t* data,
                                    size_t samples_per_channel,
                                    int sample_rate_hz,
                                    SpeechType speech_type,
                                    VADActivity vad_activity,
                                    int num_channels,
                                    uint32_t energy) {
  id_ = id;
  timestamp_ = timestamp;
  samples_per_channel_ = samples_per_channel;
  sample_rate_hz_ = sample_rate_hz;
  speech_type_ = speech_type;
  vad_activity_ = vad_activity;
  num_channels_ = num_channels;
  energy_ = energy;

  assert(num_channels >= 0);
  const size_t length = samples_per_channel * num_channels;
  assert(length <= kMaxDataSizeSamples);
  if (data != NULL) {
    memcpy(data_, data, sizeof(int16_t) * length);
  } else {
    memset(data_, 0, sizeof(int16_t) * length);
  }
}

inline void AudioFrame::CopyFrom(const AudioFrame& src) {
  if (this == &src) return;

  id_ = src.id_;
  timestamp_ = src.timestamp_;
  elapsed_time_ms_ = src.elapsed_time_ms_;
  ntp_time_ms_ = src.ntp_time_ms_;
  samples_per_channel_ = src.samples_per_channel_;
  sample_rate_hz_ = src.sample_rate_hz_;
  speech_type_ = src.speech_type_;
  vad_activity_ = src.vad_activity_;
  num_channels_ = src.num_channels_;
  energy_ = src.energy_;
  interleaved_ = src.interleaved_;

  assert(num_channels_ >= 0);
  const size_t length = samples_per_channel_ * num_channels_;
  assert(length <= kMaxDataSizeSamples);
  memcpy(data_, src.data_, sizeof(int16_t) * length);
}

inline void AudioFrame::Mute() {
  memset(data_, 0, samples_per_channel_ * num_channels_ * sizeof(int16_t));
}

inline AudioFrame& AudioFrame::operator>>=(const int rhs) {
  assert((num_channels_ > 0) && (num_channels_ < 3));
  if ((num_channels_ > 2) || (num_channels_ < 1)) return *this;

  for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) {
    data_[i] = static_cast<int16_t>(data_[i] >> rhs);
  }
  return *this;
}

inline AudioFrame& AudioFrame::Append(const AudioFrame& rhs) {
  // Sanity check.
  assert((num_channels_ > 0) && (num_channels_ < 3));
  assert(interleaved_ == rhs.interleaved_);
  if ((num_channels_ > 2) || (num_channels_ < 1)) return *this;
  if (num_channels_ != rhs.num_channels_) return *this;

  if ((vad_activity_ == kVadActive) || rhs.vad_activity_ == kVadActive) {
    vad_activity_ = kVadActive;
  } else if (vad_activity_ == kVadUnknown || rhs.vad_activity_ == kVadUnknown) {
    vad_activity_ = kVadUnknown;
  }
  if (speech_type_ != rhs.speech_type_) {
    speech_type_ = kUndefined;
  }

  size_t offset = samples_per_channel_ * num_channels_;
  for (size_t i = 0; i < rhs.samples_per_channel_ * rhs.num_channels_; i++) {
    data_[offset + i] = rhs.data_[i];
  }
  samples_per_channel_ += rhs.samples_per_channel_;
  return *this;
}

namespace {
inline int16_t ClampToInt16(int32_t input) {
  if (input < -0x00008000) {
    return -0x8000;
  } else if (input > 0x00007FFF) {
    return 0x7FFF;
  } else {
    return static_cast<int16_t>(input);
  }
}
}  // namespace

inline AudioFrame& AudioFrame::operator+=(const AudioFrame& rhs) {
  // Sanity check.
  assert((num_channels_ > 0) && (num_channels_ < 3));
  assert(interleaved_ == rhs.interleaved_);
  if ((num_channels_ > 2) || (num_channels_ < 1)) return *this;
  if (num_channels_ != rhs.num_channels_) return *this;

  bool noPrevData = false;
  if (samples_per_channel_ != rhs.samples_per_channel_) {
    if (samples_per_channel_ == 0) {
      // Special case: we have no data to start with.
      samples_per_channel_ = rhs.samples_per_channel_;
      noPrevData = true;
    } else {
      return *this;
    }
  }

  if ((vad_activity_ == kVadActive) || rhs.vad_activity_ == kVadActive) {
    vad_activity_ = kVadActive;
  } else if (vad_activity_ == kVadUnknown || rhs.vad_activity_ == kVadUnknown) {
    vad_activity_ = kVadUnknown;
  }

  if (speech_type_ != rhs.speech_type_) speech_type_ = kUndefined;

  if (noPrevData) {
    memcpy(data_, rhs.data_,
           sizeof(int16_t) * rhs.samples_per_channel_ * num_channels_);
  } else {
    // IMPROVEMENT: this can be done very fast in assembly.
    for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) {
      int32_t wrap_guard =
          static_cast<int32_t>(data_[i]) + static_cast<int32_t>(rhs.data_[i]);
      data_[i] = ClampToInt16(wrap_guard);
    }
  }
  energy_ = 0xffffffff;
  return *this;
}
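// Example (illustrative, not part of the original header): mixing two frames
// with identical channel count and samples_per_channel. Samples are summed
// with saturation to the int16_t range, and |energy_| is invalidated.
//
//   AudioFrame mixed;
//   mixed.CopyFrom(frame_a);
//   mixed += frame_b;  // mixed.data_[i] == ClampToInt16(a[i] + b[i])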

inline AudioFrame& AudioFrame::operator-=(const AudioFrame& rhs) {
  // Sanity check.
  assert((num_channels_ > 0) && (num_channels_ < 3));
  assert(interleaved_ == rhs.interleaved_);
  if ((num_channels_ > 2) || (num_channels_ < 1)) return *this;

  if ((samples_per_channel_ != rhs.samples_per_channel_) ||
      (num_channels_ != rhs.num_channels_)) {
    return *this;
  }
  if ((vad_activity_ != kVadPassive) || rhs.vad_activity_ != kVadPassive) {
    vad_activity_ = kVadUnknown;
  }
  speech_type_ = kUndefined;

  for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) {
    int32_t wrap_guard =
        static_cast<int32_t>(data_[i]) - static_cast<int32_t>(rhs.data_[i]);
    data_[i] = ClampToInt16(wrap_guard);
  }
  energy_ = 0xffffffff;
  return *this;
}

inline bool IsNewerSequenceNumber(uint16_t sequence_number,
                                  uint16_t prev_sequence_number) {
  // Distinguish between elements that are exactly 0x8000 apart.
  // If s1>s2 and |s1-s2| = 0x8000: IsNewer(s1,s2)=true, IsNewer(s2,s1)=false
  // rather than having IsNewer(s1,s2) = IsNewer(s2,s1) = false.
  if (static_cast<uint16_t>(sequence_number - prev_sequence_number) == 0x8000) {
    return sequence_number > prev_sequence_number;
  }
  return sequence_number != prev_sequence_number &&
         static_cast<uint16_t>(sequence_number - prev_sequence_number) < 0x8000;
}
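// Examples (illustrative) of the wraparound behavior:
//   IsNewerSequenceNumber(0x0000, 0xFFFF) == true   // Newer across the wrap.
//   IsNewerSequenceNumber(0x8000, 0x0000) == true   // Exactly half the range;
//   IsNewerSequenceNumber(0x0000, 0x8000) == false  // the tie is broken by '>'.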

inline bool IsNewerTimestamp(uint32_t timestamp, uint32_t prev_timestamp) {
  // Distinguish between elements that are exactly 0x80000000 apart.
  // If t1>t2 and |t1-t2| = 0x80000000: IsNewer(t1,t2)=true,
  // IsNewer(t2,t1)=false
  // rather than having IsNewer(t1,t2) = IsNewer(t2,t1) = false.
  if (static_cast<uint32_t>(timestamp - prev_timestamp) == 0x80000000) {
    return timestamp > prev_timestamp;
  }
  return timestamp != prev_timestamp &&
         static_cast<uint32_t>(timestamp - prev_timestamp) < 0x80000000;
}

inline uint16_t LatestSequenceNumber(uint16_t sequence_number1,
                                     uint16_t sequence_number2) {
  return IsNewerSequenceNumber(sequence_number1, sequence_number2)
             ? sequence_number1
             : sequence_number2;
}

inline uint32_t LatestTimestamp(uint32_t timestamp1, uint32_t timestamp2) {
  return IsNewerTimestamp(timestamp1, timestamp2) ? timestamp1 : timestamp2;
}

// Utility class to unwrap a 16-bit sequence number to a larger type, for
// easier handling of large ranges. Note that sequence numbers will never be
// unwrapped to a negative value.
class SequenceNumberUnwrapper {
 public:
  SequenceNumberUnwrapper() : last_seq_(-1) {}

  // Get the unwrapped sequence number, but don't update the internal state.
  int64_t UnwrapWithoutUpdate(uint16_t sequence_number) {
    if (last_seq_ == -1)
      return sequence_number;

    uint16_t cropped_last = static_cast<uint16_t>(last_seq_);
    int64_t delta = sequence_number - cropped_last;
    if (IsNewerSequenceNumber(sequence_number, cropped_last)) {
      if (delta < 0)
        delta += (1 << 16);  // Wrap forwards.
    } else if (delta > 0 && (last_seq_ + delta - (1 << 16)) >= 0) {
      // If sequence_number is older but delta is positive, this is a backwards
      // wrap-around. However, don't wrap backwards past 0 (unwrapped).
      delta -= (1 << 16);
    }

    return last_seq_ + delta;
  }

  // Only update the internal state to the specified last (unwrapped) sequence.
  void UpdateLast(int64_t last_sequence) { last_seq_ = last_sequence; }

  // Unwrap the sequence number and update the internal state.
  int64_t Unwrap(uint16_t sequence_number) {
    int64_t unwrapped = UnwrapWithoutUpdate(sequence_number);
    UpdateLast(unwrapped);
    return unwrapped;
  }

 private:
  int64_t last_seq_;
};
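// Example (illustrative, not part of the original header): unwrapping across
// a 16-bit wraparound. The unwrapped sequence keeps growing monotonically.
//
//   SequenceNumberUnwrapper unwrapper;
//   unwrapper.Unwrap(0xFFFE);  // == 0xFFFE
//   unwrapper.Unwrap(0xFFFF);  // == 0xFFFF
//   unwrapper.Unwrap(0x0000);  // == 0x10000 (wrapped forward, stays positive)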

}  // namespace webrtc

#endif  // WEBRTC_MODULES_INCLUDE_MODULE_COMMON_TYPES_H_