1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #ifndef MODULE_COMMON_TYPES_H
12 #define MODULE_COMMON_TYPES_H
13 
14 #include <assert.h>
15 #include <string.h>  // memcpy
16 
17 #include <algorithm>
18 #include <limits>
19 
20 #include "webrtc/base/constructormagic.h"
21 #include "webrtc/common_types.h"
22 #ifndef WEBRTC_AUDIO_PROCESSING_ONLY_BUILD
23 #include "webrtc/common_video/rotation.h"
24 #endif
25 #include "webrtc/typedefs.h"
26 
27 namespace webrtc {
28 
29 struct RTPAudioHeader {
30   uint8_t numEnergy;                  // number of valid entries in arrOfEnergy
31   uint8_t arrOfEnergy[kRtpCsrcSize];  // one energy byte (0-9) per channel
32   bool isCNG;                         // is this CNG
33   uint8_t channel;                    // number of channels 2 = stereo
34 };
35 
// Sentinel ("no value") constants and size limits used by the VP8/VP9 RTP
// header structs.
const int16_t kNoPictureId = -1;              // PictureID does not exist.
const int16_t kMaxOneBytePictureId = 0x7F;    // Largest 7-bit value.
const int16_t kMaxTwoBytePictureId = 0x7FFF;  // Largest 15-bit value.
const int16_t kNoTl0PicIdx = -1;              // No TL0PIC_IDX provided.
const uint8_t kNoTemporalIdx = 0xFF;          // No temporal layer index.
const uint8_t kNoSpatialIdx = 0xFF;           // No spatial layer index.
const uint8_t kNoGofIdx = 0xFF;               // No GOF index.
const size_t kMaxVp9RefPics = 3;              // Reference-picture array size.
const size_t kMaxVp9FramesInGof = 0xFF;       // Largest 8-bit value.
const size_t kMaxVp9NumberOfSpatialLayers = 8;  // width/height array size.
const int kNoKeyIdx = -1;                     // Key index not used.
47 
48 struct RTPVideoHeaderVP8 {
InitRTPVideoHeaderVP8RTPVideoHeaderVP849   void InitRTPVideoHeaderVP8() {
50     nonReference = false;
51     pictureId = kNoPictureId;
52     tl0PicIdx = kNoTl0PicIdx;
53     temporalIdx = kNoTemporalIdx;
54     layerSync = false;
55     keyIdx = kNoKeyIdx;
56     partitionId = 0;
57     beginningOfPartition = false;
58   }
59 
60   bool nonReference;          // Frame is discardable.
61   int16_t pictureId;          // Picture ID index, 15 bits;
62                               // kNoPictureId if PictureID does not exist.
63   int16_t tl0PicIdx;          // TL0PIC_IDX, 8 bits;
64                               // kNoTl0PicIdx means no value provided.
65   uint8_t temporalIdx;        // Temporal layer index, or kNoTemporalIdx.
66   bool layerSync;             // This frame is a layer sync frame.
67                               // Disabled if temporalIdx == kNoTemporalIdx.
68   int keyIdx;                 // 5 bits; kNoKeyIdx means not used.
69   int partitionId;            // VP8 partition ID
70   bool beginningOfPartition;  // True if this packet is the first
71                               // in a VP8 partition. Otherwise false
72 };
73 
// Predefined temporal layer structures accepted by
// GofInfoVP9::SetGofInfoVP9().
enum TemporalStructureMode {
  // 1 temporal layer structure - i.e., IPPP...
  kTemporalStructureMode1,
  // 2 temporal layers 0-1-0-1...
  kTemporalStructureMode2,
  // 3 temporal layers 0-2-1-2-0-2-1-2...
  kTemporalStructureMode3
};
79 
80 struct GofInfoVP9 {
SetGofInfoVP9GofInfoVP981   void SetGofInfoVP9(TemporalStructureMode tm) {
82     switch (tm) {
83       case kTemporalStructureMode1:
84         num_frames_in_gof = 1;
85         temporal_idx[0] = 0;
86         temporal_up_switch[0] = false;
87         num_ref_pics[0] = 1;
88         pid_diff[0][0] = 1;
89         break;
90       case kTemporalStructureMode2:
91         num_frames_in_gof = 2;
92         temporal_idx[0] = 0;
93         temporal_up_switch[0] = false;
94         num_ref_pics[0] = 1;
95         pid_diff[0][0] = 2;
96 
97         temporal_idx[1] = 1;
98         temporal_up_switch[1] = true;
99         num_ref_pics[1] = 1;
100         pid_diff[1][0] = 1;
101         break;
102       case kTemporalStructureMode3:
103         num_frames_in_gof = 4;
104         temporal_idx[0] = 0;
105         temporal_up_switch[0] = false;
106         num_ref_pics[0] = 1;
107         pid_diff[0][0] = 4;
108 
109         temporal_idx[1] = 2;
110         temporal_up_switch[1] = true;
111         num_ref_pics[1] = 1;
112         pid_diff[1][0] = 1;
113 
114         temporal_idx[2] = 1;
115         temporal_up_switch[2] = true;
116         num_ref_pics[2] = 1;
117         pid_diff[2][0] = 2;
118 
119         temporal_idx[3] = 2;
120         temporal_up_switch[3] = false;
121         num_ref_pics[3] = 2;
122         pid_diff[3][0] = 1;
123         pid_diff[3][1] = 2;
124         break;
125       default:
126         assert(false);
127     }
128   }
129 
CopyGofInfoVP9GofInfoVP9130   void CopyGofInfoVP9(const GofInfoVP9& src) {
131     num_frames_in_gof = src.num_frames_in_gof;
132     for (size_t i = 0; i < num_frames_in_gof; ++i) {
133       temporal_idx[i] = src.temporal_idx[i];
134       temporal_up_switch[i] = src.temporal_up_switch[i];
135       num_ref_pics[i] = src.num_ref_pics[i];
136       for (size_t r = 0; r < num_ref_pics[i]; ++r) {
137         pid_diff[i][r] = src.pid_diff[i][r];
138       }
139     }
140   }
141 
142   size_t num_frames_in_gof;
143   uint8_t temporal_idx[kMaxVp9FramesInGof];
144   bool temporal_up_switch[kMaxVp9FramesInGof];
145   size_t num_ref_pics[kMaxVp9FramesInGof];
146   int16_t pid_diff[kMaxVp9FramesInGof][kMaxVp9RefPics];
147 };
148 
149 struct RTPVideoHeaderVP9 {
InitRTPVideoHeaderVP9RTPVideoHeaderVP9150   void InitRTPVideoHeaderVP9() {
151     inter_pic_predicted = false;
152     flexible_mode = false;
153     beginning_of_frame = false;
154     end_of_frame = false;
155     ss_data_available = false;
156     picture_id = kNoPictureId;
157     max_picture_id = kMaxTwoBytePictureId;
158     tl0_pic_idx = kNoTl0PicIdx;
159     temporal_idx = kNoTemporalIdx;
160     spatial_idx = kNoSpatialIdx;
161     temporal_up_switch = false;
162     inter_layer_predicted = false;
163     gof_idx = kNoGofIdx;
164     num_ref_pics = 0;
165     num_spatial_layers = 1;
166   }
167 
168   bool inter_pic_predicted;  // This layer frame is dependent on previously
169                              // coded frame(s).
170   bool flexible_mode;        // This frame is in flexible mode.
171   bool beginning_of_frame;   // True if this packet is the first in a VP9 layer
172                              // frame.
173   bool end_of_frame;  // True if this packet is the last in a VP9 layer frame.
174   bool ss_data_available;  // True if SS data is available in this payload
175                            // descriptor.
176   int16_t picture_id;      // PictureID index, 15 bits;
177                            // kNoPictureId if PictureID does not exist.
178   int16_t max_picture_id;  // Maximum picture ID index; either 0x7F or 0x7FFF;
179   int16_t tl0_pic_idx;     // TL0PIC_IDX, 8 bits;
180                            // kNoTl0PicIdx means no value provided.
181   uint8_t temporal_idx;    // Temporal layer index, or kNoTemporalIdx.
182   uint8_t spatial_idx;     // Spatial layer index, or kNoSpatialIdx.
183   bool temporal_up_switch;  // True if upswitch to higher frame rate is possible
184                             // starting from this frame.
185   bool inter_layer_predicted;  // Frame is dependent on directly lower spatial
186                                // layer frame.
187 
188   uint8_t gof_idx;  // Index to predefined temporal frame info in SS data.
189 
190   size_t num_ref_pics;  // Number of reference pictures used by this layer
191                         // frame.
192   int16_t pid_diff[kMaxVp9RefPics];  // P_DIFF signaled to derive the PictureID
193                                      // of the reference pictures.
194   int16_t ref_picture_id[kMaxVp9RefPics];  // PictureID of reference pictures.
195 
196   // SS data.
197   size_t num_spatial_layers;  // Always populated.
198   bool spatial_layer_resolution_present;
199   uint16_t width[kMaxVp9NumberOfSpatialLayers];
200   uint16_t height[kMaxVp9NumberOfSpatialLayers];
201   GofInfoVP9 gof;
202 };
203 
// Supported H.264 packetization types: single, aggregated, and fragmented.
enum H264PacketizationTypes {
  // The packet contains a single NAL unit.
  kH264SingleNalu,
  // The packet contains STAP-A (single time aggregation) packets. If the
  // packet has an associated NAL unit type, it is that of the first
  // aggregated packet.
  kH264StapA,
  // The packet contains a FU-A (fragmentation unit) packet, i.e. it is part
  // of a frame that was too large to fit into a single packet.
  kH264FuA,
};
215 
216 struct RTPVideoHeaderH264 {
217   uint8_t nalu_type;  // The NAL unit type. If this is a header for a
218                       // fragmented packet, it's the NAL unit type of
219                       // the original data. If this is the header for an
220                       // aggregated packet, it's the NAL unit type of
221                       // the first NAL unit in the packet.
222   H264PacketizationTypes packetization_type;
223 };
224 
225 union RTPVideoTypeHeader {
226   RTPVideoHeaderVP8 VP8;
227   RTPVideoHeaderVP9 VP9;
228   RTPVideoHeaderH264 H264;
229 };
230 
// Video codec identifiers carried in RTPVideoHeader::codec.
enum RtpVideoCodecTypes {
  kRtpVideoNone,     // No codec.
  kRtpVideoGeneric,  // Generic video.
  kRtpVideoVp8,      // VP8.
  kRtpVideoVp9,      // VP9.
  kRtpVideoH264      // H.264.
};
238 #ifndef WEBRTC_AUDIO_PROCESSING_ONLY_BUILD
239 // Since RTPVideoHeader is used as a member of a union, it can't have a
240 // non-trivial default constructor.
241 struct RTPVideoHeader {
242   uint16_t width;  // size
243   uint16_t height;
244   VideoRotation rotation;
245 
246   bool isFirstPacket;    // first packet in frame
247   uint8_t simulcastIdx;  // Index if the simulcast encoder creating
248                          // this frame, 0 if not using simulcast.
249   RtpVideoCodecTypes codec;
250   RTPVideoTypeHeader codecHeader;
251 };
252 #endif
253 union RTPTypeHeader {
254   RTPAudioHeader Audio;
255 #ifndef WEBRTC_AUDIO_PROCESSING_ONLY_BUILD
256   RTPVideoHeader Video;
257 #endif
258 };
259 
260 struct WebRtcRTPHeader {
261   RTPHeader header;
262   FrameType frameType;
263   RTPTypeHeader type;
264   // NTP time of the capture time in local timebase in milliseconds.
265   int64_t ntp_time_ms;
266 };
267 
// Describes how a payload is divided into fragments, using four parallel
// heap-allocated arrays of |fragmentationVectorSize| entries each. The object
// owns the arrays (they are released in the destructor); any of them may be
// NULL. Copying is only possible through CopyFrom().
class RTPFragmentationHeader {
 public:
  RTPFragmentationHeader()
      : fragmentationVectorSize(0),
        fragmentationOffset(NULL),
        fragmentationLength(NULL),
        fragmentationTimeDiff(NULL),
        fragmentationPlType(NULL) {}

  ~RTPFragmentationHeader() {
    delete[] fragmentationOffset;
    delete[] fragmentationLength;
    delete[] fragmentationTimeDiff;
    delete[] fragmentationPlType;
  }

  // Makes this header a copy of |src|.
  //
  // If the entry count differs, all arrays are released and only the arrays
  // that |src| has are re-created. If the entry count is the same, existing
  // arrays are reused; an array that |src| has but this object lacks is
  // allocated on demand (previously this case wrote through a NULL pointer),
  // and an array that |src| lacks is left as it is.
  void CopyFrom(const RTPFragmentationHeader& src) {
    if (this == &src) {
      return;
    }

    const uint16_t new_size = src.fragmentationVectorSize;
    if (new_size != fragmentationVectorSize) {
      // Arrays of the old size cannot be reused.
      ReleaseArray(&fragmentationOffset);
      ReleaseArray(&fragmentationLength);
      ReleaseArray(&fragmentationTimeDiff);
      ReleaseArray(&fragmentationPlType);
      fragmentationVectorSize = new_size;
    }

    CopyArray(src.fragmentationOffset, new_size, &fragmentationOffset);
    CopyArray(src.fragmentationLength, new_size, &fragmentationLength);
    CopyArray(src.fragmentationTimeDiff, new_size, &fragmentationTimeDiff);
    CopyArray(src.fragmentationPlType, new_size, &fragmentationPlType);
  }

  // Ensures that all four arrays hold at least |size| entries. Growing keeps
  // the existing entries and zero-fills the new ones; an array that was NULL
  // becomes entirely zero-filled. Never shrinks. |size| must fit in 16 bits.
  void VerifyAndAllocateFragmentationHeader(const size_t size) {
    assert(size <= std::numeric_limits<uint16_t>::max());
    const uint16_t size16 = static_cast<uint16_t>(size);
    if (fragmentationVectorSize >= size16) {
      return;
    }

    const uint16_t old_size = fragmentationVectorSize;
    GrowArray(&fragmentationOffset, old_size, size16);
    GrowArray(&fragmentationLength, old_size, size16);
    GrowArray(&fragmentationTimeDiff, old_size, size16);
    GrowArray(&fragmentationPlType, old_size, size16);
    fragmentationVectorSize = size16;
  }

  uint16_t fragmentationVectorSize;  // Number of fragmentations
  size_t* fragmentationOffset;       // Offset of pointer to data for each
                                     // fragmentation
  size_t* fragmentationLength;       // Data size for each fragmentation
  uint16_t* fragmentationTimeDiff;   // Timestamp difference relative "now" for
                                     // each fragmentation
  uint8_t* fragmentationPlType;      // Payload type of each fragmentation

 private:
  // Frees |*array| and resets the pointer to NULL.
  template <typename T>
  static void ReleaseArray(T** array) {
    delete[] *array;
    *array = NULL;
  }

  // Copies |count| elements from |from| into |*to|, allocating |*to| first
  // when it is NULL. Does nothing if |from| is NULL or |count| is 0.
  template <typename T>
  static void CopyArray(const T* from, size_t count, T** to) {
    if (from == NULL || count == 0) {
      return;
    }
    if (*to == NULL) {
      *to = new T[count];
    }
    memcpy(*to, from, count * sizeof(T));
  }

  // Replaces |*array| with a zero-filled array of |new_size| elements whose
  // first |old_size| elements are taken from the old array, if there was one.
  template <typename T>
  static void GrowArray(T** array, size_t old_size, size_t new_size) {
    T* grown = new T[new_size]();  // Value-initialized, i.e. all zero.
    if (*array != NULL && old_size > 0) {
      memcpy(grown, *array, old_size * sizeof(T));
    }
    delete[] *array;
    *array = grown;
  }

  RTC_DISALLOW_COPY_AND_ASSIGN(RTPFragmentationHeader);
};
403 
// VoIP metrics; the fields are those described in RFC 3611, section 4.7.
struct RTCPVoIPMetric {
  // Loss and discard.
  uint8_t lossRate;
  uint8_t discardRate;

  // Burst and gap.
  uint8_t burstDensity;
  uint8_t gapDensity;
  uint16_t burstDuration;
  uint16_t gapDuration;

  // Delay.
  uint16_t roundTripDelay;
  uint16_t endSystemDelay;

  // Signal related.
  uint8_t signalLevel;
  uint8_t noiseLevel;
  uint8_t RERL;
  uint8_t Gmin;

  // Call quality.
  uint8_t Rfactor;
  uint8_t extRfactor;
  uint8_t MOSLQ;
  uint8_t MOSCQ;

  // Receiver configuration and jitter buffer.
  uint8_t RXconfig;
  uint16_t JBnominal;
  uint16_t JBmax;
  uint16_t JBabsMax;
};
427 
// Types for the FEC packet masks. The packet masks are defined in
// modules/rtp_rtcp/fec_private_tables_random(bursty).h
enum FecMaskType {
  // Based on a random loss model.
  kFecMaskRandom,
  // Based on a bursty/consecutive loss model.
  kFecMaskBursty,
};
436 
437 // Struct containing forward error correction settings.
438 struct FecProtectionParams {
439   int fec_rate;
440   bool use_uep_protection;
441   int max_fec_frames;
442   FecMaskType fec_mask_type;
443 };
444 
// Observer interface through which the CallStats class distributes call
// statistics. Once an implementation has been registered with a CallStats
// object via RegisterStatsObserver, its callbacks start being triggered.
class CallStatsObserver {
 public:
  // Reports the average and maximum round-trip time, both in milliseconds.
  virtual void OnRttUpdate(int64_t avg_rtt_ms, int64_t max_rtt_ms) = 0;

  virtual ~CallStatsObserver() {}
};
454 
// Four floating point video content metrics that all start out at zero.
struct VideoContentMetrics {
  // Starts with every metric at zero.
  VideoContentMetrics() { Reset(); }

  // Sets every metric back to zero.
  void Reset() {
    motion_magnitude = spatial_pred_err = spatial_pred_err_h =
        spatial_pred_err_v = 0.0f;
  }

  float motion_magnitude;
  float spatial_pred_err;
  float spatial_pred_err_h;
  float spatial_pred_err_v;
};
473 
/* Holds up to 60 ms of super-wideband (32 kHz) stereo audio and supports
 * adding and subtracting frames while keeping track of the resulting state.
 *
 * Notes
 * - |data_| contains samples_per_channel_ * num_channels_ samples in total.
 *
 * - Stereo data is interleaved, starting with the left channel.
 *
 * - When deciding the resulting state, the + operator assumes that exactly
 *   opposite frames are never added. Use the - operator for that.
 */
class AudioFrame {
 public:
  // Stereo, 32 kHz, 60 ms (2 * 32 * 60)
  static const size_t kMaxDataSizeSamples = 3840;

  // Voice activity state of a frame.
  enum VADActivity {
    kVadActive = 0,
    kVadPassive = 1,
    kVadUnknown = 2
  };
  // Kind of speech data a frame carries.
  enum SpeechType {
    kNormalSpeech = 0,
    kPLC = 1,
    kCNG = 2,
    kPLCCNG = 3,
    kUndefined = 4
  };

  AudioFrame();
  virtual ~AudioFrame() {}

  // Puts every member except the contents of |data_| back to its default
  // state.
  void Reset();

  // Overwrites the frame with the given metadata and samples. |interleaved_|
  // is left as it is.
  void UpdateFrame(int id, uint32_t timestamp, const int16_t* data,
                   size_t samples_per_channel, int sample_rate_hz,
                   SpeechType speech_type, VADActivity vad_activity,
                   int num_channels = 1, uint32_t energy = -1);

  // Adds the samples of |rhs| after the samples already in this frame.
  AudioFrame& Append(const AudioFrame& rhs);

  // Makes this frame a copy of |src|.
  void CopyFrom(const AudioFrame& src);

  // Zeroes the samples currently in the frame.
  void Mute();

  // Sample-wise right shift, saturating addition and saturating subtraction.
  AudioFrame& operator>>=(const int rhs);
  AudioFrame& operator+=(const AudioFrame& rhs);
  AudioFrame& operator-=(const AudioFrame& rhs);

  int id_;
  // RTP timestamp of the first sample in the AudioFrame.
  uint32_t timestamp_;
  // Time since the first frame in milliseconds.
  // -1 represents an uninitialized value.
  int64_t elapsed_time_ms_;
  // NTP time of the estimated capture time in local timebase in milliseconds.
  // -1 represents an uninitialized value.
  int64_t ntp_time_ms_;
  // Sample storage; see the notes in the class comment.
  int16_t data_[kMaxDataSizeSamples];
  size_t samples_per_channel_;
  int sample_rate_hz_;
  int num_channels_;
  SpeechType speech_type_;
  VADActivity vad_activity_;
  // Note that there is no guarantee that |energy_| is correct. Any user of this
  // member must verify that the value is correct.
  // TODO(henrike) Remove |energy_|.
  // See https://code.google.com/p/webrtc/issues/detail?id=3315.
  uint32_t energy_;
  bool interleaved_;

 private:
  RTC_DISALLOW_COPY_AND_ASSIGN(AudioFrame);
};
553 
AudioFrame()554 inline AudioFrame::AudioFrame()
555     : data_() {
556   Reset();
557 }
558 
Reset()559 inline void AudioFrame::Reset() {
560   id_ = -1;
561   // TODO(wu): Zero is a valid value for |timestamp_|. We should initialize
562   // to an invalid value, or add a new member to indicate invalidity.
563   timestamp_ = 0;
564   elapsed_time_ms_ = -1;
565   ntp_time_ms_ = -1;
566   samples_per_channel_ = 0;
567   sample_rate_hz_ = 0;
568   num_channels_ = 0;
569   speech_type_ = kUndefined;
570   vad_activity_ = kVadUnknown;
571   energy_ = 0xffffffff;
572   interleaved_ = true;
573 }
574 
UpdateFrame(int id,uint32_t timestamp,const int16_t * data,size_t samples_per_channel,int sample_rate_hz,SpeechType speech_type,VADActivity vad_activity,int num_channels,uint32_t energy)575 inline void AudioFrame::UpdateFrame(int id,
576                                     uint32_t timestamp,
577                                     const int16_t* data,
578                                     size_t samples_per_channel,
579                                     int sample_rate_hz,
580                                     SpeechType speech_type,
581                                     VADActivity vad_activity,
582                                     int num_channels,
583                                     uint32_t energy) {
584   id_ = id;
585   timestamp_ = timestamp;
586   samples_per_channel_ = samples_per_channel;
587   sample_rate_hz_ = sample_rate_hz;
588   speech_type_ = speech_type;
589   vad_activity_ = vad_activity;
590   num_channels_ = num_channels;
591   energy_ = energy;
592 
593   assert(num_channels >= 0);
594   const size_t length = samples_per_channel * num_channels;
595   assert(length <= kMaxDataSizeSamples);
596   if (data != NULL) {
597     memcpy(data_, data, sizeof(int16_t) * length);
598   } else {
599     memset(data_, 0, sizeof(int16_t) * length);
600   }
601 }
602 
CopyFrom(const AudioFrame & src)603 inline void AudioFrame::CopyFrom(const AudioFrame& src) {
604   if (this == &src) return;
605 
606   id_ = src.id_;
607   timestamp_ = src.timestamp_;
608   elapsed_time_ms_ = src.elapsed_time_ms_;
609   ntp_time_ms_ = src.ntp_time_ms_;
610   samples_per_channel_ = src.samples_per_channel_;
611   sample_rate_hz_ = src.sample_rate_hz_;
612   speech_type_ = src.speech_type_;
613   vad_activity_ = src.vad_activity_;
614   num_channels_ = src.num_channels_;
615   energy_ = src.energy_;
616   interleaved_ = src.interleaved_;
617 
618   assert(num_channels_ >= 0);
619   const size_t length = samples_per_channel_ * num_channels_;
620   assert(length <= kMaxDataSizeSamples);
621   memcpy(data_, src.data_, sizeof(int16_t) * length);
622 }
623 
Mute()624 inline void AudioFrame::Mute() {
625   memset(data_, 0, samples_per_channel_ * num_channels_ * sizeof(int16_t));
626 }
627 
628 inline AudioFrame& AudioFrame::operator>>=(const int rhs) {
629   assert((num_channels_ > 0) && (num_channels_ < 3));
630   if ((num_channels_ > 2) || (num_channels_ < 1)) return *this;
631 
632   for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) {
633     data_[i] = static_cast<int16_t>(data_[i] >> rhs);
634   }
635   return *this;
636 }
637 
// Appends the samples of |rhs| after the samples already in this frame and
// increases |samples_per_channel_| by rhs.samples_per_channel_.
//
// The frame is returned unchanged when it is neither mono nor stereo, when
// the channel counts differ, or when the combined samples would not fit in
// |data_| (kMaxDataSizeSamples entries).
//
// State merging: the result is kVadActive if either frame is active,
// otherwise kVadUnknown if either is unknown; |speech_type_| becomes
// kUndefined when the two frames disagree.
inline AudioFrame& AudioFrame::Append(const AudioFrame& rhs) {
  // Sanity check
  assert((num_channels_ > 0) && (num_channels_ < 3));
  assert(interleaved_ == rhs.interleaved_);
  if ((num_channels_ > 2) || (num_channels_ < 1)) return *this;
  if (num_channels_ != rhs.num_channels_) return *this;

  // Refuse to write past the end of |data_|. Checked before any state is
  // modified so that a rejected append leaves the frame untouched.
  const size_t capacity = kMaxDataSizeSamples;
  const size_t offset = samples_per_channel_ * num_channels_;
  const size_t appended = rhs.samples_per_channel_ * rhs.num_channels_;
  assert(offset <= capacity && appended <= capacity - offset);
  if (offset > capacity || appended > capacity - offset) return *this;

  if ((vad_activity_ == kVadActive) || rhs.vad_activity_ == kVadActive) {
    vad_activity_ = kVadActive;
  } else if (vad_activity_ == kVadUnknown || rhs.vad_activity_ == kVadUnknown) {
    vad_activity_ = kVadUnknown;
  }
  if (speech_type_ != rhs.speech_type_) {
    speech_type_ = kUndefined;
  }

  // Source [0, appended) and destination [offset, offset + appended) never
  // overlap, even when |rhs| is this frame (then offset == appended).
  memcpy(data_ + offset, rhs.data_, appended * sizeof(int16_t));
  samples_per_channel_ += rhs.samples_per_channel_;
  return *this;
}
661 
// NOTE(review): this helper sits in an unnamed namespace inside a header, so
// every translation unit that includes the header gets its own copy.
namespace {
// Saturates |input| to the int16_t range [-0x8000, 0x7FFF].
inline int16_t ClampToInt16(int32_t input) {
  const int32_t kLowest = -0x8000;
  const int32_t kHighest = 0x7FFF;
  const int32_t bounded =
      input < kLowest ? kLowest : (input > kHighest ? kHighest : input);
  return static_cast<int16_t>(bounded);
}
}
673 
674 inline AudioFrame& AudioFrame::operator+=(const AudioFrame& rhs) {
675   // Sanity check
676   assert((num_channels_ > 0) && (num_channels_ < 3));
677   assert(interleaved_ == rhs.interleaved_);
678   if ((num_channels_ > 2) || (num_channels_ < 1)) return *this;
679   if (num_channels_ != rhs.num_channels_) return *this;
680 
681   bool noPrevData = false;
682   if (samples_per_channel_ != rhs.samples_per_channel_) {
683     if (samples_per_channel_ == 0) {
684       // special case we have no data to start with
685       samples_per_channel_ = rhs.samples_per_channel_;
686       noPrevData = true;
687     } else {
688       return *this;
689     }
690   }
691 
692   if ((vad_activity_ == kVadActive) || rhs.vad_activity_ == kVadActive) {
693     vad_activity_ = kVadActive;
694   } else if (vad_activity_ == kVadUnknown || rhs.vad_activity_ == kVadUnknown) {
695     vad_activity_ = kVadUnknown;
696   }
697 
698   if (speech_type_ != rhs.speech_type_) speech_type_ = kUndefined;
699 
700   if (noPrevData) {
701     memcpy(data_, rhs.data_,
702            sizeof(int16_t) * rhs.samples_per_channel_ * num_channels_);
703   } else {
704     // IMPROVEMENT this can be done very fast in assembly
705     for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) {
706       int32_t wrap_guard =
707           static_cast<int32_t>(data_[i]) + static_cast<int32_t>(rhs.data_[i]);
708       data_[i] = ClampToInt16(wrap_guard);
709     }
710   }
711   energy_ = 0xffffffff;
712   return *this;
713 }
714 
715 inline AudioFrame& AudioFrame::operator-=(const AudioFrame& rhs) {
716   // Sanity check
717   assert((num_channels_ > 0) && (num_channels_ < 3));
718   assert(interleaved_ == rhs.interleaved_);
719   if ((num_channels_ > 2) || (num_channels_ < 1)) return *this;
720 
721   if ((samples_per_channel_ != rhs.samples_per_channel_) ||
722       (num_channels_ != rhs.num_channels_)) {
723     return *this;
724   }
725   if ((vad_activity_ != kVadPassive) || rhs.vad_activity_ != kVadPassive) {
726     vad_activity_ = kVadUnknown;
727   }
728   speech_type_ = kUndefined;
729 
730   for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) {
731     int32_t wrap_guard =
732         static_cast<int32_t>(data_[i]) - static_cast<int32_t>(rhs.data_[i]);
733     data_[i] = ClampToInt16(wrap_guard);
734   }
735   energy_ = 0xffffffff;
736   return *this;
737 }
738 
// Returns true if |sequence_number| is newer than |prev_sequence_number|,
// i.e. it lies less than 0x8000 steps ahead of it modulo 2^16. Equal numbers
// are not newer.
//
// Numbers exactly 0x8000 apart are ordered by plain value, so that for s1 > s2
// IsNewer(s1, s2) is true and IsNewer(s2, s1) is false, instead of both being
// false.
inline bool IsNewerSequenceNumber(uint16_t sequence_number,
                                  uint16_t prev_sequence_number) {
  const uint16_t forward_distance =
      static_cast<uint16_t>(sequence_number - prev_sequence_number);
  if (forward_distance == 0x8000) {
    return sequence_number > prev_sequence_number;
  }
  return forward_distance != 0 && forward_distance < 0x8000;
}
750 
// Returns true if |timestamp| is newer than |prev_timestamp|, i.e. it lies
// less than 0x80000000 steps ahead of it modulo 2^32. Equal timestamps are
// not newer.
//
// Timestamps exactly 0x80000000 apart are ordered by plain value, so that for
// t1 > t2 IsNewer(t1, t2) is true and IsNewer(t2, t1) is false, instead of
// both being false.
inline bool IsNewerTimestamp(uint32_t timestamp, uint32_t prev_timestamp) {
  const uint32_t forward_distance =
      static_cast<uint32_t>(timestamp - prev_timestamp);
  if (forward_distance == 0x80000000) {
    return timestamp > prev_timestamp;
  }
  return forward_distance != 0 && forward_distance < 0x80000000;
}
762 
LatestSequenceNumber(uint16_t sequence_number1,uint16_t sequence_number2)763 inline uint16_t LatestSequenceNumber(uint16_t sequence_number1,
764                                      uint16_t sequence_number2) {
765   return IsNewerSequenceNumber(sequence_number1, sequence_number2)
766              ? sequence_number1
767              : sequence_number2;
768 }
769 
LatestTimestamp(uint32_t timestamp1,uint32_t timestamp2)770 inline uint32_t LatestTimestamp(uint32_t timestamp1, uint32_t timestamp2) {
771   return IsNewerTimestamp(timestamp1, timestamp2) ? timestamp1 : timestamp2;
772 }
773 
774 // Utility class to unwrap a sequence number to a larger type, for easier
775 // handling large ranges. Note that sequence numbers will never be unwrapped
776 // to a negative value.
777 class SequenceNumberUnwrapper {
778  public:
SequenceNumberUnwrapper()779   SequenceNumberUnwrapper() : last_seq_(-1) {}
780 
781   // Get the unwrapped sequence, but don't update the internal state.
UnwrapWithoutUpdate(uint16_t sequence_number)782   int64_t UnwrapWithoutUpdate(uint16_t sequence_number) {
783     if (last_seq_ == -1)
784       return sequence_number;
785 
786     uint16_t cropped_last = static_cast<uint16_t>(last_seq_);
787     int64_t delta = sequence_number - cropped_last;
788     if (IsNewerSequenceNumber(sequence_number, cropped_last)) {
789       if (delta < 0)
790         delta += (1 << 16);  // Wrap forwards.
791     } else if (delta > 0 && (last_seq_ + delta - (1 << 16)) >= 0) {
792       // If sequence_number is older but delta is positive, this is a backwards
793       // wrap-around. However, don't wrap backwards past 0 (unwrapped).
794       delta -= (1 << 16);
795     }
796 
797     return last_seq_ + delta;
798   }
799 
800   // Only update the internal state to the specified last (unwrapped) sequence.
UpdateLast(int64_t last_sequence)801   void UpdateLast(int64_t last_sequence) { last_seq_ = last_sequence; }
802 
803   // Unwrap the sequence number and update the internal state.
Unwrap(uint16_t sequence_number)804   int64_t Unwrap(uint16_t sequence_number) {
805     int64_t unwrapped = UnwrapWithoutUpdate(sequence_number);
806     UpdateLast(unwrapped);
807     return unwrapped;
808   }
809 
810  private:
811   int64_t last_seq_;
812 };
813 
814 }  // namespace webrtc
815 
816 #endif  // MODULE_COMMON_TYPES_H
817