1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #ifndef MODULE_COMMON_TYPES_H
12 #define MODULE_COMMON_TYPES_H
13
14 #include <assert.h>
15 #include <string.h> // memcpy
16
17 #include <algorithm>
18 #include <limits>
19
20 #include "webrtc/base/constructormagic.h"
21 #include "webrtc/common_types.h"
22 #ifndef WEBRTC_AUDIO_PROCESSING_ONLY_BUILD
23 #include "webrtc/common_video/rotation.h"
24 #endif
25 #include "webrtc/typedefs.h"
26
27 namespace webrtc {
28
// Audio-specific fields parsed from / written to the RTP header of an audio
// packet. Kept trivially constructible: it is a member of RTPTypeHeader
// (a union) below.
struct RTPAudioHeader {
  uint8_t numEnergy;                  // number of valid entries in arrOfEnergy
  uint8_t arrOfEnergy[kRtpCsrcSize];  // one energy byte (0-9) per channel
  bool isCNG;                         // is this CNG (comfort noise)
  uint8_t channel;                    // number of channels 2 = stereo
};
35
// Sentinel ("field not present") and limit values for the VP8/VP9 RTP
// payload descriptor fields used by the structs below.
const int16_t kNoPictureId = -1;
const int16_t kMaxOneBytePictureId = 0x7F;    // 7 bits
const int16_t kMaxTwoBytePictureId = 0x7FFF;  // 15 bits
const int16_t kNoTl0PicIdx = -1;
const uint8_t kNoTemporalIdx = 0xFF;
const uint8_t kNoSpatialIdx = 0xFF;
const uint8_t kNoGofIdx = 0xFF;
const size_t kMaxVp9RefPics = 3;         // Max reference pictures per frame.
const size_t kMaxVp9FramesInGof = 0xFF;  // 8 bits
const size_t kMaxVp9NumberOfSpatialLayers = 8;
const int kNoKeyIdx = -1;
47
// VP8-specific RTP payload descriptor data.
struct RTPVideoHeaderVP8 {
  // Resets every field to its "not present"/default value. Used instead of a
  // constructor because this struct must stay trivially constructible (it is
  // nested inside the RTPVideoTypeHeader union).
  void InitRTPVideoHeaderVP8() {
    nonReference = false;
    pictureId = kNoPictureId;
    tl0PicIdx = kNoTl0PicIdx;
    temporalIdx = kNoTemporalIdx;
    layerSync = false;
    keyIdx = kNoKeyIdx;
    partitionId = 0;
    beginningOfPartition = false;
  }

  bool nonReference;          // Frame is discardable.
  int16_t pictureId;          // Picture ID index, 15 bits;
                              // kNoPictureId if PictureID does not exist.
  int16_t tl0PicIdx;          // TL0PIC_IDX, 8 bits;
                              // kNoTl0PicIdx means no value provided.
  uint8_t temporalIdx;        // Temporal layer index, or kNoTemporalIdx.
  bool layerSync;             // This frame is a layer sync frame.
                              // Disabled if temporalIdx == kNoTemporalIdx.
  int keyIdx;                 // 5 bits; kNoKeyIdx means not used.
  int partitionId;            // VP8 partition ID
  bool beginningOfPartition;  // True if this packet is the first
                              // in a VP8 partition. Otherwise false
};
73
// Predefined temporal-layer prediction structures; consumed by
// GofInfoVP9::SetGofInfoVP9() below.
enum TemporalStructureMode {
  kTemporalStructureMode1,  // 1 temporal layer structure - i.e., IPPP...
  kTemporalStructureMode2,  // 2 temporal layers 0-1-0-1...
  kTemporalStructureMode3   // 3 temporal layers 0-2-1-2-0-2-1-2...
};
79
80 struct GofInfoVP9 {
SetGofInfoVP9GofInfoVP981 void SetGofInfoVP9(TemporalStructureMode tm) {
82 switch (tm) {
83 case kTemporalStructureMode1:
84 num_frames_in_gof = 1;
85 temporal_idx[0] = 0;
86 temporal_up_switch[0] = false;
87 num_ref_pics[0] = 1;
88 pid_diff[0][0] = 1;
89 break;
90 case kTemporalStructureMode2:
91 num_frames_in_gof = 2;
92 temporal_idx[0] = 0;
93 temporal_up_switch[0] = false;
94 num_ref_pics[0] = 1;
95 pid_diff[0][0] = 2;
96
97 temporal_idx[1] = 1;
98 temporal_up_switch[1] = true;
99 num_ref_pics[1] = 1;
100 pid_diff[1][0] = 1;
101 break;
102 case kTemporalStructureMode3:
103 num_frames_in_gof = 4;
104 temporal_idx[0] = 0;
105 temporal_up_switch[0] = false;
106 num_ref_pics[0] = 1;
107 pid_diff[0][0] = 4;
108
109 temporal_idx[1] = 2;
110 temporal_up_switch[1] = true;
111 num_ref_pics[1] = 1;
112 pid_diff[1][0] = 1;
113
114 temporal_idx[2] = 1;
115 temporal_up_switch[2] = true;
116 num_ref_pics[2] = 1;
117 pid_diff[2][0] = 2;
118
119 temporal_idx[3] = 2;
120 temporal_up_switch[3] = false;
121 num_ref_pics[3] = 2;
122 pid_diff[3][0] = 1;
123 pid_diff[3][1] = 2;
124 break;
125 default:
126 assert(false);
127 }
128 }
129
CopyGofInfoVP9GofInfoVP9130 void CopyGofInfoVP9(const GofInfoVP9& src) {
131 num_frames_in_gof = src.num_frames_in_gof;
132 for (size_t i = 0; i < num_frames_in_gof; ++i) {
133 temporal_idx[i] = src.temporal_idx[i];
134 temporal_up_switch[i] = src.temporal_up_switch[i];
135 num_ref_pics[i] = src.num_ref_pics[i];
136 for (size_t r = 0; r < num_ref_pics[i]; ++r) {
137 pid_diff[i][r] = src.pid_diff[i][r];
138 }
139 }
140 }
141
142 size_t num_frames_in_gof;
143 uint8_t temporal_idx[kMaxVp9FramesInGof];
144 bool temporal_up_switch[kMaxVp9FramesInGof];
145 size_t num_ref_pics[kMaxVp9FramesInGof];
146 int16_t pid_diff[kMaxVp9FramesInGof][kMaxVp9RefPics];
147 };
148
// VP9-specific RTP payload descriptor data.
struct RTPVideoHeaderVP9 {
  // Resets fields to their "not present"/default values.
  // NOTE(review): spatial_layer_resolution_present, width and height are NOT
  // reset here — callers presumably set them whenever ss_data_available is
  // true; confirm against the packetizer before relying on them.
  void InitRTPVideoHeaderVP9() {
    inter_pic_predicted = false;
    flexible_mode = false;
    beginning_of_frame = false;
    end_of_frame = false;
    ss_data_available = false;
    picture_id = kNoPictureId;
    max_picture_id = kMaxTwoBytePictureId;
    tl0_pic_idx = kNoTl0PicIdx;
    temporal_idx = kNoTemporalIdx;
    spatial_idx = kNoSpatialIdx;
    temporal_up_switch = false;
    inter_layer_predicted = false;
    gof_idx = kNoGofIdx;
    num_ref_pics = 0;
    num_spatial_layers = 1;
  }

  bool inter_pic_predicted;  // This layer frame is dependent on previously
                             // coded frame(s).
  bool flexible_mode;        // This frame is in flexible mode.
  bool beginning_of_frame;   // True if this packet is the first in a VP9 layer
                             // frame.
  bool end_of_frame;  // True if this packet is the last in a VP9 layer frame.
  bool ss_data_available;  // True if SS data is available in this payload
                           // descriptor.
  int16_t picture_id;      // PictureID index, 15 bits;
                           // kNoPictureId if PictureID does not exist.
  int16_t max_picture_id;  // Maximum picture ID index; either 0x7F or 0x7FFF;
  int16_t tl0_pic_idx;     // TL0PIC_IDX, 8 bits;
                           // kNoTl0PicIdx means no value provided.
  uint8_t temporal_idx;    // Temporal layer index, or kNoTemporalIdx.
  uint8_t spatial_idx;     // Spatial layer index, or kNoSpatialIdx.
  bool temporal_up_switch;  // True if upswitch to higher frame rate is possible
                            // starting from this frame.
  bool inter_layer_predicted;  // Frame is dependent on directly lower spatial
                               // layer frame.

  uint8_t gof_idx;  // Index to predefined temporal frame info in SS data.

  size_t num_ref_pics;  // Number of reference pictures used by this layer
                        // frame.
  int16_t pid_diff[kMaxVp9RefPics];  // P_DIFF signaled to derive the PictureID
                                     // of the reference pictures.
  int16_t ref_picture_id[kMaxVp9RefPics];  // PictureID of reference pictures.

  // SS data.
  size_t num_spatial_layers;  // Always populated.
  bool spatial_layer_resolution_present;  // True when width/height are set
                                          // (not reset by Init — see above).
  uint16_t width[kMaxVp9NumberOfSpatialLayers];
  uint16_t height[kMaxVp9NumberOfSpatialLayers];
  GofInfoVP9 gof;
};
203
// The packetization types that we support: single, aggregated, and fragmented.
enum H264PacketizationTypes {
  kH264SingleNalu,  // This packet contains a single NAL unit.
  kH264StapA,       // This packet contains STAP-A (single time
                    // aggregation) packets. If this packet has an
                    // associated NAL unit type, it'll be for the
                    // first such aggregated packet.
  kH264FuA,         // This packet contains a FU-A (fragmentation
                    // unit) packet, meaning it is a part of a frame
                    // that was too large to fit into a single packet.
};
215
// H.264-specific RTP payload header data.
struct RTPVideoHeaderH264 {
  uint8_t nalu_type;  // The NAL unit type. If this is a header for a
                      // fragmented packet, it's the NAL unit type of
                      // the original data. If this is the header for an
                      // aggregated packet, it's the NAL unit type of
                      // the first NAL unit in the packet.
  H264PacketizationTypes packetization_type;  // How the payload is packed.
};
224
// Codec-specific part of the video RTP header. The active member is selected
// by RTPVideoHeader::codec.
union RTPVideoTypeHeader {
  RTPVideoHeaderVP8 VP8;
  RTPVideoHeaderVP9 VP9;
  RTPVideoHeaderH264 H264;
};
230
// Video codec selector for RTP packetization; discriminates the
// RTPVideoTypeHeader union above.
enum RtpVideoCodecTypes {
  kRtpVideoNone,
  kRtpVideoGeneric,
  kRtpVideoVp8,
  kRtpVideoVp9,
  kRtpVideoH264
};
238 #ifndef WEBRTC_AUDIO_PROCESSING_ONLY_BUILD
239 // Since RTPVideoHeader is used as a member of a union, it can't have a
240 // non-trivial default constructor.
// Codec-independent video RTP header fields (see the note above: must keep a
// trivial default constructor since it lives inside the RTPTypeHeader union).
struct RTPVideoHeader {
  uint16_t width;  // size
  uint16_t height;
  VideoRotation rotation;

  bool isFirstPacket;    // first packet in frame
  uint8_t simulcastIdx;  // Index of the simulcast encoder creating
                         // this frame, 0 if not using simulcast.
  RtpVideoCodecTypes codec;        // Selects the active codecHeader member.
  RTPVideoTypeHeader codecHeader;  // Codec-specific fields.
};
252 #endif
// Media-specific part of WebRtcRTPHeader; the active member depends on
// whether the packet is audio or video.
union RTPTypeHeader {
  RTPAudioHeader Audio;
#ifndef WEBRTC_AUDIO_PROCESSING_ONLY_BUILD
  RTPVideoHeader Video;
#endif
};
259
// Fully parsed RTP header plus frame metadata for one received packet.
struct WebRtcRTPHeader {
  RTPHeader header;     // Generic RTP header fields.
  FrameType frameType;  // Frame classification (key/delta/audio etc.).
  RTPTypeHeader type;   // Media-specific header data.
  // NTP time of the capture time in local timebase in milliseconds.
  int64_t ntp_time_ms;
};
267
268 class RTPFragmentationHeader {
269 public:
RTPFragmentationHeader()270 RTPFragmentationHeader()
271 : fragmentationVectorSize(0),
272 fragmentationOffset(NULL),
273 fragmentationLength(NULL),
274 fragmentationTimeDiff(NULL),
275 fragmentationPlType(NULL) {};
276
~RTPFragmentationHeader()277 ~RTPFragmentationHeader() {
278 delete[] fragmentationOffset;
279 delete[] fragmentationLength;
280 delete[] fragmentationTimeDiff;
281 delete[] fragmentationPlType;
282 }
283
CopyFrom(const RTPFragmentationHeader & src)284 void CopyFrom(const RTPFragmentationHeader& src) {
285 if (this == &src) {
286 return;
287 }
288
289 if (src.fragmentationVectorSize != fragmentationVectorSize) {
290 // new size of vectors
291
292 // delete old
293 delete[] fragmentationOffset;
294 fragmentationOffset = NULL;
295 delete[] fragmentationLength;
296 fragmentationLength = NULL;
297 delete[] fragmentationTimeDiff;
298 fragmentationTimeDiff = NULL;
299 delete[] fragmentationPlType;
300 fragmentationPlType = NULL;
301
302 if (src.fragmentationVectorSize > 0) {
303 // allocate new
304 if (src.fragmentationOffset) {
305 fragmentationOffset = new size_t[src.fragmentationVectorSize];
306 }
307 if (src.fragmentationLength) {
308 fragmentationLength = new size_t[src.fragmentationVectorSize];
309 }
310 if (src.fragmentationTimeDiff) {
311 fragmentationTimeDiff = new uint16_t[src.fragmentationVectorSize];
312 }
313 if (src.fragmentationPlType) {
314 fragmentationPlType = new uint8_t[src.fragmentationVectorSize];
315 }
316 }
317 // set new size
318 fragmentationVectorSize = src.fragmentationVectorSize;
319 }
320
321 if (src.fragmentationVectorSize > 0) {
322 // copy values
323 if (src.fragmentationOffset) {
324 memcpy(fragmentationOffset, src.fragmentationOffset,
325 src.fragmentationVectorSize * sizeof(size_t));
326 }
327 if (src.fragmentationLength) {
328 memcpy(fragmentationLength, src.fragmentationLength,
329 src.fragmentationVectorSize * sizeof(size_t));
330 }
331 if (src.fragmentationTimeDiff) {
332 memcpy(fragmentationTimeDiff, src.fragmentationTimeDiff,
333 src.fragmentationVectorSize * sizeof(uint16_t));
334 }
335 if (src.fragmentationPlType) {
336 memcpy(fragmentationPlType, src.fragmentationPlType,
337 src.fragmentationVectorSize * sizeof(uint8_t));
338 }
339 }
340 }
341
VerifyAndAllocateFragmentationHeader(const size_t size)342 void VerifyAndAllocateFragmentationHeader(const size_t size) {
343 assert(size <= std::numeric_limits<uint16_t>::max());
344 const uint16_t size16 = static_cast<uint16_t>(size);
345 if (fragmentationVectorSize < size16) {
346 uint16_t oldVectorSize = fragmentationVectorSize;
347 {
348 // offset
349 size_t* oldOffsets = fragmentationOffset;
350 fragmentationOffset = new size_t[size16];
351 memset(fragmentationOffset + oldVectorSize, 0,
352 sizeof(size_t) * (size16 - oldVectorSize));
353 // copy old values
354 memcpy(fragmentationOffset, oldOffsets,
355 sizeof(size_t) * oldVectorSize);
356 delete[] oldOffsets;
357 }
358 // length
359 {
360 size_t* oldLengths = fragmentationLength;
361 fragmentationLength = new size_t[size16];
362 memset(fragmentationLength + oldVectorSize, 0,
363 sizeof(size_t) * (size16 - oldVectorSize));
364 memcpy(fragmentationLength, oldLengths,
365 sizeof(size_t) * oldVectorSize);
366 delete[] oldLengths;
367 }
368 // time diff
369 {
370 uint16_t* oldTimeDiffs = fragmentationTimeDiff;
371 fragmentationTimeDiff = new uint16_t[size16];
372 memset(fragmentationTimeDiff + oldVectorSize, 0,
373 sizeof(uint16_t) * (size16 - oldVectorSize));
374 memcpy(fragmentationTimeDiff, oldTimeDiffs,
375 sizeof(uint16_t) * oldVectorSize);
376 delete[] oldTimeDiffs;
377 }
378 // payload type
379 {
380 uint8_t* oldTimePlTypes = fragmentationPlType;
381 fragmentationPlType = new uint8_t[size16];
382 memset(fragmentationPlType + oldVectorSize, 0,
383 sizeof(uint8_t) * (size16 - oldVectorSize));
384 memcpy(fragmentationPlType, oldTimePlTypes,
385 sizeof(uint8_t) * oldVectorSize);
386 delete[] oldTimePlTypes;
387 }
388 fragmentationVectorSize = size16;
389 }
390 }
391
392 uint16_t fragmentationVectorSize; // Number of fragmentations
393 size_t* fragmentationOffset; // Offset of pointer to data for each
394 // fragmentation
395 size_t* fragmentationLength; // Data size for each fragmentation
396 uint16_t* fragmentationTimeDiff; // Timestamp difference relative "now" for
397 // each fragmentation
398 uint8_t* fragmentationPlType; // Payload type of each fragmentation
399
400 private:
401 RTC_DISALLOW_COPY_AND_ASSIGN(RTPFragmentationHeader);
402 };
403
// VoIP metrics report block carried in RTCP XR packets; field names and
// semantics follow RFC 3611 section 4.7.
struct RTCPVoIPMetric {
  // RFC 3611 4.7
  uint8_t lossRate;
  uint8_t discardRate;
  uint8_t burstDensity;
  uint8_t gapDensity;
  uint16_t burstDuration;
  uint16_t gapDuration;
  uint16_t roundTripDelay;
  uint16_t endSystemDelay;
  uint8_t signalLevel;
  uint8_t noiseLevel;
  uint8_t RERL;
  uint8_t Gmin;
  uint8_t Rfactor;
  uint8_t extRfactor;
  uint8_t MOSLQ;
  uint8_t MOSCQ;
  uint8_t RXconfig;
  uint16_t JBnominal;
  uint16_t JBmax;
  uint16_t JBabsMax;
};
427
// Types for the FEC packet masks. The type |kFecMaskRandom| is based on a
// random loss model. The type |kFecMaskBursty| is based on a bursty/consecutive
// loss model. The packet masks are defined in
// modules/rtp_rtcp/fec_private_tables_random(bursty).h
enum FecMaskType {
  kFecMaskRandom,
  kFecMaskBursty,
};
436
// Struct containing forward error correction settings.
struct FecProtectionParams {
  int fec_rate;             // FEC protection overhead rate.
  bool use_uep_protection;  // Enable unequal error protection.
  int max_fec_frames;       // Max number of frames FEC spans.
  FecMaskType fec_mask_type;  // Loss model the packet mask is built for.
};
444
// Interface used by the CallStats class to distribute call statistics.
// Callbacks will be triggered as soon as the class has been registered to a
// CallStats object using RegisterStatsObserver.
class CallStatsObserver {
 public:
  // Receives the latest average and maximum round-trip-time estimates.
  virtual void OnRttUpdate(int64_t avg_rtt_ms, int64_t max_rtt_ms) = 0;

  virtual ~CallStatsObserver() {}
};
454
// Per-frame video content analysis metrics. All metrics start at (and reset
// to) zero.
struct VideoContentMetrics {
  VideoContentMetrics() { Reset(); }

  // Clears every metric back to zero.
  void Reset() {
    motion_magnitude = 0.0f;
    spatial_pred_err = 0.0f;
    spatial_pred_err_h = 0.0f;
    spatial_pred_err_v = 0.0f;
  }

  float motion_magnitude;
  float spatial_pred_err;
  float spatial_pred_err_h;
  float spatial_pred_err_v;
};
473
474 /* This class holds up to 60 ms of super-wideband (32 kHz) stereo audio. It
475 * allows for adding and subtracting frames while keeping track of the resulting
476 * states.
477 *
478 * Notes
479 * - The total number of samples in |data_| is
480 * samples_per_channel_ * num_channels_
481 *
482 * - Stereo data is interleaved starting with the left channel.
483 *
484 * - The +operator assume that you would never add exactly opposite frames when
485 * deciding the resulting state. To do this use the -operator.
486 */
class AudioFrame {
 public:
  // Stereo, 32 kHz, 60 ms (2 * 32 * 60)
  static const size_t kMaxDataSizeSamples = 3840;

  enum VADActivity {
    kVadActive = 0,
    kVadPassive = 1,
    kVadUnknown = 2
  };
  enum SpeechType {
    kNormalSpeech = 0,
    kPLC = 1,
    kCNG = 2,
    kPLCCNG = 3,
    kUndefined = 4
  };

  AudioFrame();
  virtual ~AudioFrame() {}

  // Resets all members to their default state (except does not modify the
  // contents of |data_|).
  void Reset();

  // Overwrites this frame's metadata and samples; a NULL |data| zero-fills
  // the payload instead. The default |energy| of -1 wraps to 0xffffffff, the
  // "unknown energy" sentinel also used by Reset().
  // |interleaved_| is not changed by this method.
  void UpdateFrame(int id, uint32_t timestamp, const int16_t* data,
                   size_t samples_per_channel, int sample_rate_hz,
                   SpeechType speech_type, VADActivity vad_activity,
                   int num_channels = 1, uint32_t energy = -1);

  // Appends |rhs|'s samples after this frame's samples; channel counts must
  // match or the call is a no-op.
  AudioFrame& Append(const AudioFrame& rhs);

  // Deep copy, including the sample buffer.
  void CopyFrom(const AudioFrame& src);

  // Zeroes the currently valid samples (samples_per_channel_ * num_channels_).
  void Mute();

  // Arithmetic right-shift of every sample (scales the signal down by 2^rhs).
  AudioFrame& operator>>=(const int rhs);
  // Saturating element-wise add / subtract; see class comment for caveats.
  AudioFrame& operator+=(const AudioFrame& rhs);
  AudioFrame& operator-=(const AudioFrame& rhs);

  int id_;
  // RTP timestamp of the first sample in the AudioFrame.
  uint32_t timestamp_;
  // Time since the first frame in milliseconds.
  // -1 represents an uninitialized value.
  int64_t elapsed_time_ms_;
  // NTP time of the estimated capture time in local timebase in milliseconds.
  // -1 represents an uninitialized value.
  int64_t ntp_time_ms_;
  int16_t data_[kMaxDataSizeSamples];
  size_t samples_per_channel_;
  int sample_rate_hz_;
  int num_channels_;
  SpeechType speech_type_;
  VADActivity vad_activity_;
  // Note that there is no guarantee that |energy_| is correct. Any user of this
  // member must verify that the value is correct.
  // TODO(henrike) Remove |energy_|.
  // See https://code.google.com/p/webrtc/issues/detail?id=3315.
  uint32_t energy_;
  bool interleaved_;

 private:
  RTC_DISALLOW_COPY_AND_ASSIGN(AudioFrame);
};
553
AudioFrame()554 inline AudioFrame::AudioFrame()
555 : data_() {
556 Reset();
557 }
558
Reset()559 inline void AudioFrame::Reset() {
560 id_ = -1;
561 // TODO(wu): Zero is a valid value for |timestamp_|. We should initialize
562 // to an invalid value, or add a new member to indicate invalidity.
563 timestamp_ = 0;
564 elapsed_time_ms_ = -1;
565 ntp_time_ms_ = -1;
566 samples_per_channel_ = 0;
567 sample_rate_hz_ = 0;
568 num_channels_ = 0;
569 speech_type_ = kUndefined;
570 vad_activity_ = kVadUnknown;
571 energy_ = 0xffffffff;
572 interleaved_ = true;
573 }
574
UpdateFrame(int id,uint32_t timestamp,const int16_t * data,size_t samples_per_channel,int sample_rate_hz,SpeechType speech_type,VADActivity vad_activity,int num_channels,uint32_t energy)575 inline void AudioFrame::UpdateFrame(int id,
576 uint32_t timestamp,
577 const int16_t* data,
578 size_t samples_per_channel,
579 int sample_rate_hz,
580 SpeechType speech_type,
581 VADActivity vad_activity,
582 int num_channels,
583 uint32_t energy) {
584 id_ = id;
585 timestamp_ = timestamp;
586 samples_per_channel_ = samples_per_channel;
587 sample_rate_hz_ = sample_rate_hz;
588 speech_type_ = speech_type;
589 vad_activity_ = vad_activity;
590 num_channels_ = num_channels;
591 energy_ = energy;
592
593 assert(num_channels >= 0);
594 const size_t length = samples_per_channel * num_channels;
595 assert(length <= kMaxDataSizeSamples);
596 if (data != NULL) {
597 memcpy(data_, data, sizeof(int16_t) * length);
598 } else {
599 memset(data_, 0, sizeof(int16_t) * length);
600 }
601 }
602
CopyFrom(const AudioFrame & src)603 inline void AudioFrame::CopyFrom(const AudioFrame& src) {
604 if (this == &src) return;
605
606 id_ = src.id_;
607 timestamp_ = src.timestamp_;
608 elapsed_time_ms_ = src.elapsed_time_ms_;
609 ntp_time_ms_ = src.ntp_time_ms_;
610 samples_per_channel_ = src.samples_per_channel_;
611 sample_rate_hz_ = src.sample_rate_hz_;
612 speech_type_ = src.speech_type_;
613 vad_activity_ = src.vad_activity_;
614 num_channels_ = src.num_channels_;
615 energy_ = src.energy_;
616 interleaved_ = src.interleaved_;
617
618 assert(num_channels_ >= 0);
619 const size_t length = samples_per_channel_ * num_channels_;
620 assert(length <= kMaxDataSizeSamples);
621 memcpy(data_, src.data_, sizeof(int16_t) * length);
622 }
623
Mute()624 inline void AudioFrame::Mute() {
625 memset(data_, 0, samples_per_channel_ * num_channels_ * sizeof(int16_t));
626 }
627
// Arithmetic-shifts every sample right by |rhs| bits, scaling the signal
// down by 2^rhs. No-op if the channel count is out of the supported range.
inline AudioFrame& AudioFrame::operator>>=(const int rhs) {
  assert((num_channels_ > 0) && (num_channels_ < 3));
  if ((num_channels_ > 2) || (num_channels_ < 1)) return *this;

  for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) {
    // NOTE(review): right-shifting a negative int is implementation-defined
    // in C++; this relies on the usual arithmetic-shift behavior — confirm
    // for any unusual target toolchain.
    data_[i] = static_cast<int16_t>(data_[i] >> rhs);
  }
  return *this;
}
637
Append(const AudioFrame & rhs)638 inline AudioFrame& AudioFrame::Append(const AudioFrame& rhs) {
639 // Sanity check
640 assert((num_channels_ > 0) && (num_channels_ < 3));
641 assert(interleaved_ == rhs.interleaved_);
642 if ((num_channels_ > 2) || (num_channels_ < 1)) return *this;
643 if (num_channels_ != rhs.num_channels_) return *this;
644
645 if ((vad_activity_ == kVadActive) || rhs.vad_activity_ == kVadActive) {
646 vad_activity_ = kVadActive;
647 } else if (vad_activity_ == kVadUnknown || rhs.vad_activity_ == kVadUnknown) {
648 vad_activity_ = kVadUnknown;
649 }
650 if (speech_type_ != rhs.speech_type_) {
651 speech_type_ = kUndefined;
652 }
653
654 size_t offset = samples_per_channel_ * num_channels_;
655 for (size_t i = 0; i < rhs.samples_per_channel_ * rhs.num_channels_; i++) {
656 data_[offset + i] = rhs.data_[i];
657 }
658 samples_per_channel_ += rhs.samples_per_channel_;
659 return *this;
660 }
661
// Saturates a 32-bit sample value to the int16_t range.
// Note: this was previously wrapped in an anonymous namespace, which gives
// the function internal linkage. Since the inline AudioFrame operators below
// call it, each translation unit would reference a different entity — an ODR
// violation for those inline functions. A plain `inline` function has
// external linkage and is the correct form for a header.
inline int16_t ClampToInt16(int32_t input) {
  if (input < -0x00008000) {
    return -0x8000;
  } else if (input > 0x00007FFF) {
    return 0x7FFF;
  } else {
    return static_cast<int16_t>(input);
  }
}
673
674 inline AudioFrame& AudioFrame::operator+=(const AudioFrame& rhs) {
675 // Sanity check
676 assert((num_channels_ > 0) && (num_channels_ < 3));
677 assert(interleaved_ == rhs.interleaved_);
678 if ((num_channels_ > 2) || (num_channels_ < 1)) return *this;
679 if (num_channels_ != rhs.num_channels_) return *this;
680
681 bool noPrevData = false;
682 if (samples_per_channel_ != rhs.samples_per_channel_) {
683 if (samples_per_channel_ == 0) {
684 // special case we have no data to start with
685 samples_per_channel_ = rhs.samples_per_channel_;
686 noPrevData = true;
687 } else {
688 return *this;
689 }
690 }
691
692 if ((vad_activity_ == kVadActive) || rhs.vad_activity_ == kVadActive) {
693 vad_activity_ = kVadActive;
694 } else if (vad_activity_ == kVadUnknown || rhs.vad_activity_ == kVadUnknown) {
695 vad_activity_ = kVadUnknown;
696 }
697
698 if (speech_type_ != rhs.speech_type_) speech_type_ = kUndefined;
699
700 if (noPrevData) {
701 memcpy(data_, rhs.data_,
702 sizeof(int16_t) * rhs.samples_per_channel_ * num_channels_);
703 } else {
704 // IMPROVEMENT this can be done very fast in assembly
705 for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) {
706 int32_t wrap_guard =
707 static_cast<int32_t>(data_[i]) + static_cast<int32_t>(rhs.data_[i]);
708 data_[i] = ClampToInt16(wrap_guard);
709 }
710 }
711 energy_ = 0xffffffff;
712 return *this;
713 }
714
715 inline AudioFrame& AudioFrame::operator-=(const AudioFrame& rhs) {
716 // Sanity check
717 assert((num_channels_ > 0) && (num_channels_ < 3));
718 assert(interleaved_ == rhs.interleaved_);
719 if ((num_channels_ > 2) || (num_channels_ < 1)) return *this;
720
721 if ((samples_per_channel_ != rhs.samples_per_channel_) ||
722 (num_channels_ != rhs.num_channels_)) {
723 return *this;
724 }
725 if ((vad_activity_ != kVadPassive) || rhs.vad_activity_ != kVadPassive) {
726 vad_activity_ = kVadUnknown;
727 }
728 speech_type_ = kUndefined;
729
730 for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) {
731 int32_t wrap_guard =
732 static_cast<int32_t>(data_[i]) - static_cast<int32_t>(rhs.data_[i]);
733 data_[i] = ClampToInt16(wrap_guard);
734 }
735 energy_ = 0xffffffff;
736 return *this;
737 }
738
IsNewerSequenceNumber(uint16_t sequence_number,uint16_t prev_sequence_number)739 inline bool IsNewerSequenceNumber(uint16_t sequence_number,
740 uint16_t prev_sequence_number) {
741 // Distinguish between elements that are exactly 0x8000 apart.
742 // If s1>s2 and |s1-s2| = 0x8000: IsNewer(s1,s2)=true, IsNewer(s2,s1)=false
743 // rather than having IsNewer(s1,s2) = IsNewer(s2,s1) = false.
744 if (static_cast<uint16_t>(sequence_number - prev_sequence_number) == 0x8000) {
745 return sequence_number > prev_sequence_number;
746 }
747 return sequence_number != prev_sequence_number &&
748 static_cast<uint16_t>(sequence_number - prev_sequence_number) < 0x8000;
749 }
750
IsNewerTimestamp(uint32_t timestamp,uint32_t prev_timestamp)751 inline bool IsNewerTimestamp(uint32_t timestamp, uint32_t prev_timestamp) {
752 // Distinguish between elements that are exactly 0x80000000 apart.
753 // If t1>t2 and |t1-t2| = 0x80000000: IsNewer(t1,t2)=true,
754 // IsNewer(t2,t1)=false
755 // rather than having IsNewer(t1,t2) = IsNewer(t2,t1) = false.
756 if (static_cast<uint32_t>(timestamp - prev_timestamp) == 0x80000000) {
757 return timestamp > prev_timestamp;
758 }
759 return timestamp != prev_timestamp &&
760 static_cast<uint32_t>(timestamp - prev_timestamp) < 0x80000000;
761 }
762
LatestSequenceNumber(uint16_t sequence_number1,uint16_t sequence_number2)763 inline uint16_t LatestSequenceNumber(uint16_t sequence_number1,
764 uint16_t sequence_number2) {
765 return IsNewerSequenceNumber(sequence_number1, sequence_number2)
766 ? sequence_number1
767 : sequence_number2;
768 }
769
LatestTimestamp(uint32_t timestamp1,uint32_t timestamp2)770 inline uint32_t LatestTimestamp(uint32_t timestamp1, uint32_t timestamp2) {
771 return IsNewerTimestamp(timestamp1, timestamp2) ? timestamp1 : timestamp2;
772 }
773
// Utility class to unwrap a sequence number to a larger type, for easier
// handling large ranges. Note that sequence numbers will never be unwrapped
// to a negative value.
class SequenceNumberUnwrapper {
 public:
  SequenceNumberUnwrapper() : last_seq_(-1) {}  // -1 == no number seen yet.

  // Get the unwrapped sequence, but don't update the internal state.
  int64_t UnwrapWithoutUpdate(uint16_t sequence_number) {
    // The first observed number establishes the baseline as-is.
    if (last_seq_ == -1)
      return sequence_number;

    uint16_t cropped_last = static_cast<uint16_t>(last_seq_);
    int64_t delta = sequence_number - cropped_last;
    if (IsNewerSequenceNumber(sequence_number, cropped_last)) {
      if (delta < 0)
        delta += (1 << 16);  // Wrap forwards.
    } else if (delta > 0 && (last_seq_ + delta - (1 << 16)) >= 0) {
      // If sequence_number is older but delta is positive, this is a backwards
      // wrap-around. However, don't wrap backwards past 0 (unwrapped).
      delta -= (1 << 16);
    }

    return last_seq_ + delta;
  }

  // Only update the internal state to the specified last (unwrapped) sequence.
  void UpdateLast(int64_t last_sequence) { last_seq_ = last_sequence; }

  // Unwrap the sequence number and update the internal state.
  int64_t Unwrap(uint16_t sequence_number) {
    int64_t unwrapped = UnwrapWithoutUpdate(sequence_number);
    UpdateLast(unwrapped);
    return unwrapped;
  }

 private:
  int64_t last_seq_;  // Last unwrapped sequence, or -1 before the first call.
};
813
814 } // namespace webrtc
815
816 #endif // MODULE_COMMON_TYPES_H
817