1 /*
2 * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "modules/rtp_rtcp/source/video_rtp_depacketizer_h264.h"
12
13 #include <cstddef>
14 #include <cstdint>
15 #include <utility>
16 #include <vector>
17
18 #include "absl/base/macros.h"
19 #include "absl/types/optional.h"
20 #include "absl/types/variant.h"
21 #include "common_video/h264/h264_common.h"
22 #include "common_video/h264/pps_parser.h"
23 #include "common_video/h264/sps_parser.h"
24 #include "common_video/h264/sps_vui_rewriter.h"
25 #include "modules/rtp_rtcp/source/byte_io.h"
26 #include "modules/rtp_rtcp/source/video_rtp_depacketizer.h"
27 #include "rtc_base/checks.h"
28 #include "rtc_base/copy_on_write_buffer.h"
29 #include "rtc_base/logging.h"
30
31 namespace webrtc {
32 namespace {
33
// Byte counts of the fixed-size fields this depacketizer steps over.
constexpr size_t kNalHeaderSize = 1;
constexpr size_t kFuAHeaderSize = 2;
constexpr size_t kLengthFieldSize = 2;
// One NAL header followed by one length field.
constexpr size_t kStapAHeaderSize = kNalHeaderSize + kLengthFieldSize;

// Bit masks for FU (A and B) indicators.
enum NalDefs : uint8_t {
  kFBit = 0x80,
  kNriMask = 0x60,
  kTypeMask = 0x1F,
};

// Bit masks for FU (A and B) headers.
enum FuDefs : uint8_t {
  kSBit = 0x80,
  kEBit = 0x40,
  kRBit = 0x20,
};
44
45 // TODO(pbos): Avoid parsing this here as well as inside the jitter buffer.
ParseStapAStartOffsets(const uint8_t * nalu_ptr,size_t length_remaining,std::vector<size_t> * offsets)46 bool ParseStapAStartOffsets(const uint8_t* nalu_ptr,
47 size_t length_remaining,
48 std::vector<size_t>* offsets) {
49 size_t offset = 0;
50 while (length_remaining > 0) {
51 // Buffer doesn't contain room for additional nalu length.
52 if (length_remaining < sizeof(uint16_t))
53 return false;
54 uint16_t nalu_size = ByteReader<uint16_t>::ReadBigEndian(nalu_ptr);
55 nalu_ptr += sizeof(uint16_t);
56 length_remaining -= sizeof(uint16_t);
57 if (nalu_size > length_remaining)
58 return false;
59 nalu_ptr += nalu_size;
60 length_remaining -= nalu_size;
61
62 offsets->push_back(offset + kStapAHeaderSize);
63 offset += kLengthFieldSize + nalu_size;
64 }
65 return true;
66 }
67
ProcessStapAOrSingleNalu(rtc::CopyOnWriteBuffer rtp_payload)68 absl::optional<VideoRtpDepacketizer::ParsedRtpPayload> ProcessStapAOrSingleNalu(
69 rtc::CopyOnWriteBuffer rtp_payload) {
70 const uint8_t* const payload_data = rtp_payload.cdata();
71 absl::optional<VideoRtpDepacketizer::ParsedRtpPayload> parsed_payload(
72 absl::in_place);
73 bool modified_buffer = false;
74 parsed_payload->video_payload = rtp_payload;
75 parsed_payload->video_header.width = 0;
76 parsed_payload->video_header.height = 0;
77 parsed_payload->video_header.codec = kVideoCodecH264;
78 parsed_payload->video_header.simulcastIdx = 0;
79 parsed_payload->video_header.is_first_packet_in_frame = true;
80 auto& h264_header = parsed_payload->video_header.video_type_header
81 .emplace<RTPVideoHeaderH264>();
82
83 const uint8_t* nalu_start = payload_data + kNalHeaderSize;
84 const size_t nalu_length = rtp_payload.size() - kNalHeaderSize;
85 uint8_t nal_type = payload_data[0] & kTypeMask;
86 std::vector<size_t> nalu_start_offsets;
87 if (nal_type == H264::NaluType::kStapA) {
88 // Skip the StapA header (StapA NAL type + length).
89 if (rtp_payload.size() <= kStapAHeaderSize) {
90 RTC_LOG(LS_ERROR) << "StapA header truncated.";
91 return absl::nullopt;
92 }
93
94 if (!ParseStapAStartOffsets(nalu_start, nalu_length, &nalu_start_offsets)) {
95 RTC_LOG(LS_ERROR) << "StapA packet with incorrect NALU packet lengths.";
96 return absl::nullopt;
97 }
98
99 h264_header.packetization_type = kH264StapA;
100 nal_type = payload_data[kStapAHeaderSize] & kTypeMask;
101 } else {
102 h264_header.packetization_type = kH264SingleNalu;
103 nalu_start_offsets.push_back(0);
104 }
105 h264_header.nalu_type = nal_type;
106 parsed_payload->video_header.frame_type = VideoFrameType::kVideoFrameDelta;
107
108 nalu_start_offsets.push_back(rtp_payload.size() +
109 kLengthFieldSize); // End offset.
110 for (size_t i = 0; i < nalu_start_offsets.size() - 1; ++i) {
111 size_t start_offset = nalu_start_offsets[i];
112 // End offset is actually start offset for next unit, excluding length field
113 // so remove that from this units length.
114 size_t end_offset = nalu_start_offsets[i + 1] - kLengthFieldSize;
115 if (end_offset - start_offset < H264::kNaluTypeSize) {
116 RTC_LOG(LS_ERROR) << "STAP-A packet too short";
117 return absl::nullopt;
118 }
119
120 NaluInfo nalu;
121 nalu.type = payload_data[start_offset] & kTypeMask;
122 nalu.sps_id = -1;
123 nalu.pps_id = -1;
124 start_offset += H264::kNaluTypeSize;
125
126 switch (nalu.type) {
127 case H264::NaluType::kSps: {
128 // Check if VUI is present in SPS and if it needs to be modified to
129 // avoid
130 // excessive decoder latency.
131
132 // Copy any previous data first (likely just the first header).
133 rtc::Buffer output_buffer;
134 if (start_offset)
135 output_buffer.AppendData(payload_data, start_offset);
136
137 absl::optional<SpsParser::SpsState> sps;
138
139 SpsVuiRewriter::ParseResult result = SpsVuiRewriter::ParseAndRewriteSps(
140 &payload_data[start_offset], end_offset - start_offset, &sps,
141 nullptr, &output_buffer, SpsVuiRewriter::Direction::kIncoming);
142
143 if (result == SpsVuiRewriter::ParseResult::kVuiRewritten) {
144 if (modified_buffer) {
145 RTC_LOG(LS_WARNING)
146 << "More than one H264 SPS NAL units needing "
147 "rewriting found within a single STAP-A packet. "
148 "Keeping the first and rewriting the last.";
149 }
150
151 // Rewrite length field to new SPS size.
152 if (h264_header.packetization_type == kH264StapA) {
153 size_t length_field_offset =
154 start_offset - (H264::kNaluTypeSize + kLengthFieldSize);
155 // Stap-A Length includes payload data and type header.
156 size_t rewritten_size =
157 output_buffer.size() - start_offset + H264::kNaluTypeSize;
158 ByteWriter<uint16_t>::WriteBigEndian(
159 &output_buffer[length_field_offset], rewritten_size);
160 }
161
162 parsed_payload->video_payload.SetData(output_buffer.data(),
163 output_buffer.size());
164 // Append rest of packet.
165 parsed_payload->video_payload.AppendData(
166 &payload_data[end_offset],
167 nalu_length + kNalHeaderSize - end_offset);
168
169 modified_buffer = true;
170 }
171
172 if (sps) {
173 parsed_payload->video_header.width = sps->width;
174 parsed_payload->video_header.height = sps->height;
175 nalu.sps_id = sps->id;
176 } else {
177 RTC_LOG(LS_WARNING) << "Failed to parse SPS id from SPS slice.";
178 }
179 parsed_payload->video_header.frame_type =
180 VideoFrameType::kVideoFrameKey;
181 break;
182 }
183 case H264::NaluType::kPps: {
184 uint32_t pps_id;
185 uint32_t sps_id;
186 if (PpsParser::ParsePpsIds(&payload_data[start_offset],
187 end_offset - start_offset, &pps_id,
188 &sps_id)) {
189 nalu.pps_id = pps_id;
190 nalu.sps_id = sps_id;
191 } else {
192 RTC_LOG(LS_WARNING)
193 << "Failed to parse PPS id and SPS id from PPS slice.";
194 }
195 break;
196 }
197 case H264::NaluType::kIdr:
198 parsed_payload->video_header.frame_type =
199 VideoFrameType::kVideoFrameKey;
200 ABSL_FALLTHROUGH_INTENDED;
201 case H264::NaluType::kSlice: {
202 absl::optional<uint32_t> pps_id = PpsParser::ParsePpsIdFromSlice(
203 &payload_data[start_offset], end_offset - start_offset);
204 if (pps_id) {
205 nalu.pps_id = *pps_id;
206 } else {
207 RTC_LOG(LS_WARNING) << "Failed to parse PPS id from slice of type: "
208 << static_cast<int>(nalu.type);
209 }
210 break;
211 }
212 // Slices below don't contain SPS or PPS ids.
213 case H264::NaluType::kAud:
214 case H264::NaluType::kEndOfSequence:
215 case H264::NaluType::kEndOfStream:
216 case H264::NaluType::kFiller:
217 case H264::NaluType::kSei:
218 break;
219 case H264::NaluType::kStapA:
220 case H264::NaluType::kFuA:
221 RTC_LOG(LS_WARNING) << "Unexpected STAP-A or FU-A received.";
222 return absl::nullopt;
223 }
224
225 if (h264_header.nalus_length == kMaxNalusPerPacket) {
226 RTC_LOG(LS_WARNING)
227 << "Received packet containing more than " << kMaxNalusPerPacket
228 << " NAL units. Will not keep track sps and pps ids for all of them.";
229 } else {
230 h264_header.nalus[h264_header.nalus_length++] = nalu;
231 }
232 }
233
234 return parsed_payload;
235 }
236
ParseFuaNalu(rtc::CopyOnWriteBuffer rtp_payload)237 absl::optional<VideoRtpDepacketizer::ParsedRtpPayload> ParseFuaNalu(
238 rtc::CopyOnWriteBuffer rtp_payload) {
239 if (rtp_payload.size() < kFuAHeaderSize) {
240 RTC_LOG(LS_ERROR) << "FU-A NAL units truncated.";
241 return absl::nullopt;
242 }
243 absl::optional<VideoRtpDepacketizer::ParsedRtpPayload> parsed_payload(
244 absl::in_place);
245 uint8_t fnri = rtp_payload.cdata()[0] & (kFBit | kNriMask);
246 uint8_t original_nal_type = rtp_payload.cdata()[1] & kTypeMask;
247 bool first_fragment = (rtp_payload.cdata()[1] & kSBit) > 0;
248 NaluInfo nalu;
249 nalu.type = original_nal_type;
250 nalu.sps_id = -1;
251 nalu.pps_id = -1;
252 if (first_fragment) {
253 absl::optional<uint32_t> pps_id =
254 PpsParser::ParsePpsIdFromSlice(rtp_payload.cdata() + 2 * kNalHeaderSize,
255 rtp_payload.size() - 2 * kNalHeaderSize);
256 if (pps_id) {
257 nalu.pps_id = *pps_id;
258 } else {
259 RTC_LOG(LS_WARNING)
260 << "Failed to parse PPS from first fragment of FU-A NAL "
261 "unit with original type: "
262 << static_cast<int>(nalu.type);
263 }
264 uint8_t original_nal_header = fnri | original_nal_type;
265 rtp_payload =
266 rtp_payload.Slice(kNalHeaderSize, rtp_payload.size() - kNalHeaderSize);
267 rtp_payload[0] = original_nal_header;
268 parsed_payload->video_payload = std::move(rtp_payload);
269 } else {
270 parsed_payload->video_payload =
271 rtp_payload.Slice(kFuAHeaderSize, rtp_payload.size() - kFuAHeaderSize);
272 }
273
274 if (original_nal_type == H264::NaluType::kIdr) {
275 parsed_payload->video_header.frame_type = VideoFrameType::kVideoFrameKey;
276 } else {
277 parsed_payload->video_header.frame_type = VideoFrameType::kVideoFrameDelta;
278 }
279 parsed_payload->video_header.width = 0;
280 parsed_payload->video_header.height = 0;
281 parsed_payload->video_header.codec = kVideoCodecH264;
282 parsed_payload->video_header.simulcastIdx = 0;
283 parsed_payload->video_header.is_first_packet_in_frame = first_fragment;
284 auto& h264_header = parsed_payload->video_header.video_type_header
285 .emplace<RTPVideoHeaderH264>();
286 h264_header.packetization_type = kH264FuA;
287 h264_header.nalu_type = original_nal_type;
288 if (first_fragment) {
289 h264_header.nalus[h264_header.nalus_length] = nalu;
290 h264_header.nalus_length = 1;
291 }
292 return parsed_payload;
293 }
294
295 } // namespace
296
297 absl::optional<VideoRtpDepacketizer::ParsedRtpPayload>
Parse(rtc::CopyOnWriteBuffer rtp_payload)298 VideoRtpDepacketizerH264::Parse(rtc::CopyOnWriteBuffer rtp_payload) {
299 if (rtp_payload.size() == 0) {
300 RTC_LOG(LS_ERROR) << "Empty payload.";
301 return absl::nullopt;
302 }
303
304 uint8_t nal_type = rtp_payload.cdata()[0] & kTypeMask;
305
306 if (nal_type == H264::NaluType::kFuA) {
307 // Fragmented NAL units (FU-A).
308 return ParseFuaNalu(std::move(rtp_payload));
309 } else {
310 // We handle STAP-A and single NALU's the same way here. The jitter buffer
311 // will depacketize the STAP-A into NAL units later.
312 return ProcessStapAOrSingleNalu(std::move(rtp_payload));
313 }
314 }
315
316 } // namespace webrtc
317