// Copyright (c) 2007-2017 Fredrik Mellbin
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

#include "videosource.h"
#include "indexing.h"
#include "videoutils.h"
#include <algorithm>
#include <thread>


void FFMS_VideoSource::SanityCheckFrameForData(AVFrame *Frame) {
    for (int i = 0; i < 4; i++) {
        if (Frame->data[i] != nullptr && Frame->linesize[i] != 0)
            return;
    }

    throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_CODEC, "Insanity detected: decoder returned an empty frame");
}

void FFMS_VideoSource::GetFrameCheck(int n) {
    if (n < 0 || n >= VP.NumFrames)
        throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_INVALID_ARGUMENT,
            "Out of bounds frame requested");
}

FFMS_Frame *FFMS_VideoSource::OutputFrame(AVFrame *Frame) {
    SanityCheckFrameForData(Frame);

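    // Re-evaluate the conversion path whenever the decoded frame's geometry or
    // pixel format differs from the previous frame (mid-stream format changes).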
    if (LastFrameWidth != Frame->width || LastFrameHeight != Frame->height || LastFramePixelFormat != Frame->format) {
        if (TargetHeight > 0 && TargetWidth > 0 && !TargetPixelFormats.empty()) {
            if (!InputFormatOverridden) {
                InputFormat = AV_PIX_FMT_NONE;
                InputColorSpace = AVCOL_SPC_UNSPECIFIED;
                InputColorRange = AVCOL_RANGE_UNSPECIFIED;
            }

            ReAdjustOutputFormat(Frame);
        } else {
            OutputFormat = (AVPixelFormat) Frame->format;
        }
    }

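    // A conversion context only exists when scaling or pixel format conversion
    // was requested; otherwise the decoder's own buffers are exposed directly.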
    if (SWS) {
        sws_scale(SWS, Frame->data, Frame->linesize, 0, Frame->height, SWSFrameData, SWSFrameLinesize);
        for (int i = 0; i < 4; i++) {
            LocalFrame.Data[i] = SWSFrameData[i];
            LocalFrame.Linesize[i] = SWSFrameLinesize[i];
        }
    } else {
        // Special case to avoid ugly casts
        for (int i = 0; i < 4; i++) {
            LocalFrame.Data[i] = Frame->data[i];
            LocalFrame.Linesize[i] = Frame->linesize[i];
        }
    }

    LocalFrame.EncodedWidth = Frame->width;
    LocalFrame.EncodedHeight = Frame->height;
    LocalFrame.EncodedPixelFormat = Frame->format;
    LocalFrame.ScaledWidth = TargetWidth;
    LocalFrame.ScaledHeight = TargetHeight;
    LocalFrame.ConvertedPixelFormat = OutputFormat;
    LocalFrame.KeyFrame = Frame->key_frame;
    LocalFrame.PictType = av_get_picture_type_char(Frame->pict_type);
    LocalFrame.RepeatPict = Frame->repeat_pict;
    LocalFrame.InterlacedFrame = Frame->interlaced_frame;
    LocalFrame.TopFieldFirst = Frame->top_field_first;
    LocalFrame.ColorSpace = OutputColorSpaceSet ? OutputColorSpace : Frame->colorspace;
    LocalFrame.ColorRange = OutputColorRangeSet ? OutputColorRange : Frame->color_range;
    LocalFrame.ColorPrimaries = (OutputColorPrimaries >= 0) ? OutputColorPrimaries : Frame->color_primaries;
    LocalFrame.TransferCharateristics = (OutputTransferCharateristics >= 0) ? OutputTransferCharateristics : Frame->color_trc;
    LocalFrame.ChromaLocation = (OutputChromaLocation >= 0) ? OutputChromaLocation : Frame->chroma_location;

    const AVFrameSideData *MasteringDisplaySideData = av_frame_get_side_data(Frame, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA);
    if (MasteringDisplaySideData) {
        const AVMasteringDisplayMetadata *MasteringDisplay = reinterpret_cast<const AVMasteringDisplayMetadata *>(MasteringDisplaySideData->data);
        if (MasteringDisplay->has_primaries) {
            LocalFrame.HasMasteringDisplayPrimaries = MasteringDisplay->has_primaries;
            for (int i = 0; i < 3; i++) {
                LocalFrame.MasteringDisplayPrimariesX[i] = av_q2d(MasteringDisplay->display_primaries[i][0]);
                LocalFrame.MasteringDisplayPrimariesY[i] = av_q2d(MasteringDisplay->display_primaries[i][1]);
            }
            LocalFrame.MasteringDisplayWhitePointX = av_q2d(MasteringDisplay->white_point[0]);
            LocalFrame.MasteringDisplayWhitePointY = av_q2d(MasteringDisplay->white_point[1]);
        }
        if (MasteringDisplay->has_luminance) {
            LocalFrame.HasMasteringDisplayLuminance = MasteringDisplay->has_luminance;
            LocalFrame.MasteringDisplayMinLuminance = av_q2d(MasteringDisplay->min_luminance);
            LocalFrame.MasteringDisplayMaxLuminance = av_q2d(MasteringDisplay->max_luminance);
        }
    }
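    // The primaries are treated as valid only when every coordinate, including
    // the white point, is nonzero; a single zeroed value invalidates the set.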
    LocalFrame.HasMasteringDisplayPrimaries = !!LocalFrame.MasteringDisplayPrimariesX[0] && !!LocalFrame.MasteringDisplayPrimariesY[0] &&
                                              !!LocalFrame.MasteringDisplayPrimariesX[1] && !!LocalFrame.MasteringDisplayPrimariesY[1] &&
                                              !!LocalFrame.MasteringDisplayPrimariesX[2] && !!LocalFrame.MasteringDisplayPrimariesY[2] &&
                                              !!LocalFrame.MasteringDisplayWhitePointX && !!LocalFrame.MasteringDisplayWhitePointY;
    /* MasteringDisplayMinLuminance can be 0 */
    LocalFrame.HasMasteringDisplayLuminance = !!LocalFrame.MasteringDisplayMaxLuminance;

    const AVFrameSideData *ContentLightSideData = av_frame_get_side_data(Frame, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL);
    if (ContentLightSideData) {
        const AVContentLightMetadata *ContentLightLevel = reinterpret_cast<const AVContentLightMetadata *>(ContentLightSideData->data);
        LocalFrame.ContentLightLevelMax = ContentLightLevel->MaxCLL;
        LocalFrame.ContentLightLevelAverage = ContentLightLevel->MaxFALL;
    }
    /* Considered present if either value is set */
    LocalFrame.HasContentLightLevel = !!LocalFrame.ContentLightLevelMax || !!LocalFrame.ContentLightLevelAverage;

    LastFrameHeight = Frame->height;
    LastFrameWidth = Frame->width;
    LastFramePixelFormat = (AVPixelFormat) Frame->format;

    return &LocalFrame;
}

FFMS_VideoSource::FFMS_VideoSource(const char *SourceFile, FFMS_Index &Index, int Track, int Threads, int SeekMode)
    : Index(Index), SeekMode(SeekMode) {

    try {
        if (Track < 0 || Track >= static_cast<int>(Index.size()))
            throw FFMS_Exception(FFMS_ERROR_INDEX, FFMS_ERROR_INVALID_ARGUMENT,
                "Out of bounds track index selected");

        if (Index[Track].TT != FFMS_TYPE_VIDEO)
            throw FFMS_Exception(FFMS_ERROR_INDEX, FFMS_ERROR_INVALID_ARGUMENT,
                "Not a video track");

        if (Index[Track].empty())
            throw FFMS_Exception(FFMS_ERROR_INDEX, FFMS_ERROR_INVALID_ARGUMENT,
                "Video track contains no frames");

        if (!Index.CompareFileSignature(SourceFile))
            throw FFMS_Exception(FFMS_ERROR_INDEX, FFMS_ERROR_FILE_MISMATCH,
                "The index does not match the source file");

        Frames = Index[Track];
        VideoTrack = Track;

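        // If no thread count was given, derive one from hardware concurrency,
        // capped at 16.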
        if (Threads < 1)
            DecodingThreads = (std::min)(std::thread::hardware_concurrency(), 16u);
        else
            DecodingThreads = Threads;

        DecodeFrame = av_frame_alloc();
        LastDecodedFrame = av_frame_alloc();

        if (!DecodeFrame || !LastDecodedFrame)
            throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_ALLOCATION_FAILED,
                "Could not allocate dummy frame.");

        // Dummy allocations so the unallocated case doesn't have to be handled later
        if (av_image_alloc(SWSFrameData, SWSFrameLinesize, 16, 16, AV_PIX_FMT_GRAY8, 4) < 0)
            throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_ALLOCATION_FAILED,
                "Could not allocate dummy frame.");

        LAVFOpenFile(SourceFile, FormatContext, VideoTrack);

        AVCodec *Codec = avcodec_find_decoder(FormatContext->streams[VideoTrack]->codecpar->codec_id);
        if (Codec == nullptr)
            throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_CODEC,
                "Video codec not found");

        CodecContext = avcodec_alloc_context3(Codec);
        if (CodecContext == nullptr)
            throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_ALLOCATION_FAILED,
                "Could not allocate video codec context.");
        if (avcodec_parameters_to_context(CodecContext, FormatContext->streams[VideoTrack]->codecpar) < 0)
            throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_CODEC,
                "Could not copy video decoder parameters.");
        CodecContext->thread_count = DecodingThreads;
        CodecContext->has_b_frames = Frames.MaxBFrames;

        // Full explanation by a more clever person available here: https://github.com/Nevcairiel/LAVFilters/issues/113
        if (CodecContext->codec_id == AV_CODEC_ID_H264 && CodecContext->has_b_frames)
            CodecContext->has_b_frames = 15; // the maximum possible value for h264

        if (avcodec_open2(CodecContext, Codec, nullptr) < 0)
            throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_CODEC,
                "Could not open video codec");

        // Similar yet different to the h264 workaround above:
        // vc1 sets has_b_frames to 1 no matter how many B-frames there actually
        // are, so we set it to the maximum value to avoid confusing our own
        // delay guesses later. Unlike h264, this doesn't affect actual vc1 reordering.
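        // In both cases one extra frame of delay is added per additional
        // decoding thread; e.g. 2 B-frames and 4 threads give a delay of 5.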
        if (CodecContext->codec_id == AV_CODEC_ID_VC1 && CodecContext->has_b_frames)
            Delay = 7 + (CodecContext->thread_count - 1); // the maximum possible value for vc1
        else
            Delay = CodecContext->has_b_frames + (CodecContext->thread_count - 1); // Normal decoder delay

        // Always try to decode a frame to make sure all required parameters are known
        int64_t DummyPTS = 0, DummyPos = 0;
        DecodeNextFrame(DummyPTS, DummyPos);

        //VP.image_type = VideoInfo::IT_TFF;
        VP.FPSDenominator = FormatContext->streams[VideoTrack]->time_base.num;
        VP.FPSNumerator = FormatContext->streams[VideoTrack]->time_base.den;

        // sanity check framerate
        if (VP.FPSDenominator <= 0 || VP.FPSNumerator <= 0) {
            VP.FPSDenominator = 1;
            VP.FPSNumerator = 30;
        }

        // Calculate the average framerate
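        // The track timebase yields PTS in milliseconds (PTS * TB.Num / TB.Den),
        // so the total PTS difference scaled by 1000 and divided by the frame
        // count gives the average frame duration in microseconds, matching the
        // fixed numerator of 1000000 below.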
        size_t TotalFrames = 0;
        for (size_t i = 0; i < Frames.size(); i++)
            if (!Frames[i].Hidden)
                TotalFrames++;

        if (TotalFrames >= 2) {
            double PTSDiff = (double)(Frames.back().PTS - Frames.front().PTS);
            double TD = (double)(Frames.TB.Den);
            double TN = (double)(Frames.TB.Num);
            VP.FPSDenominator = (unsigned int)(PTSDiff * TN / TD * 1000.0 / (TotalFrames - 1));
            VP.FPSNumerator = 1000000;
        }

        // Set the video properties from the codec context
        SetVideoProperties();

        // Set the SAR from the container if the codec SAR is invalid
        if (VP.SARNum <= 0 || VP.SARDen <= 0) {
            VP.SARNum = FormatContext->streams[VideoTrack]->sample_aspect_ratio.num;
            VP.SARDen = FormatContext->streams[VideoTrack]->sample_aspect_ratio.den;
        }

        // Set stereoscopic 3d type
        VP.Stereo3DType = FFMS_S3D_TYPE_2D;
        VP.Stereo3DFlags = 0;

        for (int i = 0; i < FormatContext->streams[VideoTrack]->nb_side_data; i++) {
            if (FormatContext->streams[VideoTrack]->side_data[i].type == AV_PKT_DATA_STEREO3D) {
                const AVStereo3D *StereoSideData = (const AVStereo3D *)FormatContext->streams[VideoTrack]->side_data[i].data;
                VP.Stereo3DType = StereoSideData->type;
                VP.Stereo3DFlags = StereoSideData->flags;
            } else if (FormatContext->streams[VideoTrack]->side_data[i].type == AV_PKT_DATA_MASTERING_DISPLAY_METADATA) {
                const AVMasteringDisplayMetadata *MasteringDisplay = (const AVMasteringDisplayMetadata *)FormatContext->streams[VideoTrack]->side_data[i].data;
                if (MasteringDisplay->has_primaries) {
                    VP.HasMasteringDisplayPrimaries = MasteringDisplay->has_primaries;
                    for (int j = 0; j < 3; j++) {
                        VP.MasteringDisplayPrimariesX[j] = av_q2d(MasteringDisplay->display_primaries[j][0]);
                        VP.MasteringDisplayPrimariesY[j] = av_q2d(MasteringDisplay->display_primaries[j][1]);
                    }
                    VP.MasteringDisplayWhitePointX = av_q2d(MasteringDisplay->white_point[0]);
                    VP.MasteringDisplayWhitePointY = av_q2d(MasteringDisplay->white_point[1]);
                }
                if (MasteringDisplay->has_luminance) {
                    VP.HasMasteringDisplayLuminance = MasteringDisplay->has_luminance;
                    VP.MasteringDisplayMinLuminance = av_q2d(MasteringDisplay->min_luminance);
                    VP.MasteringDisplayMaxLuminance = av_q2d(MasteringDisplay->max_luminance);
                }

                VP.HasMasteringDisplayPrimaries = !!VP.MasteringDisplayPrimariesX[0] && !!VP.MasteringDisplayPrimariesY[0] &&
                                                  !!VP.MasteringDisplayPrimariesX[1] && !!VP.MasteringDisplayPrimariesY[1] &&
                                                  !!VP.MasteringDisplayPrimariesX[2] && !!VP.MasteringDisplayPrimariesY[2] &&
                                                  !!VP.MasteringDisplayWhitePointX && !!VP.MasteringDisplayWhitePointY;
                /* MasteringDisplayMinLuminance can be 0 */
                VP.HasMasteringDisplayLuminance = !!VP.MasteringDisplayMaxLuminance;
            } else if (FormatContext->streams[VideoTrack]->side_data[i].type == AV_PKT_DATA_CONTENT_LIGHT_LEVEL) {
                const AVContentLightMetadata *ContentLightLevel = (const AVContentLightMetadata *)FormatContext->streams[VideoTrack]->side_data[i].data;

                VP.ContentLightLevelMax = ContentLightLevel->MaxCLL;
                VP.ContentLightLevelAverage = ContentLightLevel->MaxFALL;

                /* Considered present if either value is set */
                VP.HasContentLightLevel = !!VP.ContentLightLevelMax || !!VP.ContentLightLevelAverage;
            }
        }

        // Set rotation
        VP.Rotation = 0;
        VP.Flip = 0;
        int32_t *RotationMatrix = reinterpret_cast<int32_t *>(av_stream_get_side_data(FormatContext->streams[VideoTrack], AV_PKT_DATA_DISPLAYMATRIX, nullptr));
        if (RotationMatrix) {
            int64_t det = (int64_t)RotationMatrix[0] * RotationMatrix[4] - (int64_t)RotationMatrix[1] * RotationMatrix[3];
            if (det < 0) {
                /* Always assume a horizontal flip for simplicity; it can be changed later if the rotation is 180. */
                VP.Flip = 1;

                /* Flip the matrix to decouple the flip and rotation operations. */
                av_display_matrix_flip(RotationMatrix, 1, 0);
            }

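            /* av_display_rotation_get() returns the rotation in degrees
             * counterclockwise; the sign handling below converts this to a
             * positive clockwise angle in the video frame. */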
            int rot = lround(av_display_rotation_get(RotationMatrix));

            if (rot == 180 && det < 0) {
                /* This is a vertical flip with no rotation. */
                VP.Flip = -1;
            } else {
                /* It is possible to have a 90/270 rotation and a horizontal flip:
                 * in this case, the rotation angle applies to the video frame
                 * (rather than the rendering frame), so add this step to nullify
                 * the conversion below. */
                if (VP.Flip)
                    rot *= -1;

                /* Return a positive value, noting that this converts angles
                 * from the rendering frame to the video frame. */
                VP.Rotation = -rot;
                if (VP.Rotation < 0)
                    VP.Rotation += 360;
            }
        }

        if (SeekMode >= 0 && Frames.size() > 1) {
            if (Seek(0) < 0) {
                throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_CODEC,
                    "Video track is unseekable");
            } else {
                avcodec_flush_buffers(CodecContext);
                // Since we seeked to frame 0, frame 0 is once again the next frame that will be decoded
                CurrentFrame = 0;
            }
        }

        // Cannot "output" without doing all other initialization
        // This is the additional mess required for seekmode=-1 to work in a reasonable way
        OutputFrame(DecodeFrame);

        if (LocalFrame.HasMasteringDisplayPrimaries) {
            VP.HasMasteringDisplayPrimaries = LocalFrame.HasMasteringDisplayPrimaries;
            for (int i = 0; i < 3; i++) {
                VP.MasteringDisplayPrimariesX[i] = LocalFrame.MasteringDisplayPrimariesX[i];
                VP.MasteringDisplayPrimariesY[i] = LocalFrame.MasteringDisplayPrimariesY[i];
            }

            // Simply copy this from the first frame to make it easier to access
            VP.MasteringDisplayWhitePointX = LocalFrame.MasteringDisplayWhitePointX;
            VP.MasteringDisplayWhitePointY = LocalFrame.MasteringDisplayWhitePointY;
        }
        if (LocalFrame.HasMasteringDisplayLuminance) {
            VP.HasMasteringDisplayLuminance = LocalFrame.HasMasteringDisplayLuminance;
            VP.MasteringDisplayMinLuminance = LocalFrame.MasteringDisplayMinLuminance;
            VP.MasteringDisplayMaxLuminance = LocalFrame.MasteringDisplayMaxLuminance;
        }
        if (LocalFrame.HasContentLightLevel) {
            VP.HasContentLightLevel = LocalFrame.HasContentLightLevel;
            VP.ContentLightLevelMax = LocalFrame.ContentLightLevelMax;
            VP.ContentLightLevelAverage = LocalFrame.ContentLightLevelAverage;
        }
    } catch (FFMS_Exception &) {
        Free();
        throw;
    }
}

FFMS_VideoSource::~FFMS_VideoSource() {
    Free();
}

FFMS_Frame *FFMS_VideoSource::GetFrameByTime(double Time) {
    int Frame = Frames.ClosestFrameFromPTS(static_cast<int64_t>((Time * 1000 * Frames.TB.Den) / Frames.TB.Num));
    return GetFrame(Frame);
}

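// The deprecated YUVJ pixel formats encode full-range data in the format
// itself; map them to their limited-range equivalents and report JPEG range
// separately so the rest of the pipeline only deals with one range mechanism.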
static AVColorRange handle_jpeg(AVPixelFormat *format) {
    switch (*format) {
    case AV_PIX_FMT_YUVJ420P: *format = AV_PIX_FMT_YUV420P; return AVCOL_RANGE_JPEG;
    case AV_PIX_FMT_YUVJ422P: *format = AV_PIX_FMT_YUV422P; return AVCOL_RANGE_JPEG;
    case AV_PIX_FMT_YUVJ444P: *format = AV_PIX_FMT_YUV444P; return AVCOL_RANGE_JPEG;
    case AV_PIX_FMT_YUVJ440P: *format = AV_PIX_FMT_YUV440P; return AVCOL_RANGE_JPEG;
    default: return AVCOL_RANGE_UNSPECIFIED;
    }
}

void FFMS_VideoSource::SetOutputFormat(const AVPixelFormat *TargetFormats, int Width, int Height, int Resizer) {
    TargetWidth = Width;
    TargetHeight = Height;
    TargetResizer = Resizer;
    TargetPixelFormats.clear();
    while (*TargetFormats != AV_PIX_FMT_NONE)
        TargetPixelFormats.push_back(*TargetFormats++);
    OutputColorSpaceSet = true;
    OutputColorRangeSet = true;
    OutputFormat = AV_PIX_FMT_NONE;

    ReAdjustOutputFormat(DecodeFrame);
    OutputFrame(DecodeFrame);
}

void FFMS_VideoSource::SetInputFormat(int ColorSpace, int ColorRange, AVPixelFormat Format) {
    InputFormatOverridden = true;

    if (Format != AV_PIX_FMT_NONE)
        InputFormat = Format;
    if (ColorRange != AVCOL_RANGE_UNSPECIFIED)
        InputColorRange = (AVColorRange)ColorRange;
    if (ColorSpace != AVCOL_SPC_UNSPECIFIED)
        InputColorSpace = (AVColorSpace)ColorSpace;

    if (TargetPixelFormats.size()) {
        ReAdjustOutputFormat(DecodeFrame);
        OutputFrame(DecodeFrame);
    }
}

void FFMS_VideoSource::DetectInputFormat() {
    if (InputFormat == AV_PIX_FMT_NONE)
        InputFormat = CodecContext->pix_fmt;

    AVColorRange RangeFromFormat = handle_jpeg(&InputFormat);

    if (InputColorRange == AVCOL_RANGE_UNSPECIFIED)
        InputColorRange = RangeFromFormat;
    if (InputColorRange == AVCOL_RANGE_UNSPECIFIED)
        InputColorRange = CodecContext->color_range;

    if (InputColorSpace == AVCOL_SPC_UNSPECIFIED)
        InputColorSpace = CodecContext->colorspace;
}

void FFMS_VideoSource::ReAdjustOutputFormat(AVFrame *Frame) {
    if (SWS) {
        sws_freeContext(SWS);
        SWS = nullptr;
    }

    DetectInputFormat();

    OutputFormat = FindBestPixelFormat(TargetPixelFormats, InputFormat);
    if (OutputFormat == AV_PIX_FMT_NONE) {
        ResetOutputFormat();
        throw FFMS_Exception(FFMS_ERROR_SCALING, FFMS_ERROR_INVALID_ARGUMENT,
            "No suitable output format found");
    }

    OutputColorRange = handle_jpeg(&OutputFormat);
    if (OutputColorRange == AVCOL_RANGE_UNSPECIFIED)
        OutputColorRange = CodecContext->color_range;
    if (OutputColorRange == AVCOL_RANGE_UNSPECIFIED)
        OutputColorRange = InputColorRange;

    OutputColorSpace = CodecContext->colorspace;
    if (OutputColorSpace == AVCOL_SPC_UNSPECIFIED)
        OutputColorSpace = InputColorSpace;

    BCSType InputType = GuessCSType(InputFormat);
    BCSType OutputType = GuessCSType(OutputFormat);

    if (InputType != OutputType) {
        if (OutputType == cRGB) {
            OutputColorSpace = AVCOL_SPC_RGB;
            OutputColorRange = AVCOL_RANGE_UNSPECIFIED;
            OutputColorPrimaries = AVCOL_PRI_UNSPECIFIED;
            OutputTransferCharateristics = AVCOL_TRC_UNSPECIFIED;
            OutputChromaLocation = AVCHROMA_LOC_UNSPECIFIED;
        } else if (OutputType == cYUV) {
            OutputColorSpace = AVCOL_SPC_BT470BG;
            OutputColorRange = AVCOL_RANGE_MPEG;
            OutputColorPrimaries = AVCOL_PRI_UNSPECIFIED;
            OutputTransferCharateristics = AVCOL_TRC_UNSPECIFIED;
            OutputChromaLocation = AVCHROMA_LOC_LEFT;
        } else if (OutputType == cGRAY) {
            OutputColorSpace = AVCOL_SPC_UNSPECIFIED;
            OutputColorRange = AVCOL_RANGE_UNSPECIFIED;
            OutputColorPrimaries = AVCOL_PRI_UNSPECIFIED;
            OutputTransferCharateristics = AVCOL_TRC_UNSPECIFIED;
            OutputChromaLocation = AVCHROMA_LOC_UNSPECIFIED;
        }
    } else {
        OutputColorPrimaries = -1;
        OutputTransferCharateristics = -1;
        OutputChromaLocation = -1;
    }

    if (InputFormat != OutputFormat ||
        TargetWidth != CodecContext->width ||
        TargetHeight != CodecContext->height ||
        InputColorSpace != OutputColorSpace ||
        InputColorRange != OutputColorRange) {
        SWS = GetSwsContext(
            Frame->width, Frame->height, InputFormat, InputColorSpace, InputColorRange,
            TargetWidth, TargetHeight, OutputFormat, OutputColorSpace, OutputColorRange,
            TargetResizer);

        if (!SWS) {
            ResetOutputFormat();
            throw FFMS_Exception(FFMS_ERROR_SCALING, FFMS_ERROR_INVALID_ARGUMENT,
                "Failed to allocate SWScale context");
        }
    }

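    // Replace the conversion buffer with one matching the new output
    // dimensions; the final argument to av_image_alloc() is the buffer
    // alignment in bytes.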
    av_freep(&SWSFrameData[0]);
    if (av_image_alloc(SWSFrameData, SWSFrameLinesize, TargetWidth, TargetHeight, OutputFormat, 4) < 0)
        throw FFMS_Exception(FFMS_ERROR_SCALING, FFMS_ERROR_ALLOCATION_FAILED,
            "Could not allocate frame with new resolution.");
}

void FFMS_VideoSource::ResetOutputFormat() {
    if (SWS) {
        sws_freeContext(SWS);
        SWS = nullptr;
    }

    TargetWidth = -1;
    TargetHeight = -1;
    TargetPixelFormats.clear();

    OutputFormat = AV_PIX_FMT_NONE;
    OutputColorSpace = AVCOL_SPC_UNSPECIFIED;
    OutputColorRange = AVCOL_RANGE_UNSPECIFIED;
    OutputColorSpaceSet = false;
    OutputColorRangeSet = false;

    OutputFrame(DecodeFrame);
}

void FFMS_VideoSource::ResetInputFormat() {
    InputFormatOverridden = false;
    InputFormat = AV_PIX_FMT_NONE;
    InputColorSpace = AVCOL_SPC_UNSPECIFIED;
    InputColorRange = AVCOL_RANGE_UNSPECIFIED;

    ReAdjustOutputFormat(DecodeFrame);
    OutputFrame(DecodeFrame);
}

void FFMS_VideoSource::SetVideoProperties() {
    VP.RFFDenominator = CodecContext->time_base.num;
    VP.RFFNumerator = CodecContext->time_base.den;
    if (CodecContext->codec_id == AV_CODEC_ID_H264) {
        if (VP.RFFNumerator & 1)
            VP.RFFDenominator *= 2;
        else
            VP.RFFNumerator /= 2;
    }
    VP.NumFrames = Frames.VisibleFrameCount();
    VP.TopFieldFirst = DecodeFrame->top_field_first;
    VP.ColorSpace = CodecContext->colorspace;
    VP.ColorRange = CodecContext->color_range;
    // these pixfmts are deprecated but still used
    if (CodecContext->pix_fmt == AV_PIX_FMT_YUVJ420P ||
        CodecContext->pix_fmt == AV_PIX_FMT_YUVJ422P ||
        CodecContext->pix_fmt == AV_PIX_FMT_YUVJ444P)
        VP.ColorRange = AVCOL_RANGE_JPEG;

    VP.FirstTime = ((Frames[Frames.RealFrameNumber(0)].PTS * Frames.TB.Num) / (double)Frames.TB.Den) / 1000;
    VP.LastTime = ((Frames[Frames.RealFrameNumber(Frames.VisibleFrameCount()-1)].PTS * Frames.TB.Num) / (double)Frames.TB.Den) / 1000;
    VP.LastEndTime = (((Frames[Frames.RealFrameNumber(Frames.VisibleFrameCount()-1)].PTS + Frames.LastDuration) * Frames.TB.Num) / (double)Frames.TB.Den) / 1000;

    if (CodecContext->width <= 0 || CodecContext->height <= 0)
        throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_CODEC,
            "Codec returned zero size video");

    // attempt to correct framerate to the proper NTSC fraction, if applicable
    CorrectRationalFramerate(&VP.FPSNumerator, &VP.FPSDenominator);
    // correct the timebase, if necessary
    CorrectTimebase(&VP, &Frames.TB);

    // Set AR variables
    VP.SARNum = CodecContext->sample_aspect_ratio.num;
    VP.SARDen = CodecContext->sample_aspect_ratio.den;

    // Set input and output formats now that we have a CodecContext
    DetectInputFormat();

    OutputFormat = InputFormat;
    OutputColorSpace = InputColorSpace;
    OutputColorRange = InputColorRange;
}

bool FFMS_VideoSource::HasPendingDelayedFrames() {
    if (InitialDecode == -1) {
        if (DelayCounter > Delay) {
            --DelayCounter;
            return true;
        }
        InitialDecode = 0;
    }
    return false;
}

bool FFMS_VideoSource::DecodePacket(AVPacket *Packet) {
    std::swap(DecodeFrame, LastDecodedFrame);
    avcodec_send_packet(CodecContext, Packet);

    int Ret = avcodec_receive_frame(CodecContext, DecodeFrame);
    if (Ret != 0) {
        std::swap(DecodeFrame, LastDecodedFrame);
        if (!(Packet->flags & AV_PKT_FLAG_DISCARD))
            DelayCounter++;
    } else if (!!(Packet->flags & AV_PKT_FLAG_DISCARD)) {
        // If a discarded packet is sent while the decode buffer is not empty,
        // the caller may still receive buffered decoded frames, so the number
        // of frames in the buffer decreases.
        DelayCounter--;
    }

    if (Ret == 0 && InitialDecode == 1)
        InitialDecode = -1;

    // H.264 (PAFF) and HEVC can have one field per packet, and the decoding delay
    // needs to be adjusted accordingly.
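    // Doubling the B-frame depth (plus one) roughly converts the frame-based
    // delay into a packet-based one once packets are known to carry single fields.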
    if (CodecContext->codec_id == AV_CODEC_ID_H264 || CodecContext->codec_id == AV_CODEC_ID_HEVC) {
        if (!PAFFAdjusted && DelayCounter > Delay && LastDecodedFrame->repeat_pict == 0 && Ret != 0) {
            int OldBFrameDelay = Delay - (CodecContext->thread_count - 1);
            Delay = 1 + OldBFrameDelay * 2 + (CodecContext->thread_count - 1);
            PAFFAdjusted = true;
        }
    }

    return (Ret == 0) || (DelayCounter > Delay && !InitialDecode);
}

int FFMS_VideoSource::Seek(int n) {
    int ret = -1;

    DelayCounter = 0;
    InitialDecode = 1;

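    // Prefer PTS-based seeking; fall back to byte-position seeking when the
    // container rejects it, and remember that choice for subsequent seeks.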
    if (!SeekByPos || Frames[n].FilePos < 0) {
        ret = av_seek_frame(FormatContext, VideoTrack, Frames[n].PTS, AVSEEK_FLAG_BACKWARD);
        if (ret >= 0)
            return ret;
    }

    if (Frames[n].FilePos >= 0) {
        ret = av_seek_frame(FormatContext, VideoTrack, Frames[n].FilePos + PosOffset, AVSEEK_FLAG_BYTE);
        if (ret >= 0)
            SeekByPos = true;
    }
    return ret;
}

int FFMS_VideoSource::ReadFrame(AVPacket *pkt) {
    int ret = av_read_frame(FormatContext, pkt);
    if (ret >= 0 || ret == AVERROR_EOF) return ret;

    // Lavf reports the beginning of the actual video data as the packet's
    // position, but the reader requires the header, so we end up seeking
    // to the wrong position. Wait until a read actually fails to adjust the
    // seek targets, so that if this ever gets fixed upstream our workaround
    // doesn't re-break it.
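    // The 6-byte offset corresponds to the minimal per-frame "FRAME\n" header
    // that lavf skips past when reporting packet positions.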
    if (strcmp(FormatContext->iformat->name, "yuv4mpegpipe") == 0) {
        PosOffset = -6;
        Seek(CurrentFrame);
        return av_read_frame(FormatContext, pkt);
    }
    return ret;
}

void FFMS_VideoSource::Free() {
    avcodec_free_context(&CodecContext);
    avformat_close_input(&FormatContext);
    if (SWS)
        sws_freeContext(SWS);
    av_freep(&SWSFrameData[0]);
    av_frame_free(&DecodeFrame);
    av_frame_free(&LastDecodedFrame);
}

void FFMS_VideoSource::DecodeNextFrame(int64_t &AStartTime, int64_t &Pos) {
    AStartTime = -1;

    if (HasPendingDelayedFrames())
        return;

    AVPacket Packet;
    InitNullPacket(Packet);

    while (ReadFrame(&Packet) >= 0) {
        if (Packet.stream_index != VideoTrack) {
            av_packet_unref(&Packet);
            continue;
        }

        if (AStartTime < 0)
            AStartTime = Frames.UseDTS ? Packet.dts : Packet.pts;

        if (Pos < 0)
            Pos = Packet.pos;

        bool FrameFinished = DecodePacket(&Packet);
        av_packet_unref(&Packet);
        if (FrameFinished)
            return;
    }

    // Flush final frames
    InitNullPacket(Packet);
    DecodePacket(&Packet);
}

bool FFMS_VideoSource::SeekTo(int n, int SeekOffset) {
    if (SeekMode >= 0) {
        int TargetFrame = n + SeekOffset;
        if (TargetFrame < 0)
            throw FFMS_Exception(FFMS_ERROR_SEEKING, FFMS_ERROR_UNKNOWN,
                "Frame accurate seeking is not possible in this file");

        if (SeekMode < 3)
            TargetFrame = Frames.FindClosestVideoKeyFrame(TargetFrame);

        if (SeekMode == 0) {
            if (n < CurrentFrame) {
                Seek(0);
                avcodec_flush_buffers(CodecContext);
                CurrentFrame = 0;
            }
        } else {
            // 10 frames is used as a margin to prevent excessive seeking since the predicted best keyframe isn't always selected by avformat
            if (n < CurrentFrame || TargetFrame > CurrentFrame + 10 || (SeekMode == 3 && n > CurrentFrame + 10)) {
                Seek(TargetFrame);
                avcodec_flush_buffers(CodecContext);
                return true;
            }
        }
    } else if (n < CurrentFrame) {
        throw FFMS_Exception(FFMS_ERROR_SEEKING, FFMS_ERROR_INVALID_ARGUMENT,
            "Non-linear access attempted");
    }
    return false;
}

FFMS_Frame *FFMS_VideoSource::GetFrame(int n) {
    GetFrameCheck(n);
    n = Frames.RealFrameNumber(n);

    if (LastFrameNum == n)
        return &LocalFrame;

    int SeekOffset = 0;
    bool Seek = true;

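    // Decode forward until the requested frame falls out of the decoder; if the
    // position landed on after a seek can't be identified, seek further back
    // and try again.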
    do {
        bool HasSeeked = false;
        if (Seek) {
            HasSeeked = SeekTo(n, SeekOffset);
            Seek = false;
        }

        int64_t StartTime = AV_NOPTS_VALUE, FilePos = -1;
        bool Hidden = (((unsigned) CurrentFrame < Frames.size()) && Frames[CurrentFrame].Hidden);
        if (HasSeeked || !Hidden)
            DecodeNextFrame(StartTime, FilePos);

        if (!HasSeeked)
            continue;

        if (StartTime == AV_NOPTS_VALUE && !Frames.HasTS) {
            if (FilePos >= 0) {
                CurrentFrame = Frames.FrameFromPos(FilePos);
                if (CurrentFrame >= 0)
                    continue;
            }
            // If the track doesn't have timestamps or file positions then
            // just trust that we got to the right place, since we have no
            // way to tell where we are
            else {
                CurrentFrame = n;
                continue;
            }
        }

        CurrentFrame = Frames.FrameFromPTS(StartTime);

        // Is the seek destination time known? Does it belong to a frame?
        if (CurrentFrame < 0) {
            if (SeekMode == 1 || StartTime < 0) {
                // No idea where we are so go back a bit further
                SeekOffset -= 10;
                Seek = true;
                continue;
            }
            CurrentFrame = Frames.ClosestFrameFromPTS(StartTime);
        }

        // We want to know the frame number that we just got out of the decoder,
        // but what we currently know is the frame number of the first packet
        // we fed into the decoder, and these can be different with open-gop or
        // aggressive (non-keyframe) seeking.
        int64_t Pos = Frames[CurrentFrame].FilePos;
        if (CurrentFrame > 0 && Pos != -1) {
            int Prev = CurrentFrame - 1;
            while (Prev >= 0 && Frames[Prev].FilePos != -1 && Frames[Prev].FilePos > Pos)
                --Prev;
            CurrentFrame = Prev + 1;
        }
    } while (++CurrentFrame <= n);

    LastFrameNum = n;
    return OutputFrame(DecodeFrame);
}