1 //  Copyright (c) 2007-2017 Fredrik Mellbin
2 //
3 //  Permission is hereby granted, free of charge, to any person obtaining a copy
4 //  of this software and associated documentation files (the "Software"), to deal
5 //  in the Software without restriction, including without limitation the rights
6 //  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 //  copies of the Software, and to permit persons to whom the Software is
8 //  furnished to do so, subject to the following conditions:
9 //
10 //  The above copyright notice and this permission notice shall be included in
11 //  all copies or substantial portions of the Software.
12 //
13 //  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 //  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 //  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 //  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 //  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 //  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 //  THE SOFTWARE.
20 
21 #include "videosource.h"
22 #include "indexing.h"
23 #include "videoutils.h"
24 #include <algorithm>
25 #include <thread>
26 
27 
SanityCheckFrameForData(AVFrame * Frame)28 void FFMS_VideoSource::SanityCheckFrameForData(AVFrame *Frame) {
29     for (int i = 0; i < 4; i++) {
30         if (Frame->data[i] != nullptr && Frame->linesize[i] != 0)
31             return;
32     }
33 
34     throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_CODEC, "Insanity detected: decoder returned an empty frame");
35 }
36 
GetFrameCheck(int n)37 void FFMS_VideoSource::GetFrameCheck(int n) {
38     if (n < 0 || n >= VP.NumFrames)
39         throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_INVALID_ARGUMENT,
40             "Out of bounds frame requested");
41 }
42 
// Convert a decoded AVFrame into the cached LocalFrame that is handed out to
// API users: runs the frame through swscale when an output conversion is
// configured, otherwise points LocalFrame directly at the decoder's planes,
// and copies all per-frame metadata (colorimetry, HDR mastering/light-level
// side data, field flags). Returns a pointer to the member LocalFrame, which
// stays valid only until the next decode/output call.
FFMS_Frame *FFMS_VideoSource::OutputFrame(AVFrame *Frame) {
    SanityCheckFrameForData(Frame);

    // Re-negotiate the conversion chain whenever the decoded frame's
    // geometry or pixel format differs from the previous frame's.
    if (LastFrameWidth != Frame->width || LastFrameHeight != Frame->height || LastFramePixelFormat != Frame->format) {
        if (TargetHeight > 0 && TargetWidth > 0 && !TargetPixelFormats.empty()) {
            // Unless the user pinned the input format, re-detect it from
            // scratch for the new frame properties.
            if (!InputFormatOverridden) {
                InputFormat = AV_PIX_FMT_NONE;
                InputColorSpace = AVCOL_SPC_UNSPECIFIED;
                InputColorRange = AVCOL_RANGE_UNSPECIFIED;
            }

            ReAdjustOutputFormat(Frame);
        } else {
            // No conversion requested: pass the decoder's format through.
            OutputFormat = (AVPixelFormat) Frame->format;
        }
    }

    if (SWS) {
        // Conversion active: scale into the preallocated SWS buffers.
        sws_scale(SWS, Frame->data, Frame->linesize, 0, Frame->height, SWSFrameData, SWSFrameLinesize);
        for (int i = 0; i < 4; i++) {
            LocalFrame.Data[i] = SWSFrameData[i];
            LocalFrame.Linesize[i] = SWSFrameLinesize[i];
        }
    } else {
        // Special case to avoid ugly casts
        for (int i = 0; i < 4; i++) {
            LocalFrame.Data[i] = Frame->data[i];
            LocalFrame.Linesize[i] = Frame->linesize[i];
        }
    }

    // Basic frame geometry and per-frame flags.
    LocalFrame.EncodedWidth = Frame->width;
    LocalFrame.EncodedHeight = Frame->height;
    LocalFrame.EncodedPixelFormat = Frame->format;
    LocalFrame.ScaledWidth = TargetWidth;
    LocalFrame.ScaledHeight = TargetHeight;
    LocalFrame.ConvertedPixelFormat = OutputFormat;
    LocalFrame.KeyFrame = Frame->key_frame;
    LocalFrame.PictType = av_get_picture_type_char(Frame->pict_type);
    LocalFrame.RepeatPict = Frame->repeat_pict;
    LocalFrame.InterlacedFrame = Frame->interlaced_frame;
    LocalFrame.TopFieldFirst = Frame->top_field_first;
    // Output overrides (set when a conversion was configured) win over the
    // frame's own colorimetry; -1 means "no override".
    LocalFrame.ColorSpace = OutputColorSpaceSet ? OutputColorSpace : Frame->colorspace;
    LocalFrame.ColorRange = OutputColorRangeSet ? OutputColorRange : Frame->color_range;
    LocalFrame.ColorPrimaries = (OutputColorPrimaries >= 0) ? OutputColorPrimaries : Frame->color_primaries;
    LocalFrame.TransferCharateristics = (OutputTransferCharateristics >= 0) ? OutputTransferCharateristics : Frame->color_trc;
    LocalFrame.ChromaLocation = (OutputChromaLocation >= 0) ? OutputChromaLocation : Frame->chroma_location;

    // HDR mastering display metadata (primaries, white point, luminance).
    const AVFrameSideData *MasteringDisplaySideData = av_frame_get_side_data(Frame, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA);
    if (MasteringDisplaySideData) {
        const AVMasteringDisplayMetadata *MasteringDisplay = reinterpret_cast<const AVMasteringDisplayMetadata *>(MasteringDisplaySideData->data);
        if (MasteringDisplay->has_primaries) {
            LocalFrame.HasMasteringDisplayPrimaries = MasteringDisplay->has_primaries;
            for (int i = 0; i < 3; i++) {
                LocalFrame.MasteringDisplayPrimariesX[i] = av_q2d(MasteringDisplay->display_primaries[i][0]);
                LocalFrame.MasteringDisplayPrimariesY[i] = av_q2d(MasteringDisplay->display_primaries[i][1]);
            }
            LocalFrame.MasteringDisplayWhitePointX = av_q2d(MasteringDisplay->white_point[0]);
            LocalFrame.MasteringDisplayWhitePointY = av_q2d(MasteringDisplay->white_point[1]);
        }
        if (MasteringDisplay->has_luminance) {
            LocalFrame.HasMasteringDisplayLuminance = MasteringDisplay->has_luminance;
            LocalFrame.MasteringDisplayMinLuminance = av_q2d(MasteringDisplay->min_luminance);
            LocalFrame.MasteringDisplayMaxLuminance = av_q2d(MasteringDisplay->max_luminance);
        }
    }
    // NOTE(review): these flags are recomputed unconditionally (even when no
    // side data was present), so all-zero metadata reads as "not present".
    LocalFrame.HasMasteringDisplayPrimaries = !!LocalFrame.MasteringDisplayPrimariesX[0] && !!LocalFrame.MasteringDisplayPrimariesY[0] &&
                                              !!LocalFrame.MasteringDisplayPrimariesX[1] && !!LocalFrame.MasteringDisplayPrimariesY[1] &&
                                              !!LocalFrame.MasteringDisplayPrimariesX[2] && !!LocalFrame.MasteringDisplayPrimariesY[2] &&
                                              !!LocalFrame.MasteringDisplayWhitePointX   && !!LocalFrame.MasteringDisplayWhitePointY;
    /* MasteringDisplayMinLuminance can be 0 */
    LocalFrame.HasMasteringDisplayLuminance = !!LocalFrame.MasteringDisplayMaxLuminance;

    // Content light level side data (MaxCLL / MaxFALL).
    const AVFrameSideData *ContentLightSideData = av_frame_get_side_data(Frame, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL);
    if (ContentLightSideData) {
        const AVContentLightMetadata *ContentLightLevel = reinterpret_cast<const AVContentLightMetadata *>(ContentLightSideData->data);
        LocalFrame.ContentLightLevelMax = ContentLightLevel->MaxCLL;
        LocalFrame.ContentLightLevelAverage = ContentLightLevel->MaxFALL;
    }
    /* Only check for either of them */
    LocalFrame.HasContentLightLevel = !!LocalFrame.ContentLightLevelMax || !!LocalFrame.ContentLightLevelAverage;

    // Remember this frame's properties so the next call can detect changes.
    LastFrameHeight = Frame->height;
    LastFrameWidth = Frame->width;
    LastFramePixelFormat = (AVPixelFormat) Frame->format;

    return &LocalFrame;
}
131 
// Construct a video source for one track of an indexed file: validates the
// track against the index, opens the demuxer and decoder, decodes one frame
// to learn the stream parameters, derives framerate/SAR/stereo-3D/HDR/rotation
// properties, and seeks back to the start. Throws FFMS_Exception on any
// failure (after releasing everything acquired so far via Free()).
FFMS_VideoSource::FFMS_VideoSource(const char *SourceFile, FFMS_Index &Index, int Track, int Threads, int SeekMode)
    : Index(Index), SeekMode(SeekMode) {

    try {
        // Validate the requested track against the index.
        if (Track < 0 || Track >= static_cast<int>(Index.size()))
            throw FFMS_Exception(FFMS_ERROR_INDEX, FFMS_ERROR_INVALID_ARGUMENT,
                "Out of bounds track index selected");

        if (Index[Track].TT != FFMS_TYPE_VIDEO)
            throw FFMS_Exception(FFMS_ERROR_INDEX, FFMS_ERROR_INVALID_ARGUMENT,
                "Not a video track");

        if (Index[Track].empty())
            throw FFMS_Exception(FFMS_ERROR_INDEX, FFMS_ERROR_INVALID_ARGUMENT,
                "Video track contains no frames");

        // Make sure the index was actually built from this file.
        if (!Index.CompareFileSignature(SourceFile))
            throw FFMS_Exception(FFMS_ERROR_INDEX, FFMS_ERROR_FILE_MISMATCH,
                "The index does not match the source file");

        Frames = Index[Track];
        VideoTrack = Track;

        // Threads < 1 means auto-detect, capped at 16.
        if (Threads < 1)
            DecodingThreads = (std::min)(std::thread::hardware_concurrency(), 16u);
        else
            DecodingThreads = Threads;

        DecodeFrame = av_frame_alloc();
        LastDecodedFrame = av_frame_alloc();

        if (!DecodeFrame || !LastDecodedFrame)
            throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_ALLOCATION_FAILED,
                "Could not allocate dummy frame.");

        // Dummy allocations so the unallocated case doesn't have to be handled later
        if (av_image_alloc(SWSFrameData, SWSFrameLinesize, 16, 16, AV_PIX_FMT_GRAY8, 4) < 0)
            throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_ALLOCATION_FAILED,
                "Could not allocate dummy frame.");

        LAVFOpenFile(SourceFile, FormatContext, VideoTrack);

        AVCodec *Codec = avcodec_find_decoder(FormatContext->streams[VideoTrack]->codecpar->codec_id);
        if (Codec == nullptr)
            throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_CODEC,
                "Video codec not found");

        CodecContext = avcodec_alloc_context3(Codec);
        if (CodecContext == nullptr)
            throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_ALLOCATION_FAILED,
                "Could not allocate video codec context.");
        if (avcodec_parameters_to_context(CodecContext, FormatContext->streams[VideoTrack]->codecpar) < 0)
            throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_CODEC,
                "Could not copy video decoder parameters.");
        CodecContext->thread_count = DecodingThreads;
        CodecContext->has_b_frames = Frames.MaxBFrames;

        // Full explanation by more clever person available here: https://github.com/Nevcairiel/LAVFilters/issues/113
        if (CodecContext->codec_id == AV_CODEC_ID_H264 && CodecContext->has_b_frames)
            CodecContext->has_b_frames = 15; // the maximum possible value for h264

        if (avcodec_open2(CodecContext, Codec, nullptr) < 0)
            throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_CODEC,
                "Could not open video codec");

        // Similar yet different to h264 workaround above
        // vc1 simply sets has_b_frames to 1 no matter how many there are so instead we set it to the max value
        // in order to not confuse our own delay guesses later
        // Doesn't affect actual vc1 reordering unlike h264
        if (CodecContext->codec_id == AV_CODEC_ID_VC1 && CodecContext->has_b_frames)
            Delay = 7 + (CodecContext->thread_count - 1); // the maximum possible value for vc1
        else
            Delay = CodecContext->has_b_frames + (CodecContext->thread_count - 1); // Normal decoder delay

        // Always try to decode a frame to make sure all required parameters are known
        int64_t DummyPTS = 0, DummyPos = 0;
        DecodeNextFrame(DummyPTS, DummyPos);

        //VP.image_type = VideoInfo::IT_TFF;
        // Initial framerate guess: the inverse of the stream timebase.
        VP.FPSDenominator = FormatContext->streams[VideoTrack]->time_base.num;
        VP.FPSNumerator = FormatContext->streams[VideoTrack]->time_base.den;

        // sanity check framerate
        if (VP.FPSDenominator <= 0 || VP.FPSNumerator <= 0) {
            VP.FPSDenominator = 1;
            VP.FPSNumerator = 30;
        }

        // Calculate the average framerate from the first/last PTS over the
        // number of visible (non-hidden) frames.
        size_t TotalFrames = 0;
        for (size_t i = 0; i < Frames.size(); i++)
            if (!Frames[i].Hidden)
                TotalFrames++;

        if (TotalFrames >= 2) {
            double PTSDiff = (double)(Frames.back().PTS - Frames.front().PTS);
            double TD = (double)(Frames.TB.Den);
            double TN = (double)(Frames.TB.Num);
            VP.FPSDenominator = (unsigned int)(PTSDiff * TN / TD * 1000.0 / (TotalFrames - 1));
            VP.FPSNumerator = 1000000;
        }

        // Set the video properties from the codec context
        SetVideoProperties();

        // Set the SAR from the container if the codec SAR is invalid
        if (VP.SARNum <= 0 || VP.SARDen <= 0) {
            VP.SARNum = FormatContext->streams[VideoTrack]->sample_aspect_ratio.num;
            VP.SARDen = FormatContext->streams[VideoTrack]->sample_aspect_ratio.den;
        }

        // Set stereoscopic 3d type
        VP.Stereo3DType = FFMS_S3D_TYPE_2D;
        VP.Stereo3DFlags = 0;

        // Scan stream-level side data for stereo-3D, HDR mastering display
        // and content light level metadata.
        for (int i = 0; i < FormatContext->streams[VideoTrack]->nb_side_data; i++) {
            if (FormatContext->streams[VideoTrack]->side_data[i].type == AV_PKT_DATA_STEREO3D) {
                const AVStereo3D *StereoSideData = (const AVStereo3D *)FormatContext->streams[VideoTrack]->side_data[i].data;
                VP.Stereo3DType = StereoSideData->type;
                VP.Stereo3DFlags = StereoSideData->flags;
            } else if (FormatContext->streams[VideoTrack]->side_data[i].type == AV_PKT_DATA_MASTERING_DISPLAY_METADATA) {
                const AVMasteringDisplayMetadata *MasteringDisplay = (const AVMasteringDisplayMetadata *)FormatContext->streams[VideoTrack]->side_data[i].data;
                if (MasteringDisplay->has_primaries) {
                    VP.HasMasteringDisplayPrimaries = MasteringDisplay->has_primaries;
                    // NOTE(review): this inner loop index shadows the outer
                    // side-data index 'i'; harmless here but worth renaming.
                    for (int i = 0; i < 3; i++) {
                        VP.MasteringDisplayPrimariesX[i] = av_q2d(MasteringDisplay->display_primaries[i][0]);
                        VP.MasteringDisplayPrimariesY[i] = av_q2d(MasteringDisplay->display_primaries[i][1]);
                    }
                    VP.MasteringDisplayWhitePointX = av_q2d(MasteringDisplay->white_point[0]);
                    VP.MasteringDisplayWhitePointY = av_q2d(MasteringDisplay->white_point[1]);
                }
                if (MasteringDisplay->has_luminance) {
                    VP.HasMasteringDisplayLuminance = MasteringDisplay->has_luminance;
                    VP.MasteringDisplayMinLuminance = av_q2d(MasteringDisplay->min_luminance);
                    VP.MasteringDisplayMaxLuminance = av_q2d(MasteringDisplay->max_luminance);
                }

                // Treat all-zero metadata as "not present".
                VP.HasMasteringDisplayPrimaries = !!VP.MasteringDisplayPrimariesX[0] && !!VP.MasteringDisplayPrimariesY[0] &&
                                                  !!VP.MasteringDisplayPrimariesX[1] && !!VP.MasteringDisplayPrimariesY[1] &&
                                                  !!VP.MasteringDisplayPrimariesX[2] && !!VP.MasteringDisplayPrimariesY[2] &&
                                                  !!VP.MasteringDisplayWhitePointX   && !!VP.MasteringDisplayWhitePointY;
                /* MasteringDisplayMinLuminance can be 0 */
                VP.HasMasteringDisplayLuminance = !!VP.MasteringDisplayMaxLuminance;
            } else if (FormatContext->streams[VideoTrack]->side_data[i].type == AV_PKT_DATA_CONTENT_LIGHT_LEVEL) {
                const AVContentLightMetadata *ContentLightLevel = (const AVContentLightMetadata *)FormatContext->streams[VideoTrack]->side_data[i].data;

                VP.ContentLightLevelMax = ContentLightLevel->MaxCLL;
                VP.ContentLightLevelAverage = ContentLightLevel->MaxFALL;

                /* Only check for either of them */
                VP.HasContentLightLevel = !!VP.ContentLightLevelMax || !!VP.ContentLightLevelAverage;
            }
        }

        // Set rotation from the display matrix side data, decoupling flip
        // from rotation so both can be reported separately.
        VP.Rotation = 0;
        VP.Flip = 0;
        int32_t *RotationMatrix = reinterpret_cast<int32_t *>(av_stream_get_side_data(FormatContext->streams[VideoTrack], AV_PKT_DATA_DISPLAYMATRIX, nullptr));
        if (RotationMatrix) {
            // A negative determinant means the matrix mirrors the image.
            int64_t det = (int64_t)RotationMatrix[0] * RotationMatrix[4] - (int64_t)RotationMatrix[1] * RotationMatrix[3];
            if (det < 0) {
                /* Always assume an horizontal flip for simplicity, it can be changed later if rotation is 180. */
                VP.Flip = 1;

                /* Flip the matrix to decouple flip and rotation operations. */
                av_display_matrix_flip(RotationMatrix, 1, 0);
            }

            int rot = lround(av_display_rotation_get(RotationMatrix));

            if (rot == 180 && det < 0) {
                /* This is a vertical flip with no rotation. */
                VP.Flip = -1;
            } else {
                /* It is possible to have a 90/270 rotation and a horizontal flip:
                 * in this case, the rotation angle applies to the video frame
                 * (rather than the rendering frame), so add this step to nullify
                 * the conversion below. */
                if (VP.Flip)
                    rot *= -1;

                /* Return a positive value, noting that this converts angles
                 * from the rendering frame to the video frame. */
                VP.Rotation = -rot;
                if (VP.Rotation < 0)
                    VP.Rotation += 360;
            }
        }

        // Verify the file is actually seekable (unless seeking is disabled
        // via SeekMode < 0 or there is only one frame).
        if (SeekMode >= 0 && Frames.size() > 1) {
            if (Seek(0) < 0) {
                throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_CODEC,
                    "Video track is unseekable");
            } else {
                avcodec_flush_buffers(CodecContext);
                // Since we seeked to frame 0 we need to specify that frame 0 is once again the next frame that will be decoded
                CurrentFrame = 0;
            }
        }

        // Cannot "output" without doing all other initialization
        // This is the additional mess required for seekmode=-1 to work in a reasonable way
        OutputFrame(DecodeFrame);

        // Prefer the first frame's HDR metadata over stream-level side data
        // when present.
        if (LocalFrame.HasMasteringDisplayPrimaries) {
            VP.HasMasteringDisplayPrimaries = LocalFrame.HasMasteringDisplayPrimaries;
            for (int i = 0; i < 3; i++) {
                VP.MasteringDisplayPrimariesX[i] = LocalFrame.MasteringDisplayPrimariesX[i];
                VP.MasteringDisplayPrimariesY[i] = LocalFrame.MasteringDisplayPrimariesY[i];
            }

            // Simply copy this from the first frame to make it easier to access
            VP.MasteringDisplayWhitePointX = LocalFrame.MasteringDisplayWhitePointX;
            VP.MasteringDisplayWhitePointY = LocalFrame.MasteringDisplayWhitePointY;
        }
        if (LocalFrame.HasMasteringDisplayLuminance) {
            VP.HasMasteringDisplayLuminance = LocalFrame.HasMasteringDisplayLuminance;
            VP.MasteringDisplayMinLuminance = LocalFrame.MasteringDisplayMinLuminance;
            VP.MasteringDisplayMaxLuminance = LocalFrame.MasteringDisplayMaxLuminance;
        }
        if (LocalFrame.HasContentLightLevel) {
            VP.HasContentLightLevel = LocalFrame.HasContentLightLevel;
            VP.ContentLightLevelMax = LocalFrame.ContentLightLevelMax;
            VP.ContentLightLevelAverage = LocalFrame.ContentLightLevelAverage;
        }
    } catch (FFMS_Exception &) {
        // Release everything acquired so far before re-throwing.
        Free();
        throw;
    }
}
362 
// All teardown is centralized in Free() so the constructor's error path
// can reuse it.
FFMS_VideoSource::~FFMS_VideoSource() {
    Free();
}
366 
GetFrameByTime(double Time)367 FFMS_Frame *FFMS_VideoSource::GetFrameByTime(double Time) {
368     int Frame = Frames.ClosestFrameFromPTS(static_cast<int64_t>((Time * 1000 * Frames.TB.Den) / Frames.TB.Num));
369     return GetFrame(Frame);
370 }
371 
handle_jpeg(AVPixelFormat * format)372 static AVColorRange handle_jpeg(AVPixelFormat *format) {
373     switch (*format) {
374     case AV_PIX_FMT_YUVJ420P: *format = AV_PIX_FMT_YUV420P; return AVCOL_RANGE_JPEG;
375     case AV_PIX_FMT_YUVJ422P: *format = AV_PIX_FMT_YUV422P; return AVCOL_RANGE_JPEG;
376     case AV_PIX_FMT_YUVJ444P: *format = AV_PIX_FMT_YUV444P; return AVCOL_RANGE_JPEG;
377     case AV_PIX_FMT_YUVJ440P: *format = AV_PIX_FMT_YUV440P; return AVCOL_RANGE_JPEG;
378     default:                                                      return AVCOL_RANGE_UNSPECIFIED;
379     }
380 }
381 
SetOutputFormat(const AVPixelFormat * TargetFormats,int Width,int Height,int Resizer)382 void FFMS_VideoSource::SetOutputFormat(const AVPixelFormat *TargetFormats, int Width, int Height, int Resizer) {
383     TargetWidth = Width;
384     TargetHeight = Height;
385     TargetResizer = Resizer;
386     TargetPixelFormats.clear();
387     while (*TargetFormats != AV_PIX_FMT_NONE)
388         TargetPixelFormats.push_back(*TargetFormats++);
389     OutputColorSpaceSet = true;
390     OutputColorRangeSet = true;
391     OutputFormat = AV_PIX_FMT_NONE;
392 
393     ReAdjustOutputFormat(DecodeFrame);
394     OutputFrame(DecodeFrame);
395 }
396 
SetInputFormat(int ColorSpace,int ColorRange,AVPixelFormat Format)397 void FFMS_VideoSource::SetInputFormat(int ColorSpace, int ColorRange, AVPixelFormat Format) {
398     InputFormatOverridden = true;
399 
400     if (Format != AV_PIX_FMT_NONE)
401         InputFormat = Format;
402     if (ColorRange != AVCOL_RANGE_UNSPECIFIED)
403         InputColorRange = (AVColorRange)ColorRange;
404     if (ColorSpace != AVCOL_SPC_UNSPECIFIED)
405         InputColorSpace = (AVColorSpace)ColorSpace;
406 
407     if (TargetPixelFormats.size()) {
408         ReAdjustOutputFormat(DecodeFrame);
409         OutputFrame(DecodeFrame);
410     }
411 }
412 
DetectInputFormat()413 void FFMS_VideoSource::DetectInputFormat() {
414     if (InputFormat == AV_PIX_FMT_NONE)
415         InputFormat = CodecContext->pix_fmt;
416 
417     AVColorRange RangeFromFormat = handle_jpeg(&InputFormat);
418 
419     if (InputColorRange == AVCOL_RANGE_UNSPECIFIED)
420         InputColorRange = RangeFromFormat;
421     if (InputColorRange == AVCOL_RANGE_UNSPECIFIED)
422         InputColorRange = CodecContext->color_range;
423 
424     if (InputColorSpace == AVCOL_SPC_UNSPECIFIED)
425         InputColorSpace = CodecContext->colorspace;
426 }
427 
// Rebuild the output conversion state: pick the best output pixel format
// from the user's candidate list, negotiate output colorspace/range (with
// special handling for RGB/YUV/gray cross-conversion), create a new swscale
// context when any conversion is actually needed, and reallocate the output
// frame buffers. Throws FFMS_Exception (after ResetOutputFormat) on failure.
void FFMS_VideoSource::ReAdjustOutputFormat(AVFrame *Frame) {
    // Drop any previous scaler before renegotiating.
    if (SWS) {
        sws_freeContext(SWS);
        SWS = nullptr;
    }

    DetectInputFormat();

    OutputFormat = FindBestPixelFormat(TargetPixelFormats, InputFormat);
    if (OutputFormat == AV_PIX_FMT_NONE) {
        ResetOutputFormat();
        throw FFMS_Exception(FFMS_ERROR_SCALING, FFMS_ERROR_INVALID_ARGUMENT,
            "No suitable output format found");
    }

    // Output range: implied by a "J" output format, else the codec's range,
    // else whatever was detected for the input.
    OutputColorRange = handle_jpeg(&OutputFormat);
    if (OutputColorRange == AVCOL_RANGE_UNSPECIFIED)
        OutputColorRange = CodecContext->color_range;
    if (OutputColorRange == AVCOL_RANGE_UNSPECIFIED)
        OutputColorRange = InputColorRange;

    OutputColorSpace = CodecContext->colorspace;
    if (OutputColorSpace == AVCOL_SPC_UNSPECIFIED)
        OutputColorSpace = InputColorSpace;

    BCSType InputType = GuessCSType(InputFormat);
    BCSType OutputType = GuessCSType(OutputFormat);

    // Crossing color model boundaries (RGB <-> YUV <-> gray) forces fixed
    // output colorimetry; staying in the same model leaves per-frame values
    // untouched (-1 = "no override", see OutputFrame).
    if (InputType != OutputType) {
        if (OutputType == cRGB) {
            OutputColorSpace = AVCOL_SPC_RGB;
            OutputColorRange = AVCOL_RANGE_UNSPECIFIED;
            OutputColorPrimaries = AVCOL_PRI_UNSPECIFIED;
            OutputTransferCharateristics = AVCOL_TRC_UNSPECIFIED;
            OutputChromaLocation = AVCHROMA_LOC_UNSPECIFIED;
        } else if (OutputType == cYUV) {
            OutputColorSpace = AVCOL_SPC_BT470BG;
            OutputColorRange = AVCOL_RANGE_MPEG;
            OutputColorPrimaries = AVCOL_PRI_UNSPECIFIED;
            OutputTransferCharateristics = AVCOL_TRC_UNSPECIFIED;
            OutputChromaLocation = AVCHROMA_LOC_LEFT;
        } else if (OutputType == cGRAY) {
            OutputColorSpace = AVCOL_SPC_UNSPECIFIED;
            OutputColorRange = AVCOL_RANGE_UNSPECIFIED;
            OutputColorPrimaries = AVCOL_PRI_UNSPECIFIED;
            OutputTransferCharateristics = AVCOL_TRC_UNSPECIFIED;
            OutputChromaLocation = AVCHROMA_LOC_UNSPECIFIED;
        }
    } else {
        OutputColorPrimaries = -1;
        OutputTransferCharateristics = -1;
        OutputChromaLocation = -1;
    }

    // Only create a scaler when something actually changes between input
    // and output.
    if (InputFormat != OutputFormat ||
        TargetWidth != CodecContext->width ||
        TargetHeight != CodecContext->height ||
        InputColorSpace != OutputColorSpace ||
        InputColorRange != OutputColorRange) {
        SWS = GetSwsContext(
            Frame->width, Frame->height, InputFormat, InputColorSpace, InputColorRange,
            TargetWidth, TargetHeight, OutputFormat, OutputColorSpace, OutputColorRange,
            TargetResizer);

        if (!SWS) {
            ResetOutputFormat();
            throw FFMS_Exception(FFMS_ERROR_SCALING, FFMS_ERROR_INVALID_ARGUMENT,
                "Failed to allocate SWScale context");
        }
    }

    // Reallocate the output image buffers for the new geometry/format.
    // (av_image_alloc allocated all planes as one block anchored at [0].)
    av_freep(&SWSFrameData[0]);
    if (av_image_alloc(SWSFrameData, SWSFrameLinesize, TargetWidth, TargetHeight, OutputFormat, 4) < 0)
        throw FFMS_Exception(FFMS_ERROR_SCALING, FFMS_ERROR_ALLOCATION_FAILED,
            "Could not allocate frame with new resolution.");
}
504 
ResetOutputFormat()505 void FFMS_VideoSource::ResetOutputFormat() {
506     if (SWS) {
507         sws_freeContext(SWS);
508         SWS = nullptr;
509     }
510 
511     TargetWidth = -1;
512     TargetHeight = -1;
513     TargetPixelFormats.clear();
514 
515     OutputFormat = AV_PIX_FMT_NONE;
516     OutputColorSpace = AVCOL_SPC_UNSPECIFIED;
517     OutputColorRange = AVCOL_RANGE_UNSPECIFIED;
518     OutputColorSpaceSet = false;
519     OutputColorRangeSet = false;
520 
521     OutputFrame(DecodeFrame);
522 }
523 
ResetInputFormat()524 void FFMS_VideoSource::ResetInputFormat() {
525     InputFormatOverridden = false;
526     InputFormat = AV_PIX_FMT_NONE;
527     InputColorSpace = AVCOL_SPC_UNSPECIFIED;
528     InputColorRange = AVCOL_RANGE_UNSPECIFIED;
529 
530     ReAdjustOutputFormat(DecodeFrame);
531     OutputFrame(DecodeFrame);
532 }
533 
// Populate the FFMS_VideoProperties struct (VP) from the codec context and
// the indexed track: RFF timebase, frame count, colorimetry, first/last
// timestamps, SAR, and the initial input/output format state.
void FFMS_VideoSource::SetVideoProperties() {
    VP.RFFDenominator = CodecContext->time_base.num;
    VP.RFFNumerator = CodecContext->time_base.den;
    // h264 reports field-rate timebases; halve to get the frame rate while
    // keeping the fraction exact when the numerator is odd.
    if (CodecContext->codec_id == AV_CODEC_ID_H264) {
        if (VP.RFFNumerator & 1)
            VP.RFFDenominator *= 2;
        else
            VP.RFFNumerator /= 2;
    }
    VP.NumFrames = Frames.VisibleFrameCount();
    VP.TopFieldFirst = DecodeFrame->top_field_first;
    VP.ColorSpace = CodecContext->colorspace;
    VP.ColorRange = CodecContext->color_range;
    // these pixfmt's are deprecated but still used
    if (CodecContext->pix_fmt == AV_PIX_FMT_YUVJ420P ||
        CodecContext->pix_fmt == AV_PIX_FMT_YUVJ422P ||
        CodecContext->pix_fmt == AV_PIX_FMT_YUVJ444P
        )
        VP.ColorRange = AVCOL_RANGE_JPEG;


    // First/last frame times in seconds (track PTS is in milliseconds of
    // the track timebase, hence the /1000).
    VP.FirstTime = ((Frames[Frames.RealFrameNumber(0)].PTS * Frames.TB.Num) / (double)Frames.TB.Den) / 1000;
    VP.LastTime = ((Frames[Frames.RealFrameNumber(Frames.VisibleFrameCount()-1)].PTS * Frames.TB.Num) / (double)Frames.TB.Den) / 1000;
    VP.LastEndTime = (((Frames[Frames.RealFrameNumber(Frames.VisibleFrameCount()-1)].PTS + Frames.LastDuration) * Frames.TB.Num) / (double)Frames.TB.Den) / 1000;

    if (CodecContext->width <= 0 || CodecContext->height <= 0)
        throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_CODEC,
            "Codec returned zero size video");

    // attempt to correct framerate to the proper NTSC fraction, if applicable
    CorrectRationalFramerate(&VP.FPSNumerator, &VP.FPSDenominator);
    // correct the timebase, if necessary
    CorrectTimebase(&VP, &Frames.TB);

    // Set AR variables
    VP.SARNum = CodecContext->sample_aspect_ratio.num;
    VP.SARDen = CodecContext->sample_aspect_ratio.den;

    // Set input and output formats now that we have a CodecContext
    DetectInputFormat();

    // Default: output is a straight pass-through of the input.
    OutputFormat = InputFormat;
    OutputColorSpace = InputColorSpace;
    OutputColorRange = InputColorRange;
}
579 
HasPendingDelayedFrames()580 bool FFMS_VideoSource::HasPendingDelayedFrames() {
581     if (InitialDecode == -1) {
582         if (DelayCounter > Delay) {
583             --DelayCounter;
584             return true;
585         }
586         InitialDecode = 0;
587     }
588     return false;
589 }
590 
DecodePacket(AVPacket * Packet)591 bool FFMS_VideoSource::DecodePacket(AVPacket *Packet) {
592     std::swap(DecodeFrame, LastDecodedFrame);
593     avcodec_send_packet(CodecContext, Packet);
594 
595     int Ret = avcodec_receive_frame(CodecContext, DecodeFrame);
596     if (Ret != 0) {
597         std::swap(DecodeFrame, LastDecodedFrame);
598         if (!(Packet->flags & AV_PKT_FLAG_DISCARD))
599             DelayCounter++;
600     } else if (!!(Packet->flags & AV_PKT_FLAG_DISCARD)) {
601         // If sending discarded frame when the decode buffer is not empty, caller
602         // may still obtained bufferred decoded frames and the number of frames
603         // in the buffer decreases.
604         DelayCounter--;
605     }
606 
607     if (Ret == 0 && InitialDecode == 1)
608         InitialDecode = -1;
609 
610     // H.264 (PAFF) and HEVC can have one field per packet, and decoding delay needs
611     // to be adjusted accordingly.
612     if (CodecContext->codec_id == AV_CODEC_ID_H264 || CodecContext->codec_id == AV_CODEC_ID_HEVC) {
613         if (!PAFFAdjusted && DelayCounter > Delay && LastDecodedFrame->repeat_pict == 0 && Ret != 0) {
614             int OldBFrameDelay = Delay - (CodecContext->thread_count - 1);
615             Delay = 1 + OldBFrameDelay * 2 + (CodecContext->thread_count - 1);
616             PAFFAdjusted = true;
617         }
618     }
619 
620     return (Ret == 0) || (DelayCounter > Delay && !InitialDecode);;
621 }
622 
Seek(int n)623 int FFMS_VideoSource::Seek(int n) {
624     int ret = -1;
625 
626     DelayCounter = 0;
627     InitialDecode = 1;
628 
629     if (!SeekByPos || Frames[n].FilePos < 0) {
630         ret = av_seek_frame(FormatContext, VideoTrack, Frames[n].PTS, AVSEEK_FLAG_BACKWARD);
631         if (ret >= 0)
632             return ret;
633     }
634 
635     if (Frames[n].FilePos >= 0) {
636         ret = av_seek_frame(FormatContext, VideoTrack, Frames[n].FilePos + PosOffset, AVSEEK_FLAG_BYTE);
637         if (ret >= 0)
638             SeekByPos = true;
639     }
640     return ret;
641 }
642 
ReadFrame(AVPacket * pkt)643 int FFMS_VideoSource::ReadFrame(AVPacket *pkt) {
644     int ret = av_read_frame(FormatContext, pkt);
645     if (ret >= 0 || ret == AVERROR(EOF)) return ret;
646 
647     // Lavf reports the beginning of the actual video data as the packet's
648     // position, but the reader requires the header, so we end up seeking
649     // to the wrong position. Wait until a read actual fails to adjust the
650     // seek targets, so that if this ever gets fixed upstream our workaround
651     // doesn't re-break it.
652     if (strcmp(FormatContext->iformat->name, "yuv4mpegpipe") == 0) {
653         PosOffset = -6;
654         Seek(CurrentFrame);
655         return av_read_frame(FormatContext, pkt);
656     }
657     return ret;
658 }
659 
Free()660 void FFMS_VideoSource::Free() {
661     avcodec_free_context(&CodecContext);
662     avformat_close_input(&FormatContext);
663     if (SWS)
664         sws_freeContext(SWS);
665     av_freep(&SWSFrameData[0]);
666     av_frame_free(&DecodeFrame);
667     av_frame_free(&LastDecodedFrame);
668 }
669 
DecodeNextFrame(int64_t & AStartTime,int64_t & Pos)670 void FFMS_VideoSource::DecodeNextFrame(int64_t &AStartTime, int64_t &Pos) {
671     AStartTime = -1;
672 
673     if (HasPendingDelayedFrames())
674         return;
675 
676     AVPacket Packet;
677     InitNullPacket(Packet);
678 
679     while (ReadFrame(&Packet) >= 0) {
680         if (Packet.stream_index != VideoTrack) {
681             av_packet_unref(&Packet);
682             continue;
683         }
684 
685         if (AStartTime < 0)
686             AStartTime = Frames.UseDTS ? Packet.dts : Packet.pts;
687 
688         if (Pos < 0)
689             Pos = Packet.pos;
690 
691         bool FrameFinished = DecodePacket(&Packet);
692         av_packet_unref(&Packet);
693         if (FrameFinished)
694             return;
695     }
696 
697     // Flush final frames
698     InitNullPacket(Packet);
699     DecodePacket(&Packet);
700 }
701 
SeekTo(int n,int SeekOffset)702 bool FFMS_VideoSource::SeekTo(int n, int SeekOffset) {
703     if (SeekMode >= 0) {
704         int TargetFrame = n + SeekOffset;
705         if (TargetFrame < 0)
706             throw FFMS_Exception(FFMS_ERROR_SEEKING, FFMS_ERROR_UNKNOWN,
707                 "Frame accurate seeking is not possible in this file");
708 
709         if (SeekMode < 3)
710             TargetFrame = Frames.FindClosestVideoKeyFrame(TargetFrame);
711 
712         if (SeekMode == 0) {
713             if (n < CurrentFrame) {
714                 Seek(0);
715                 avcodec_flush_buffers(CodecContext);
716                 CurrentFrame = 0;
717             }
718         } else {
719             // 10 frames is used as a margin to prevent excessive seeking since the predicted best keyframe isn't always selected by avformat
720             if (n < CurrentFrame || TargetFrame > CurrentFrame + 10 || (SeekMode == 3 && n > CurrentFrame + 10)) {
721                 Seek(TargetFrame);
722                 avcodec_flush_buffers(CodecContext);
723                 return true;
724             }
725         }
726     } else if (n < CurrentFrame) {
727         throw FFMS_Exception(FFMS_ERROR_SEEKING, FFMS_ERROR_INVALID_ARGUMENT,
728             "Non-linear access attempted");
729     }
730     return false;
731 }
732 
// Decode and return frame n (in display order, before RealFrameNumber
// mapping). Seeks as needed per SeekMode, then decodes forward until the
// requested frame has been produced. Caches the last output: repeated
// requests for the same frame return the cached LocalFrame.
//
// NOTE: `continue` inside the do-while below still executes the loop
// condition, i.e. `++CurrentFrame <= n` -- every iteration advances
// CurrentFrame by one regardless of which branch was taken.
FFMS_Frame *FFMS_VideoSource::GetFrame(int n) {
    GetFrameCheck(n);
    n = Frames.RealFrameNumber(n);

    // Fast path: the requested frame is the one we produced last time.
    if (LastFrameNum == n)
        return &LocalFrame;

    int SeekOffset = 0;
    bool Seek = true;

    do {
        // Only seek on the first iteration, or after a retry was requested
        // below (unknown position -> back up further).
        bool HasSeeked = false;
        if (Seek) {
            HasSeeked = SeekTo(n, SeekOffset);
            Seek = false;
        }

        // Decode the next frame unless the current one is hidden (and no
        // seek happened, so skipping it is safe). StartTime/FilePos report
        // where the first decoded packet came from.
        int64_t StartTime = AV_NOPTS_VALUE, FilePos = -1;
        bool Hidden = (((unsigned) CurrentFrame < Frames.size()) && Frames[CurrentFrame].Hidden);
        if (HasSeeked || !Hidden)
            DecodeNextFrame(StartTime, FilePos);

        // Without a fresh seek our position bookkeeping is already valid;
        // just advance (via the loop condition) and keep decoding.
        if (!HasSeeked)
            continue;

        // After a seek, figure out which frame the decoder is actually at.
        if (StartTime == AV_NOPTS_VALUE && !Frames.HasTS) {
            // No timestamps in the track: fall back to byte position.
            if (FilePos >= 0) {
                CurrentFrame = Frames.FrameFromPos(FilePos);
                if (CurrentFrame >= 0)
                    continue;
            }
            // If the track doesn't have timestamps or file positions then
            // just trust that we got to the right place, since we have no
            // way to tell where we are
            else {
                CurrentFrame = n;
                continue;
            }
        }

        CurrentFrame = Frames.FrameFromPTS(StartTime);

        // Is the seek destination time known? Does it belong to a frame?
        if (CurrentFrame < 0) {
            if (SeekMode == 1 || StartTime < 0) {
                // No idea where we are so go back a bit further
                SeekOffset -= 10;
                Seek = true;
                continue;
            }
            CurrentFrame = Frames.ClosestFrameFromPTS(StartTime);
        }

        // We want to know the frame number that we just got out of the decoder,
        // but what we currently know is the frame number of the first packet
        // we fed into the decoder, and these can be different with open-gop or
        // aggressive (non-keyframe) seeking.
        // Walk backwards while earlier frames have a larger file position,
        // i.e. were stored after the packet we just fed in.
        int64_t Pos = Frames[CurrentFrame].FilePos;
        if (CurrentFrame > 0 && Pos != -1) {
            int Prev = CurrentFrame - 1;
            while (Prev >= 0 && Frames[Prev].FilePos != -1 && Frames[Prev].FilePos > Pos)
                --Prev;
            CurrentFrame = Prev + 1;
        }
    } while (++CurrentFrame <= n);

    LastFrameNum = n;
    return OutputFrame(DecodeFrame);
}
802