1 //  Copyright (c) 2014 Thomas Goyne <tgoyne@gmail.com>
2 //
3 //  Permission is hereby granted, free of charge, to any person obtaining a copy
4 //  of this software and associated documentation files (the "Software"), to deal
5 //  in the Software without restriction, including without limitation the rights
6 //  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 //  copies of the Software, and to permit persons to whom the Software is
8 //  furnished to do so, subject to the following conditions:
9 //
10 //  The above copyright notice and this permission notice shall be included in
11 //  all copies or substantial portions of the Software.
12 //
13 //  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 //  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 //  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 //  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 //  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 //  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 //  THE SOFTWARE.
20 
21 #include "track.h"
22 
23 #include "utils.h"
24 #include "zipfile.h"
25 #include "indexing.h"
26 
27 #include <algorithm>
28 #include <cmath>
29 
30 extern "C" {
31 #include <libavutil/avutil.h>
32 #include <libavutil/common.h>
33 #include <libavutil/mathematics.h>
34 }
35 
36 namespace {
ReadFrame(ZipFile & stream,FrameInfo const & prev,const FFMS_TrackType TT)37 FrameInfo ReadFrame(ZipFile &stream, FrameInfo const& prev, const FFMS_TrackType TT) {
38     FrameInfo f{};
39     f.PTS = stream.Read<int64_t>() + prev.PTS;
40     f.OriginalPTS = stream.Read<int64_t>() + prev.OriginalPTS;
41     f.KeyFrame = !!stream.Read<int8_t>();
42     f.FilePos = stream.Read<int64_t>() + prev.FilePos;
43     f.Hidden = !!stream.Read<int8_t>();
44 
45     if (TT == FFMS_TYPE_AUDIO) {
46         f.SampleStart = prev.SampleStart + prev.SampleCount;
47         f.SampleCount = stream.Read<uint32_t>() + prev.SampleCount;
48     } else if (TT == FFMS_TYPE_VIDEO) {
49         f.OriginalPos = static_cast<size_t>(stream.Read<uint64_t>() + prev.OriginalPos + 1);
50         f.RepeatPict = stream.Read<int32_t>();
51     }
52     return f;
53 }
54 
WriteFrame(ZipFile & stream,FrameInfo const & f,FrameInfo const & prev,const FFMS_TrackType TT)55 static void WriteFrame(ZipFile &stream, FrameInfo const& f, FrameInfo const& prev, const FFMS_TrackType TT) {
56     stream.Write(f.PTS - prev.PTS);
57     stream.Write(f.OriginalPTS - prev.OriginalPTS);
58     stream.Write<int8_t>(f.KeyFrame);
59     stream.Write(f.FilePos - prev.FilePos);
60     stream.Write<uint8_t>(f.Hidden);
61 
62     if (TT == FFMS_TYPE_AUDIO)
63         stream.Write(f.SampleCount - prev.SampleCount);
64     else if (TT == FFMS_TYPE_VIDEO) {
65         stream.Write(static_cast<uint64_t>(f.OriginalPos) - prev.OriginalPos - 1);
66         stream.Write<int32_t>(f.RepeatPict);
67     }
68 }
69 }
70 
FFMS_Track()71 FFMS_Track::FFMS_Track()
72     : Data(std::make_shared<TrackData>())
73 {
74 }
75 
FFMS_Track(int64_t Num,int64_t Den,FFMS_TrackType TT,bool HasDiscontTS,bool UseDTS,bool HasTS)76 FFMS_Track::FFMS_Track(int64_t Num, int64_t Den, FFMS_TrackType TT, bool HasDiscontTS, bool UseDTS, bool HasTS)
77     : Data(std::make_shared<TrackData>())
78     , TT(TT)
79     , UseDTS(UseDTS)
80     , HasTS(HasTS)
81     , HasDiscontTS(HasDiscontTS) {
82     TB.Num = Num;
83     TB.Den = Den;
84 }
85 
FFMS_Track(ZipFile & stream)86 FFMS_Track::FFMS_Track(ZipFile &stream)
87     : Data(std::make_shared<TrackData>()) {
88     frame_vec &Frames = Data->Frames;
89     TT = static_cast<FFMS_TrackType>(stream.Read<uint8_t>());
90     TB.Num = stream.Read<int64_t>();
91     TB.Den = stream.Read<int64_t>();
92     LastDuration = stream.Read<int64_t>();
93     MaxBFrames = stream.Read<int32_t>();
94     UseDTS = !!stream.Read<uint8_t>();
95     HasTS = !!stream.Read<uint8_t>();
96     size_t FrameCount = static_cast<size_t>(stream.Read<uint64_t>());
97 
98     if (!FrameCount) return;
99 
100     FrameInfo temp{};
101     Frames.reserve(FrameCount);
102     for (size_t i = 0; i < FrameCount; ++i)
103         Frames.push_back(ReadFrame(stream, i == 0 ? temp : Frames.back(), TT));
104 
105     if (TT == FFMS_TYPE_VIDEO)
106         GeneratePublicInfo();
107 }
108 
Write(ZipFile & stream) const109 void FFMS_Track::Write(ZipFile &stream) const {
110     frame_vec &Frames = Data->Frames;
111     stream.Write<uint8_t>(TT);
112     stream.Write(TB.Num);
113     stream.Write(TB.Den);
114     stream.Write<int64_t>(LastDuration);
115     stream.Write<int32_t>(MaxBFrames);
116     stream.Write<uint8_t>(UseDTS);
117     stream.Write<uint8_t>(HasTS);
118     stream.Write<uint64_t>(size());
119 
120     if (empty()) return;
121 
122     FrameInfo temp{};
123     for (size_t i = 0; i < size(); ++i)
124         WriteFrame(stream, Frames[i], i == 0 ? temp : Frames[i - 1], TT);
125 }
126 
AddVideoFrame(int64_t PTS,int RepeatPict,bool KeyFrame,int FrameType,int64_t FilePos,bool Hidden)127 void FFMS_Track::AddVideoFrame(int64_t PTS, int RepeatPict, bool KeyFrame, int FrameType, int64_t FilePos, bool Hidden) {
128     Data->Frames.push_back({ PTS, 0, FilePos, 0, 0, 0, FrameType, RepeatPict, KeyFrame, Hidden });
129 }
130 
AddAudioFrame(int64_t PTS,int64_t SampleStart,uint32_t SampleCount,bool KeyFrame,int64_t FilePos,bool Hidden)131 void FFMS_Track::AddAudioFrame(int64_t PTS, int64_t SampleStart, uint32_t SampleCount, bool KeyFrame, int64_t FilePos, bool Hidden) {
132     if (SampleCount > 0) {
133         Data->Frames.push_back({ PTS, 0, FilePos, SampleStart, SampleCount,
134             0, 0, 0, KeyFrame, Hidden });
135     }
136 }
137 
WriteTimecodes(const char * TimecodeFile) const138 void FFMS_Track::WriteTimecodes(const char *TimecodeFile) const {
139     frame_vec &Frames = Data->Frames;
140     FileHandle file(TimecodeFile, "w", FFMS_ERROR_TRACK, FFMS_ERROR_FILE_WRITE);
141 
142     file.Printf("# timecode format v2\n");
143     for (size_t i = 0; i < size(); ++i) {
144         if (!Frames[i].Hidden)
145             file.Printf("%.02f\n", (Frames[i].PTS * TB.Num) / (double)TB.Den);
146     }
147 }
148 
PTSComparison(FrameInfo FI1,FrameInfo FI2)149 static bool PTSComparison(FrameInfo FI1, FrameInfo FI2) {
150     return FI1.PTS < FI2.PTS;
151 }
152 
FrameFromPTS(int64_t PTS) const153 int FFMS_Track::FrameFromPTS(int64_t PTS) const {
154     FrameInfo F;
155     F.PTS = PTS;
156 
157     auto Pos = std::lower_bound(begin(), end(), F, PTSComparison);
158     while (Pos != end() && Pos->Hidden && Pos->PTS == PTS)
159         Pos++;
160 
161     if (Pos == end() || Pos->PTS != PTS)
162         return -1;
163     return std::distance(begin(), Pos);
164 }
165 
FrameFromPos(int64_t Pos) const166 int FFMS_Track::FrameFromPos(int64_t Pos) const {
167     for (size_t i = 0; i < size(); i++)
168         if (Data->Frames[i].FilePos == Pos && !Data->Frames[i].Hidden)
169             return static_cast<int>(i);
170     return -1;
171 }
172 
ClosestFrameFromPTS(int64_t PTS) const173 int FFMS_Track::ClosestFrameFromPTS(int64_t PTS) const {
174     FrameInfo F;
175     F.PTS = PTS;
176 
177     auto Pos = std::lower_bound(begin(), end(), F, PTSComparison);
178     while (Pos != end() && Pos->Hidden && Pos->PTS == PTS)
179         Pos++;
180 
181     if (Pos == end())
182         return static_cast<int>(size() - 1);
183     size_t Frame = std::distance(begin(), Pos);
184     if (Pos == begin() || FFABS(Pos->PTS - PTS) <= FFABS((Pos - 1)->PTS - PTS))
185         return static_cast<int>(Frame);
186     return static_cast<int>(Frame - 1);
187 }
188 
FindClosestVideoKeyFrame(int Frame) const189 int FFMS_Track::FindClosestVideoKeyFrame(int Frame) const {
190     frame_vec &Frames = Data->Frames;
191     Frame = std::min(std::max(Frame, 0), static_cast<int>(size()) - 1);
192     for (; Frame > 0 && !Frames[Frame].KeyFrame; Frame--);
193     for (; Frame > 0 && !Frames[Frames[Frame].OriginalPos].KeyFrame; Frame--);
194     return Frame;
195 }
196 
RealFrameNumber(int Frame) const197 int FFMS_Track::RealFrameNumber(int Frame) const {
198     return Data->RealFrameNumbers[Frame];
199 }
200 
VisibleFrameCount() const201 int FFMS_Track::VisibleFrameCount() const {
202     return TT == FFMS_TYPE_AUDIO ? static_cast<int>(Data->Frames.size()) : static_cast<int>(Data->RealFrameNumbers.size());
203 }
204 
MaybeReorderFrames()205 void FFMS_Track::MaybeReorderFrames() {
206     frame_vec &Frames = Data->Frames;
207     // First check if we need to do anything
208     bool has_b_frames = false;
209     for (size_t i = 1; i < size(); ++i) {
210         // If the timestamps are already out of order, then they actually are
211         // presentation timestamps and we don't need to do anything
212         if (Frames[i].PTS < Frames[i - 1].PTS)
213             return;
214 
215         if (Frames[i].FrameType == AV_PICTURE_TYPE_B) {
216             has_b_frames = true;
217 
218             // Reordering files with multiple b-frames is currently not
219             // supported
220             if (Frames[i - 1].FrameType == AV_PICTURE_TYPE_B)
221                 return;
222         }
223     }
224 
225     // Don't need to do anything if there are no b-frames as presentation order
226     // equals decoding order
227     if (!has_b_frames)
228         return;
229 
230     // We have b-frames, but the timestamps are monotonically increasing. This
231     // means that the timestamps we have are decoding timestamps, and we want
232     // presentation time stamps. Convert DTS to PTS by swapping the timestamp
233     // of each b-frame with the frame before it. This only works for the
234     // specific case of b-frames which reference the frame immediately after
235     // them temporally, but that happens to cover the only files I've seen
236     // with b-frames and no presentation timestamps.
237     for (size_t i = 1; i < size(); ++i) {
238         if (Frames[i].FrameType == AV_PICTURE_TYPE_B)
239             std::swap(Frames[i].PTS, Frames[i - 1].PTS);
240     }
241 }
242 
MaybeHideFrames()243 void FFMS_Track::MaybeHideFrames() {
244     frame_vec &Frames = Data->Frames;
245     // Awful handling for interlaced H.264: each frame is output twice, so hide
246     // frames with an invalid file position. The PTS will not match sometimes,
247     // since libavformat makes up timestamps... but only sometimes.
248     for (size_t i = 1; i < size(); ++i) {
249         FrameInfo const& prev = Frames[i - 1];
250         FrameInfo& cur = Frames[i];
251 
252         if (prev.FilePos >= 0 && (cur.FilePos == -1 || cur.FilePos == prev.FilePos) && !prev.Hidden)
253             cur.Hidden = true;
254     }
255 }
256 
FillAudioGaps()257 void FFMS_Track::FillAudioGaps() {
258     frame_vec &Frames = Data->Frames;
259     // There may not be audio data for the entire duration of the audio track,
260     // as some formats support gaps between the end time of one packet and the
261     // PTS of the next audio packet, and we should zero-fill those gaps.
262     // However, garbage or missing timestamps for audio tracks are very common,
263     // so we only want to trust them if they're all present, monotonically
264     // increasing, and result in a total duration meaningfully longer than the
265     // samples we have would cover.
266     if (size() < 2 || !HasTS || front().PTS == AV_NOPTS_VALUE || back().PTS == AV_NOPTS_VALUE)
267         return;
268 
269     const auto DurationToSamples = [this](int64_t Dur) {
270         auto Num = TB.Num * SampleRate;
271         auto Den = TB.Den * 1000;
272         return av_rescale(Dur, Num, Den);
273     };
274 
275     const auto SamplesToDuration = [this](int64_t Samples) {
276         auto Num = TB.Den * 1000;
277         auto Den = TB.Num * SampleRate;
278         return av_rescale(Samples, Num, Den);
279     };
280 
281     if (HasDiscontTS) {
282         int64_t shift = 0;
283         Frames[0].OriginalPTS = Frames[0].PTS;
284         for (size_t i = 1; i < size(); i++) {
285             Frames[i].OriginalPTS = Frames[i].PTS;
286             if (Frames[i].PTS != AV_NOPTS_VALUE && Frames[i].OriginalPTS <= Frames[i-1].OriginalPTS)
287                 shift = -(Frames[i].PTS) + Frames[i-1].PTS + SamplesToDuration(Frames[i-1].SampleCount);
288             Frames[i].PTS += shift;
289         }
290     }
291 
292     const auto ActualSamples = back().SampleStart + back().SampleCount;
293     const auto ExpectedSamples = DurationToSamples(back().PTS - front().PTS) + back().SampleCount;
294     if (ActualSamples + 5 > ExpectedSamples) // arbitrary threshold to cover rounding/not worth adjusting
295         return;
296 
297     // Verify that every frame has a timestamp and that they monotonically
298     // increase, as otherwise we can't trust them
299     auto PrevPTS = front().PTS - 1;
300     for (auto const& frame : *this) {
301         if (frame.PTS == AV_NOPTS_VALUE || frame.PTS <= PrevPTS)
302             return;
303         PrevPTS = frame.PTS;
304     }
305 
306     // There are some missing samples and the timestamps appear to all be valid,
307     // so go ahead and extend the frames to cover the gaps
308     const auto FirstPTS = front().PTS;
309     auto PrevFrame = &Frames.front();
310     int32_t Shift = 0;
311     for (auto& Frame : Frames) {
312         if (Shift > 0)
313             Frame.SampleStart += Shift;
314 
315         const auto ExpectedStartSample = DurationToSamples(Frame.PTS - FirstPTS);
316         const auto Gap = static_cast<int32_t>(ExpectedStartSample - Frame.SampleStart);
317         if (Gap > 0) {
318             PrevFrame->SampleCount += Gap;
319             Frame.SampleStart = ExpectedStartSample;
320         }
321         Shift += Gap;
322         PrevFrame = &Frame;
323     }
324 }
325 
FinalizeTrack()326 void FFMS_Track::FinalizeTrack() {
327     frame_vec &Frames = Data->Frames;
328     // With some formats (such as Vorbis) a bad final packet results in a
329     // frame with PTS 0, which we don't want to sort to the beginning
330     if (size() > 2 && front().PTS >= back().PTS)
331         Frames.pop_back();
332 
333     if (TT == FFMS_TYPE_AUDIO) {
334         FillAudioGaps();
335         return;
336     }
337 
338     if (TT != FFMS_TYPE_VIDEO)
339         return;
340 
341     for (size_t i = 0; i < size(); i++) {
342         Frames[i].OriginalPos = i;
343         Frames[i].OriginalPTS = Frames[i].PTS;
344     }
345 
346     MaybeReorderFrames();
347 
348     if (size() > 2 && HasDiscontTS) {
349         std::vector<size_t> secs = { 0 };
350 
351         auto lastPTS = Frames[0].PTS;
352         const auto thresh = std::abs(Frames[1].PTS - Frames[0].PTS) * 16; // A bad approximation of 16 frames, the max reorder buffer size.
353         for (size_t i = 0; i < size(); i++) {
354             if (Frames[i].PTS < lastPTS && (lastPTS - Frames[i].PTS) > thresh && i + 1 < size()) {
355                 secs.push_back(i);
356                 i++; // Sections must be at least 2 frames long.
357             }
358             lastPTS = Frames[i].PTS;
359         }
360 
361         // We need to sort each distinct sections by PTS to account for any reordering.
362         for (size_t i = 0; i < secs.size() - 1; i++)
363             sort(Frames.begin() + secs[i], Frames.begin() + secs[i + 1], PTSComparison);
364         sort(Frames.begin() + secs.back(), Frames.end(), PTSComparison);
365 
366         // Try and make up some sane timestamps based on previous sections, while
367         // keeping the same frame durations.
368         for (size_t i = 1; i < secs.size(); i++) {
369             const auto shift = -(Frames[secs[i]].PTS) + (Frames[secs[i] + 1].PTS - Frames[secs[i]].PTS) + Frames[secs[i] - 1].PTS;
370             size_t end;
371             if (i == secs.size() - 1)
372                 end = Frames.size();
373             else
374                 end = secs[i + 1];
375             for (size_t j = secs[i]; j < end; j++)
376                 Frames[j].PTS += shift;
377         }
378     } else {
379         sort(Frames.begin(), Frames.end(), PTSComparison);
380     }
381 
382     std::vector<size_t> ReorderTemp;
383     ReorderTemp.reserve(size());
384 
385     for (size_t i = 0; i < size(); i++)
386         ReorderTemp.push_back(Frames[i].OriginalPos);
387 
388     for (size_t i = 0; i < size(); i++)
389         Frames[ReorderTemp[i]].OriginalPos = i;
390 
391     GeneratePublicInfo();
392 }
393 
GeneratePublicInfo()394 void FFMS_Track::GeneratePublicInfo() {
395     frame_vec &Frames = Data->Frames;
396     std::vector<int> &RealFrameNumbers = Data->RealFrameNumbers;
397     std::vector<FFMS_FrameInfo> &PublicFrameInfo = Data->PublicFrameInfo;
398     RealFrameNumbers.reserve(size());
399     PublicFrameInfo.reserve(size());
400     for (size_t i = 0; i < size(); ++i) {
401         if (Frames[i].Hidden)
402             continue;
403         RealFrameNumbers.push_back(static_cast<int>(i));
404 
405         FFMS_FrameInfo info = { Frames[i].PTS, Frames[i].RepeatPict, Frames[i].KeyFrame, Frames[i].OriginalPTS };
406         PublicFrameInfo.push_back(info);
407     }
408 }
409 
GetFrameInfo(size_t N) const410 const FFMS_FrameInfo *FFMS_Track::GetFrameInfo(size_t N) const {
411     std::vector<FFMS_FrameInfo> &PublicFrameInfo = Data->PublicFrameInfo;
412     if (N >= PublicFrameInfo.size()) return nullptr;
413     return &PublicFrameInfo[N];
414 }
415