1 // Copyright (c) 2014 Thomas Goyne <tgoyne@gmail.com>
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20
21 #include "track.h"
22
23 #include "utils.h"
24 #include "zipfile.h"
25 #include "indexing.h"
26
27 #include <algorithm>
28 #include <cmath>
29
30 extern "C" {
31 #include <libavutil/avutil.h>
32 #include <libavutil/common.h>
33 #include <libavutil/mathematics.h>
34 }
35
36 namespace {
ReadFrame(ZipFile & stream,FrameInfo const & prev,const FFMS_TrackType TT)37 FrameInfo ReadFrame(ZipFile &stream, FrameInfo const& prev, const FFMS_TrackType TT) {
38 FrameInfo f{};
39 f.PTS = stream.Read<int64_t>() + prev.PTS;
40 f.OriginalPTS = stream.Read<int64_t>() + prev.OriginalPTS;
41 f.KeyFrame = !!stream.Read<int8_t>();
42 f.FilePos = stream.Read<int64_t>() + prev.FilePos;
43 f.Hidden = !!stream.Read<int8_t>();
44
45 if (TT == FFMS_TYPE_AUDIO) {
46 f.SampleStart = prev.SampleStart + prev.SampleCount;
47 f.SampleCount = stream.Read<uint32_t>() + prev.SampleCount;
48 } else if (TT == FFMS_TYPE_VIDEO) {
49 f.OriginalPos = static_cast<size_t>(stream.Read<uint64_t>() + prev.OriginalPos + 1);
50 f.RepeatPict = stream.Read<int32_t>();
51 }
52 return f;
53 }
54
WriteFrame(ZipFile & stream,FrameInfo const & f,FrameInfo const & prev,const FFMS_TrackType TT)55 static void WriteFrame(ZipFile &stream, FrameInfo const& f, FrameInfo const& prev, const FFMS_TrackType TT) {
56 stream.Write(f.PTS - prev.PTS);
57 stream.Write(f.OriginalPTS - prev.OriginalPTS);
58 stream.Write<int8_t>(f.KeyFrame);
59 stream.Write(f.FilePos - prev.FilePos);
60 stream.Write<uint8_t>(f.Hidden);
61
62 if (TT == FFMS_TYPE_AUDIO)
63 stream.Write(f.SampleCount - prev.SampleCount);
64 else if (TT == FFMS_TYPE_VIDEO) {
65 stream.Write(static_cast<uint64_t>(f.OriginalPos) - prev.OriginalPos - 1);
66 stream.Write<int32_t>(f.RepeatPict);
67 }
68 }
69 }
70
FFMS_Track()71 FFMS_Track::FFMS_Track()
72 : Data(std::make_shared<TrackData>())
73 {
74 }
75
FFMS_Track(int64_t Num,int64_t Den,FFMS_TrackType TT,bool HasDiscontTS,bool UseDTS,bool HasTS)76 FFMS_Track::FFMS_Track(int64_t Num, int64_t Den, FFMS_TrackType TT, bool HasDiscontTS, bool UseDTS, bool HasTS)
77 : Data(std::make_shared<TrackData>())
78 , TT(TT)
79 , UseDTS(UseDTS)
80 , HasTS(HasTS)
81 , HasDiscontTS(HasDiscontTS) {
82 TB.Num = Num;
83 TB.Den = Den;
84 }
85
FFMS_Track(ZipFile & stream)86 FFMS_Track::FFMS_Track(ZipFile &stream)
87 : Data(std::make_shared<TrackData>()) {
88 frame_vec &Frames = Data->Frames;
89 TT = static_cast<FFMS_TrackType>(stream.Read<uint8_t>());
90 TB.Num = stream.Read<int64_t>();
91 TB.Den = stream.Read<int64_t>();
92 LastDuration = stream.Read<int64_t>();
93 MaxBFrames = stream.Read<int32_t>();
94 UseDTS = !!stream.Read<uint8_t>();
95 HasTS = !!stream.Read<uint8_t>();
96 size_t FrameCount = static_cast<size_t>(stream.Read<uint64_t>());
97
98 if (!FrameCount) return;
99
100 FrameInfo temp{};
101 Frames.reserve(FrameCount);
102 for (size_t i = 0; i < FrameCount; ++i)
103 Frames.push_back(ReadFrame(stream, i == 0 ? temp : Frames.back(), TT));
104
105 if (TT == FFMS_TYPE_VIDEO)
106 GeneratePublicInfo();
107 }
108
Write(ZipFile & stream) const109 void FFMS_Track::Write(ZipFile &stream) const {
110 frame_vec &Frames = Data->Frames;
111 stream.Write<uint8_t>(TT);
112 stream.Write(TB.Num);
113 stream.Write(TB.Den);
114 stream.Write<int64_t>(LastDuration);
115 stream.Write<int32_t>(MaxBFrames);
116 stream.Write<uint8_t>(UseDTS);
117 stream.Write<uint8_t>(HasTS);
118 stream.Write<uint64_t>(size());
119
120 if (empty()) return;
121
122 FrameInfo temp{};
123 for (size_t i = 0; i < size(); ++i)
124 WriteFrame(stream, Frames[i], i == 0 ? temp : Frames[i - 1], TT);
125 }
126
AddVideoFrame(int64_t PTS,int RepeatPict,bool KeyFrame,int FrameType,int64_t FilePos,bool Hidden)127 void FFMS_Track::AddVideoFrame(int64_t PTS, int RepeatPict, bool KeyFrame, int FrameType, int64_t FilePos, bool Hidden) {
128 Data->Frames.push_back({ PTS, 0, FilePos, 0, 0, 0, FrameType, RepeatPict, KeyFrame, Hidden });
129 }
130
AddAudioFrame(int64_t PTS,int64_t SampleStart,uint32_t SampleCount,bool KeyFrame,int64_t FilePos,bool Hidden)131 void FFMS_Track::AddAudioFrame(int64_t PTS, int64_t SampleStart, uint32_t SampleCount, bool KeyFrame, int64_t FilePos, bool Hidden) {
132 if (SampleCount > 0) {
133 Data->Frames.push_back({ PTS, 0, FilePos, SampleStart, SampleCount,
134 0, 0, 0, KeyFrame, Hidden });
135 }
136 }
137
WriteTimecodes(const char * TimecodeFile) const138 void FFMS_Track::WriteTimecodes(const char *TimecodeFile) const {
139 frame_vec &Frames = Data->Frames;
140 FileHandle file(TimecodeFile, "w", FFMS_ERROR_TRACK, FFMS_ERROR_FILE_WRITE);
141
142 file.Printf("# timecode format v2\n");
143 for (size_t i = 0; i < size(); ++i) {
144 if (!Frames[i].Hidden)
145 file.Printf("%.02f\n", (Frames[i].PTS * TB.Num) / (double)TB.Den);
146 }
147 }
148
PTSComparison(FrameInfo FI1,FrameInfo FI2)149 static bool PTSComparison(FrameInfo FI1, FrameInfo FI2) {
150 return FI1.PTS < FI2.PTS;
151 }
152
FrameFromPTS(int64_t PTS) const153 int FFMS_Track::FrameFromPTS(int64_t PTS) const {
154 FrameInfo F;
155 F.PTS = PTS;
156
157 auto Pos = std::lower_bound(begin(), end(), F, PTSComparison);
158 while (Pos != end() && Pos->Hidden && Pos->PTS == PTS)
159 Pos++;
160
161 if (Pos == end() || Pos->PTS != PTS)
162 return -1;
163 return std::distance(begin(), Pos);
164 }
165
FrameFromPos(int64_t Pos) const166 int FFMS_Track::FrameFromPos(int64_t Pos) const {
167 for (size_t i = 0; i < size(); i++)
168 if (Data->Frames[i].FilePos == Pos && !Data->Frames[i].Hidden)
169 return static_cast<int>(i);
170 return -1;
171 }
172
ClosestFrameFromPTS(int64_t PTS) const173 int FFMS_Track::ClosestFrameFromPTS(int64_t PTS) const {
174 FrameInfo F;
175 F.PTS = PTS;
176
177 auto Pos = std::lower_bound(begin(), end(), F, PTSComparison);
178 while (Pos != end() && Pos->Hidden && Pos->PTS == PTS)
179 Pos++;
180
181 if (Pos == end())
182 return static_cast<int>(size() - 1);
183 size_t Frame = std::distance(begin(), Pos);
184 if (Pos == begin() || FFABS(Pos->PTS - PTS) <= FFABS((Pos - 1)->PTS - PTS))
185 return static_cast<int>(Frame);
186 return static_cast<int>(Frame - 1);
187 }
188
FindClosestVideoKeyFrame(int Frame) const189 int FFMS_Track::FindClosestVideoKeyFrame(int Frame) const {
190 frame_vec &Frames = Data->Frames;
191 Frame = std::min(std::max(Frame, 0), static_cast<int>(size()) - 1);
192 for (; Frame > 0 && !Frames[Frame].KeyFrame; Frame--);
193 for (; Frame > 0 && !Frames[Frames[Frame].OriginalPos].KeyFrame; Frame--);
194 return Frame;
195 }
196
RealFrameNumber(int Frame) const197 int FFMS_Track::RealFrameNumber(int Frame) const {
198 return Data->RealFrameNumbers[Frame];
199 }
200
VisibleFrameCount() const201 int FFMS_Track::VisibleFrameCount() const {
202 return TT == FFMS_TYPE_AUDIO ? static_cast<int>(Data->Frames.size()) : static_cast<int>(Data->RealFrameNumbers.size());
203 }
204
MaybeReorderFrames()205 void FFMS_Track::MaybeReorderFrames() {
206 frame_vec &Frames = Data->Frames;
207 // First check if we need to do anything
208 bool has_b_frames = false;
209 for (size_t i = 1; i < size(); ++i) {
210 // If the timestamps are already out of order, then they actually are
211 // presentation timestamps and we don't need to do anything
212 if (Frames[i].PTS < Frames[i - 1].PTS)
213 return;
214
215 if (Frames[i].FrameType == AV_PICTURE_TYPE_B) {
216 has_b_frames = true;
217
218 // Reordering files with multiple b-frames is currently not
219 // supported
220 if (Frames[i - 1].FrameType == AV_PICTURE_TYPE_B)
221 return;
222 }
223 }
224
225 // Don't need to do anything if there are no b-frames as presentation order
226 // equals decoding order
227 if (!has_b_frames)
228 return;
229
230 // We have b-frames, but the timestamps are monotonically increasing. This
231 // means that the timestamps we have are decoding timestamps, and we want
232 // presentation time stamps. Convert DTS to PTS by swapping the timestamp
233 // of each b-frame with the frame before it. This only works for the
234 // specific case of b-frames which reference the frame immediately after
235 // them temporally, but that happens to cover the only files I've seen
236 // with b-frames and no presentation timestamps.
237 for (size_t i = 1; i < size(); ++i) {
238 if (Frames[i].FrameType == AV_PICTURE_TYPE_B)
239 std::swap(Frames[i].PTS, Frames[i - 1].PTS);
240 }
241 }
242
MaybeHideFrames()243 void FFMS_Track::MaybeHideFrames() {
244 frame_vec &Frames = Data->Frames;
245 // Awful handling for interlaced H.264: each frame is output twice, so hide
246 // frames with an invalid file position. The PTS will not match sometimes,
247 // since libavformat makes up timestamps... but only sometimes.
248 for (size_t i = 1; i < size(); ++i) {
249 FrameInfo const& prev = Frames[i - 1];
250 FrameInfo& cur = Frames[i];
251
252 if (prev.FilePos >= 0 && (cur.FilePos == -1 || cur.FilePos == prev.FilePos) && !prev.Hidden)
253 cur.Hidden = true;
254 }
255 }
256
FillAudioGaps()257 void FFMS_Track::FillAudioGaps() {
258 frame_vec &Frames = Data->Frames;
259 // There may not be audio data for the entire duration of the audio track,
260 // as some formats support gaps between the end time of one packet and the
261 // PTS of the next audio packet, and we should zero-fill those gaps.
262 // However, garbage or missing timestamps for audio tracks are very common,
263 // so we only want to trust them if they're all present, monotonically
264 // increasing, and result in a total duration meaningfully longer than the
265 // samples we have would cover.
266 if (size() < 2 || !HasTS || front().PTS == AV_NOPTS_VALUE || back().PTS == AV_NOPTS_VALUE)
267 return;
268
269 const auto DurationToSamples = [this](int64_t Dur) {
270 auto Num = TB.Num * SampleRate;
271 auto Den = TB.Den * 1000;
272 return av_rescale(Dur, Num, Den);
273 };
274
275 const auto SamplesToDuration = [this](int64_t Samples) {
276 auto Num = TB.Den * 1000;
277 auto Den = TB.Num * SampleRate;
278 return av_rescale(Samples, Num, Den);
279 };
280
281 if (HasDiscontTS) {
282 int64_t shift = 0;
283 Frames[0].OriginalPTS = Frames[0].PTS;
284 for (size_t i = 1; i < size(); i++) {
285 Frames[i].OriginalPTS = Frames[i].PTS;
286 if (Frames[i].PTS != AV_NOPTS_VALUE && Frames[i].OriginalPTS <= Frames[i-1].OriginalPTS)
287 shift = -(Frames[i].PTS) + Frames[i-1].PTS + SamplesToDuration(Frames[i-1].SampleCount);
288 Frames[i].PTS += shift;
289 }
290 }
291
292 const auto ActualSamples = back().SampleStart + back().SampleCount;
293 const auto ExpectedSamples = DurationToSamples(back().PTS - front().PTS) + back().SampleCount;
294 if (ActualSamples + 5 > ExpectedSamples) // arbitrary threshold to cover rounding/not worth adjusting
295 return;
296
297 // Verify that every frame has a timestamp and that they monotonically
298 // increase, as otherwise we can't trust them
299 auto PrevPTS = front().PTS - 1;
300 for (auto const& frame : *this) {
301 if (frame.PTS == AV_NOPTS_VALUE || frame.PTS <= PrevPTS)
302 return;
303 PrevPTS = frame.PTS;
304 }
305
306 // There are some missing samples and the timestamps appear to all be valid,
307 // so go ahead and extend the frames to cover the gaps
308 const auto FirstPTS = front().PTS;
309 auto PrevFrame = &Frames.front();
310 int32_t Shift = 0;
311 for (auto& Frame : Frames) {
312 if (Shift > 0)
313 Frame.SampleStart += Shift;
314
315 const auto ExpectedStartSample = DurationToSamples(Frame.PTS - FirstPTS);
316 const auto Gap = static_cast<int32_t>(ExpectedStartSample - Frame.SampleStart);
317 if (Gap > 0) {
318 PrevFrame->SampleCount += Gap;
319 Frame.SampleStart = ExpectedStartSample;
320 }
321 Shift += Gap;
322 PrevFrame = &Frame;
323 }
324 }
325
FinalizeTrack()326 void FFMS_Track::FinalizeTrack() {
327 frame_vec &Frames = Data->Frames;
328 // With some formats (such as Vorbis) a bad final packet results in a
329 // frame with PTS 0, which we don't want to sort to the beginning
330 if (size() > 2 && front().PTS >= back().PTS)
331 Frames.pop_back();
332
333 if (TT == FFMS_TYPE_AUDIO) {
334 FillAudioGaps();
335 return;
336 }
337
338 if (TT != FFMS_TYPE_VIDEO)
339 return;
340
341 for (size_t i = 0; i < size(); i++) {
342 Frames[i].OriginalPos = i;
343 Frames[i].OriginalPTS = Frames[i].PTS;
344 }
345
346 MaybeReorderFrames();
347
348 if (size() > 2 && HasDiscontTS) {
349 std::vector<size_t> secs = { 0 };
350
351 auto lastPTS = Frames[0].PTS;
352 const auto thresh = std::abs(Frames[1].PTS - Frames[0].PTS) * 16; // A bad approximation of 16 frames, the max reorder buffer size.
353 for (size_t i = 0; i < size(); i++) {
354 if (Frames[i].PTS < lastPTS && (lastPTS - Frames[i].PTS) > thresh && i + 1 < size()) {
355 secs.push_back(i);
356 i++; // Sections must be at least 2 frames long.
357 }
358 lastPTS = Frames[i].PTS;
359 }
360
361 // We need to sort each distinct sections by PTS to account for any reordering.
362 for (size_t i = 0; i < secs.size() - 1; i++)
363 sort(Frames.begin() + secs[i], Frames.begin() + secs[i + 1], PTSComparison);
364 sort(Frames.begin() + secs.back(), Frames.end(), PTSComparison);
365
366 // Try and make up some sane timestamps based on previous sections, while
367 // keeping the same frame durations.
368 for (size_t i = 1; i < secs.size(); i++) {
369 const auto shift = -(Frames[secs[i]].PTS) + (Frames[secs[i] + 1].PTS - Frames[secs[i]].PTS) + Frames[secs[i] - 1].PTS;
370 size_t end;
371 if (i == secs.size() - 1)
372 end = Frames.size();
373 else
374 end = secs[i + 1];
375 for (size_t j = secs[i]; j < end; j++)
376 Frames[j].PTS += shift;
377 }
378 } else {
379 sort(Frames.begin(), Frames.end(), PTSComparison);
380 }
381
382 std::vector<size_t> ReorderTemp;
383 ReorderTemp.reserve(size());
384
385 for (size_t i = 0; i < size(); i++)
386 ReorderTemp.push_back(Frames[i].OriginalPos);
387
388 for (size_t i = 0; i < size(); i++)
389 Frames[ReorderTemp[i]].OriginalPos = i;
390
391 GeneratePublicInfo();
392 }
393
GeneratePublicInfo()394 void FFMS_Track::GeneratePublicInfo() {
395 frame_vec &Frames = Data->Frames;
396 std::vector<int> &RealFrameNumbers = Data->RealFrameNumbers;
397 std::vector<FFMS_FrameInfo> &PublicFrameInfo = Data->PublicFrameInfo;
398 RealFrameNumbers.reserve(size());
399 PublicFrameInfo.reserve(size());
400 for (size_t i = 0; i < size(); ++i) {
401 if (Frames[i].Hidden)
402 continue;
403 RealFrameNumbers.push_back(static_cast<int>(i));
404
405 FFMS_FrameInfo info = { Frames[i].PTS, Frames[i].RepeatPict, Frames[i].KeyFrame, Frames[i].OriginalPTS };
406 PublicFrameInfo.push_back(info);
407 }
408 }
409
GetFrameInfo(size_t N) const410 const FFMS_FrameInfo *FFMS_Track::GetFrameInfo(size_t N) const {
411 std::vector<FFMS_FrameInfo> &PublicFrameInfo = Data->PublicFrameInfo;
412 if (N >= PublicFrameInfo.size()) return nullptr;
413 return &PublicFrameInfo[N];
414 }
415