1package mp4
2
// Tries to decode ISOBMFF (MP4) and QuickTime (mov) files.
// Uses naming from ISOBMFF when possible.
// ISO/IEC 14496-12
// QuickTime file format https://developer.apple.com/standards/qtff-2001.pdf
// FLAC in ISOBMFF https://github.com/xiph/flac/blob/master/doc/isoflac.txt
// VP9 in ISOBMFF https://www.webmproject.org/vp9/mp4/
// QuickTime metadata https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/Metadata/Metadata.html#//apple_ref/doc/uid/TP40000939-CH1-SW43
10
11// TODO: validate structure better? trak/stco etc
12// TODO: keep track of structure somehow to detect errors
// TODO: ISO-14496 says mp4 mdat can begin and end with original header/trailer (not used?)
14// TODO: split into mov and mp4 decoder?
15// TODO: split into mp4_box decoder? needs complex in/out args?
16// TODO: better probe, find first 2 boxes, should be free,ftyp or mdat?
17
18import (
19	"embed"
20	"sort"
21
22	"github.com/wader/fq/format"
23	"github.com/wader/fq/format/registry"
24	"github.com/wader/fq/pkg/decode"
25)
26
// mp4FS holds the *.jq files matched by the embed pattern below. It is passed
// as Files in the format registration done in init.
//
//go:embed *.jq
var mp4FS embed.FS
29
30var aacFrameFormat decode.Group
31var av1CCRFormat decode.Group
32var av1FrameFormat decode.Group
33var flacFrameFormat decode.Group
34var flacMetadatablocksFormat decode.Group
35var id3v2Format decode.Group
36var imageFormat decode.Group
37var jpegFormat decode.Group
38var mp3FrameFormat decode.Group
39var mpegAVCAUFormat decode.Group
40var mpegAVCDCRFormat decode.Group
41var mpegESFormat decode.Group
42var mpegHEVCDCRFrameFormat decode.Group
43var mpegHEVCSampleFormat decode.Group
44var mpegPESPacketSampleFormat decode.Group
45var opusPacketFrameFormat decode.Group
46var protoBufWidevineFormat decode.Group
47var psshPlayreadyFormat decode.Group
48var vorbisPacketFormat decode.Group
49var vp9FrameFormat decode.Group
50var vpxCCRFormat decode.Group
51
52func init() {
53	registry.MustRegister(decode.Format{
54		Name:        format.MP4,
55		Description: "MPEG-4 file and similar",
56		Groups: []string{
57			format.PROBE,
58			format.IMAGE, // avif
59		},
60		DecodeFn: mp4Decode,
61		Dependencies: []decode.Dependency{
62			{Names: []string{format.AAC_FRAME}, Group: &aacFrameFormat},
63			{Names: []string{format.AV1_CCR}, Group: &av1CCRFormat},
64			{Names: []string{format.AV1_FRAME}, Group: &av1FrameFormat},
65			{Names: []string{format.FLAC_FRAME}, Group: &flacFrameFormat},
66			{Names: []string{format.FLAC_METADATABLOCKS}, Group: &flacMetadatablocksFormat},
67			{Names: []string{format.ID3V2}, Group: &id3v2Format},
68			{Names: []string{format.IMAGE}, Group: &imageFormat},
69			{Names: []string{format.JPEG}, Group: &jpegFormat},
70			{Names: []string{format.MP3_FRAME}, Group: &mp3FrameFormat},
71			{Names: []string{format.AVC_AU}, Group: &mpegAVCAUFormat},
72			{Names: []string{format.AVC_DCR}, Group: &mpegAVCDCRFormat},
73			{Names: []string{format.MPEG_ES}, Group: &mpegESFormat},
74			{Names: []string{format.HEVC_AU}, Group: &mpegHEVCSampleFormat},
75			{Names: []string{format.HEVC_DCR}, Group: &mpegHEVCDCRFrameFormat},
76			{Names: []string{format.MPEG_PES_PACKET}, Group: &mpegPESPacketSampleFormat},
77			{Names: []string{format.OPUS_PACKET}, Group: &opusPacketFrameFormat},
78			{Names: []string{format.PROTOBUF_WIDEVINE}, Group: &protoBufWidevineFormat},
79			{Names: []string{format.PSSH_PLAYREADY}, Group: &psshPlayreadyFormat},
80			{Names: []string{format.VORBIS_PACKET}, Group: &vorbisPacketFormat},
81			{Names: []string{format.VP9_FRAME}, Group: &vp9FrameFormat},
82			{Names: []string{format.VPX_CCR}, Group: &vpxCCRFormat},
83		},
84		Files: mp4FS,
85	})
86}
87
// stsc is one sample-to-chunk run of a track, named after the ISOBMFF stsc
// box (presumably filled in by the box decoder, which is outside this view).
type stsc struct {
	// firstChunk is the 1-based number of the first chunk the run applies to
	// (mp4Decode compares firstChunk-1 against its 0-based chunk counter).
	// samplesPerChunk is how many samples each chunk of the run holds.
	firstChunk, samplesPerChunk uint32
}
92
// moof describes the samples of one movie fragment belonging to a track
// (fragmented mp4). mp4Decode walks samplesSizes to locate each sample.
type moof struct {
	// offset is a byte position in the input; the first sample of the
	// fragment starts at offset+dataOffset. Presumably the position of the
	// moof box itself -- confirm against the box decoder.
	offset int64

	// defaultSampleSize is not read by mp4Decode; presumably a fallback size
	// for samples without an explicit one -- confirm against the box decoder.
	// defaultSampleDescriptionIndex is a 1-based index into the track's
	// sampleDescriptions; 0 means "use the track's first description".
	// dataOffset is added to offset to find the first sample.
	defaultSampleSize, defaultSampleDescriptionIndex, dataOffset uint32

	// samplesSizes holds the size in bytes of each sample; samples are laid
	// out back to back in this order.
	samplesSizes []uint32
}
100
// sampleDescription names the coding format of a track's samples.
type sampleDescription struct {
	// dataFormat is the format name of the sample entry (for example "avc1").
	// originalFormat, when non-empty, takes precedence over dataFormat when
	// mp4Decode picks a sample decoder.
	dataFormat, originalFormat string
}
105
106type track struct {
107	id                 uint32
108	sampleDescriptions []sampleDescription
109	subType            string
110	stco               []uint64 //
111	stsc               []stsc
112	stsz               []uint32
113	formatInArg        interface{}
114	objectType         int // if data format is "mp4a"
115
116	moofs       []*moof // for fmp4
117	currentMoof *moof
118}
119
120type decodeContext struct {
121	path              []string
122	tracks            map[uint32]*track
123	currentTrack      *track
124	currentMoofOffset int64
125}
126
127func isParent(ctx *decodeContext, typ string) bool {
128	return len(ctx.path) >= 2 && ctx.path[len(ctx.path)-2] == typ
129}
130
131func mp4Decode(d *decode.D, in interface{}) interface{} {
132	ctx := &decodeContext{
133		tracks: map[uint32]*track{},
134	}
135
136	// TODO: nicer, validate functions without field?
137	d.AssertLeastBytesLeft(16)
138	size := d.U32()
139	if size < 8 {
140		d.Fatalf("first box size too small < 8")
141	}
142	firstType := d.UTF8(4)
143	switch firstType {
144	case "styp", "ftyp", "free", "moov":
145	default:
146		d.Errorf("no styp, ftyp, free or moov box found")
147	}
148
149	d.SeekRel(-8 * 8)
150
151	decodeBoxes(ctx, d)
152
153	// keep track order stable
154	var sortedTracks []*track
155	for _, t := range ctx.tracks {
156		sortedTracks = append(sortedTracks, t)
157	}
158	sort.Slice(sortedTracks, func(i, j int) bool { return sortedTracks[i].id < sortedTracks[j].id })
159
160	d.FieldArray("tracks", func(d *decode.D) {
161		for _, t := range sortedTracks {
162			decodeSampleRange := func(d *decode.D, t *track, dataFormat string, name string, firstBit int64, nBits int64, inArg interface{}) {
163				d.RangeFn(firstBit, nBits, func(d *decode.D) {
164					switch {
165					case dataFormat == "fLaC":
166						d.FieldFormatLen(name, nBits, flacFrameFormat, inArg)
167					case dataFormat == "Opus":
168						d.FieldFormatLen(name, nBits, opusPacketFrameFormat, inArg)
169					case dataFormat == "vp09":
170						d.FieldFormatLen(name, nBits, vp9FrameFormat, inArg)
171					case dataFormat == "avc1":
172						d.FieldFormatLen(name, nBits, mpegAVCAUFormat, inArg)
173					case dataFormat == "hev1",
174						dataFormat == "hvc1":
175						d.FieldFormatLen(name, nBits, mpegHEVCSampleFormat, inArg)
176					case dataFormat == "av01":
177						d.FieldFormatLen(name, nBits, av1FrameFormat, inArg)
178					case dataFormat == "mp4a" && t.objectType == format.MPEGObjectTypeMP3:
179						d.FieldFormatLen(name, nBits, mp3FrameFormat, inArg)
180					case dataFormat == "mp4a" && t.objectType == format.MPEGObjectTypeAAC:
181						d.FieldFormatLen(name, nBits, aacFrameFormat, inArg)
182					case dataFormat == "mp4a" && t.objectType == format.MPEGObjectTypeVORBIS:
183						d.FieldFormatLen(name, nBits, vorbisPacketFormat, inArg)
184					case dataFormat == "mp4v" && t.objectType == format.MPEGObjectTypeMPEG2VideoMain:
185						d.FieldFormatLen(name, nBits, mpegPESPacketSampleFormat, inArg)
186					case dataFormat == "mp4v" && t.objectType == format.MPEGObjectTypeMJPEG:
187						d.FieldFormatLen(name, nBits, jpegFormat, inArg)
188					case dataFormat == "jpeg":
189						d.FieldFormatLen(name, nBits, jpegFormat, inArg)
190					default:
191						d.FieldRawLen(name, d.BitsLeft())
192					}
193				})
194			}
195
196			d.FieldStruct("track", func(d *decode.D) {
197				// TODO: handle progressive/fragmented mp4 differently somehow?
198
199				trackSdDataFormat := "unknown"
200				if len(t.sampleDescriptions) > 0 {
201					sd := t.sampleDescriptions[0]
202					trackSdDataFormat = sd.dataFormat
203					if sd.originalFormat != "" {
204						trackSdDataFormat = sd.originalFormat
205					}
206				}
207
208				d.FieldArray("samples", func(d *decode.D) {
209					stscIndex := 0
210					chunkNr := uint32(0)
211					sampleNr := uint64(0)
212
213					for sampleNr < uint64(len(t.stsz)) {
214						if stscIndex >= len(t.stsc) {
215							// TODO: add warning
216							break
217						}
218						stscEntry := t.stsc[stscIndex]
219						if int(chunkNr) >= len(t.stco) {
220							// TODO: add warning
221							break
222						}
223						sampleOffset := t.stco[chunkNr]
224
225						for i := uint32(0); i < stscEntry.samplesPerChunk; i++ {
226							if int(sampleNr) >= len(t.stsz) {
227								// TODO: add warning
228								break
229							}
230
231							sampleSize := t.stsz[sampleNr]
232							decodeSampleRange(d, t, trackSdDataFormat, "sample", int64(sampleOffset)*8, int64(sampleSize)*8, t.formatInArg)
233
234							// log.Printf("%s %d/%d %d/%d sample=%d/%d chunk=%d size=%d %d-%d\n",
235							// 	trackSdDataFormat, stscIndex, len(t.stsc),
236							// 	i, stscEntry.samplesPerChunk,
237							// 	sampleNr, len(t.stsz),
238							// 	chunkNr,
239							// 	sampleSize,
240							// 	sampleOffset,
241							// 	sampleOffset+uint64(sampleSize))
242
243							sampleOffset += uint64(sampleSize)
244							sampleNr++
245
246						}
247
248						chunkNr++
249						if stscIndex < len(t.stsc)-1 && chunkNr >= t.stsc[stscIndex+1].firstChunk-1 {
250							stscIndex++
251						}
252					}
253
254					for _, m := range t.moofs {
255						sampleOffset := m.offset + int64(m.dataOffset)
256						for _, sz := range m.samplesSizes {
257							// log.Printf("moof sample %s %d-%d\n", t.dataFormat, sampleOffset, int64(sz))
258
259							dataFormat := trackSdDataFormat
260							if m.defaultSampleDescriptionIndex != 0 && int(m.defaultSampleDescriptionIndex-1) < len(t.sampleDescriptions) {
261								sd := t.sampleDescriptions[m.defaultSampleDescriptionIndex-1]
262								dataFormat = sd.dataFormat
263								if sd.originalFormat != "" {
264									dataFormat = sd.originalFormat
265								}
266							}
267
268							// log.Printf("moof %#+v dataFormat: %#+v\n", m, dataFormat)
269
270							decodeSampleRange(d, t, dataFormat, "sample", sampleOffset*8, int64(sz)*8, t.formatInArg)
271							sampleOffset += int64(sz)
272						}
273					}
274				})
275			})
276		}
277	})
278
279	return nil
280
281}
282