1package mp4 2 3// Tries to decode ISOBMFF quicktime mov 4// Uses naming from ISOBMFF when possible 5// ISO/IEC 14496-12 6// Quicktime file format https://developer.apple.com/standards/qtff-2001.pdf 7// FLAC in ISOBMFF https://github.com/xiph/flac/blob/master/doc/isoflac.txt 8// vp9 in ISOBMFF https://www.webmproject.org/vp9/mp4/ 9// https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/Metadata/Metadata.html#//apple_ref/doc/uid/TP40000939-CH1-SW43 10 11// TODO: validate structure better? trak/stco etc 12// TODO: keep track of structure somehow to detect errors 13// TODO: ISO-14496 says mp4 mdat can begin and end with original header/trailer (no used?) 14// TODO: split into mov and mp4 decoder? 15// TODO: split into mp4_box decoder? needs complex in/out args? 16// TODO: better probe, find first 2 boxes, should be free,ftyp or mdat? 17 18import ( 19 "embed" 20 "sort" 21 22 "github.com/wader/fq/format" 23 "github.com/wader/fq/format/registry" 24 "github.com/wader/fq/pkg/decode" 25) 26 27//go:embed *.jq 28var mp4FS embed.FS 29 30var aacFrameFormat decode.Group 31var av1CCRFormat decode.Group 32var av1FrameFormat decode.Group 33var flacFrameFormat decode.Group 34var flacMetadatablocksFormat decode.Group 35var id3v2Format decode.Group 36var imageFormat decode.Group 37var jpegFormat decode.Group 38var mp3FrameFormat decode.Group 39var mpegAVCAUFormat decode.Group 40var mpegAVCDCRFormat decode.Group 41var mpegESFormat decode.Group 42var mpegHEVCDCRFrameFormat decode.Group 43var mpegHEVCSampleFormat decode.Group 44var mpegPESPacketSampleFormat decode.Group 45var opusPacketFrameFormat decode.Group 46var protoBufWidevineFormat decode.Group 47var psshPlayreadyFormat decode.Group 48var vorbisPacketFormat decode.Group 49var vp9FrameFormat decode.Group 50var vpxCCRFormat decode.Group 51 52func init() { 53 registry.MustRegister(decode.Format{ 54 Name: format.MP4, 55 Description: "MPEG-4 file and similar", 56 Groups: []string{ 57 format.PROBE, 58 format.IMAGE, // avif 59 }, 60 DecodeFn: mp4Decode, 61 Dependencies: []decode.Dependency{ 62 {Names: []string{format.AAC_FRAME}, Group: &aacFrameFormat}, 63 {Names: []string{format.AV1_CCR}, Group: &av1CCRFormat}, 64 {Names: []string{format.AV1_FRAME}, Group: &av1FrameFormat}, 65 {Names: []string{format.FLAC_FRAME}, Group: &flacFrameFormat}, 66 {Names: []string{format.FLAC_METADATABLOCKS}, Group: &flacMetadatablocksFormat}, 67 {Names: []string{format.ID3V2}, Group: &id3v2Format}, 68 {Names: []string{format.IMAGE}, Group: &imageFormat}, 69 {Names: []string{format.JPEG}, Group: &jpegFormat}, 70 {Names: []string{format.MP3_FRAME}, Group: &mp3FrameFormat}, 71 {Names: []string{format.AVC_AU}, Group: &mpegAVCAUFormat}, 72 {Names: []string{format.AVC_DCR}, Group: &mpegAVCDCRFormat}, 73 {Names: []string{format.MPEG_ES}, Group: &mpegESFormat}, 74 {Names: []string{format.HEVC_AU}, Group: &mpegHEVCSampleFormat}, 75 {Names: []string{format.HEVC_DCR}, Group: &mpegHEVCDCRFrameFormat}, 76 {Names: []string{format.MPEG_PES_PACKET}, Group: &mpegPESPacketSampleFormat}, 77 {Names: []string{format.OPUS_PACKET}, Group: &opusPacketFrameFormat}, 78 {Names: []string{format.PROTOBUF_WIDEVINE}, Group: &protoBufWidevineFormat}, 79 {Names: []string{format.PSSH_PLAYREADY}, Group: &psshPlayreadyFormat}, 80 {Names: []string{format.VORBIS_PACKET}, Group: &vorbisPacketFormat}, 81 {Names: []string{format.VP9_FRAME}, Group: &vp9FrameFormat}, 82 {Names: []string{format.VPX_CCR}, Group: &vpxCCRFormat}, 83 }, 84 Files: mp4FS, 85 }) 86} 87 88type stsc struct { 89 firstChunk uint32 90 samplesPerChunk uint32 91} 92 93type moof struct { 94 offset int64 95 defaultSampleSize uint32 96 defaultSampleDescriptionIndex uint32 97 dataOffset uint32 98 samplesSizes []uint32 99} 100 101type sampleDescription struct { 102 dataFormat string 103 originalFormat string 104} 105 106type track struct { 107 id uint32 108 sampleDescriptions []sampleDescription 109 subType string 110 stco []uint64 // 111 stsc []stsc 112 stsz []uint32 113 formatInArg interface{} 114 objectType int // if data format is "mp4a" 115 116 moofs []*moof // for fmp4 117 currentMoof *moof 118} 119 120type decodeContext struct { 121 path []string 122 tracks map[uint32]*track 123 currentTrack *track 124 currentMoofOffset int64 125} 126 127func isParent(ctx *decodeContext, typ string) bool { 128 return len(ctx.path) >= 2 && ctx.path[len(ctx.path)-2] == typ 129} 130 131func mp4Decode(d *decode.D, in interface{}) interface{} { 132 ctx := &decodeContext{ 133 tracks: map[uint32]*track{}, 134 } 135 136 // TODO: nicer, validate functions without field? 137 d.AssertLeastBytesLeft(16) 138 size := d.U32() 139 if size < 8 { 140 d.Fatalf("first box size too small < 8") 141 } 142 firstType := d.UTF8(4) 143 switch firstType { 144 case "styp", "ftyp", "free", "moov": 145 default: 146 d.Errorf("no styp, ftyp, free or moov box found") 147 } 148 149 d.SeekRel(-8 * 8) 150 151 decodeBoxes(ctx, d) 152 153 // keep track order stable 154 var sortedTracks []*track 155 for _, t := range ctx.tracks { 156 sortedTracks = append(sortedTracks, t) 157 } 158 sort.Slice(sortedTracks, func(i, j int) bool { return sortedTracks[i].id < sortedTracks[j].id }) 159 160 d.FieldArray("tracks", func(d *decode.D) { 161 for _, t := range sortedTracks { 162 decodeSampleRange := func(d *decode.D, t *track, dataFormat string, name string, firstBit int64, nBits int64, inArg interface{}) { 163 d.RangeFn(firstBit, nBits, func(d *decode.D) { 164 switch { 165 case dataFormat == "fLaC": 166 d.FieldFormatLen(name, nBits, flacFrameFormat, inArg) 167 case dataFormat == "Opus": 168 d.FieldFormatLen(name, nBits, opusPacketFrameFormat, inArg) 169 case dataFormat == "vp09": 170 d.FieldFormatLen(name, nBits, vp9FrameFormat, inArg) 171 case dataFormat == "avc1": 172 d.FieldFormatLen(name, nBits, mpegAVCAUFormat, inArg) 173 case dataFormat == "hev1", 174 dataFormat == "hvc1": 175 d.FieldFormatLen(name, nBits, mpegHEVCSampleFormat, inArg) 176 case dataFormat == "av01": 177 d.FieldFormatLen(name, nBits, av1FrameFormat, inArg) 178 case dataFormat == "mp4a" && t.objectType == format.MPEGObjectTypeMP3: 179 d.FieldFormatLen(name, nBits, mp3FrameFormat, inArg) 180 case dataFormat == "mp4a" && t.objectType == format.MPEGObjectTypeAAC: 181 d.FieldFormatLen(name, nBits, aacFrameFormat, inArg) 182 case dataFormat == "mp4a" && t.objectType == format.MPEGObjectTypeVORBIS: 183 d.FieldFormatLen(name, nBits, vorbisPacketFormat, inArg) 184 case dataFormat == "mp4v" && t.objectType == format.MPEGObjectTypeMPEG2VideoMain: 185 d.FieldFormatLen(name, nBits, mpegPESPacketSampleFormat, inArg) 186 case dataFormat == "mp4v" && t.objectType == format.MPEGObjectTypeMJPEG: 187 d.FieldFormatLen(name, nBits, jpegFormat, inArg) 188 case dataFormat == "jpeg": 189 d.FieldFormatLen(name, nBits, jpegFormat, inArg) 190 default: 191 d.FieldRawLen(name, d.BitsLeft()) 192 } 193 }) 194 } 195 196 d.FieldStruct("track", func(d *decode.D) { 197 // TODO: handle progressive/fragmented mp4 differently somehow? 198 199 trackSdDataFormat := "unknown" 200 if len(t.sampleDescriptions) > 0 { 201 sd := t.sampleDescriptions[0] 202 trackSdDataFormat = sd.dataFormat 203 if sd.originalFormat != "" { 204 trackSdDataFormat = sd.originalFormat 205 } 206 } 207 208 d.FieldArray("samples", func(d *decode.D) { 209 stscIndex := 0 210 chunkNr := uint32(0) 211 sampleNr := uint64(0) 212 213 for sampleNr < uint64(len(t.stsz)) { 214 if stscIndex >= len(t.stsc) { 215 // TODO: add warning 216 break 217 } 218 stscEntry := t.stsc[stscIndex] 219 if int(chunkNr) >= len(t.stco) { 220 // TODO: add warning 221 break 222 } 223 sampleOffset := t.stco[chunkNr] 224 225 for i := uint32(0); i < stscEntry.samplesPerChunk; i++ { 226 if int(sampleNr) >= len(t.stsz) { 227 // TODO: add warning 228 break 229 } 230 231 sampleSize := t.stsz[sampleNr] 232 decodeSampleRange(d, t, trackSdDataFormat, "sample", int64(sampleOffset)*8, int64(sampleSize)*8, t.formatInArg) 233 234 // log.Printf("%s %d/%d %d/%d sample=%d/%d chunk=%d size=%d %d-%d\n", 235 // trackSdDataFormat, stscIndex, len(t.stsc), 236 // i, stscEntry.samplesPerChunk, 237 // sampleNr, len(t.stsz), 238 // chunkNr, 239 // sampleSize, 240 // sampleOffset, 241 // sampleOffset+uint64(sampleSize)) 242 243 sampleOffset += uint64(sampleSize) 244 sampleNr++ 245 246 } 247 248 chunkNr++ 249 if stscIndex < len(t.stsc)-1 && chunkNr >= t.stsc[stscIndex+1].firstChunk-1 { 250 stscIndex++ 251 } 252 } 253 254 for _, m := range t.moofs { 255 sampleOffset := m.offset + int64(m.dataOffset) 256 for _, sz := range m.samplesSizes { 257 // log.Printf("moof sample %s %d-%d\n", t.dataFormat, sampleOffset, int64(sz)) 258 259 dataFormat := trackSdDataFormat 260 if m.defaultSampleDescriptionIndex != 0 && int(m.defaultSampleDescriptionIndex-1) < len(t.sampleDescriptions) { 261 sd := t.sampleDescriptions[m.defaultSampleDescriptionIndex-1] 262 dataFormat = sd.dataFormat 263 if sd.originalFormat != "" { 264 dataFormat = sd.originalFormat 265 } 266 } 267 268 // log.Printf("moof %#+v dataFormat: %#+v\n", m, dataFormat) 269 270 decodeSampleRange(d, t, dataFormat, "sample", sampleOffset*8, int64(sz)*8, t.formatInArg) 271 sampleOffset += int64(sz) 272 } 273 } 274 }) 275 }) 276 } 277 }) 278 279 return nil 280 281} 282