1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package vp8
6
7// This file implements decoding DCT/WHT residual coefficients and
8// reconstructing YCbCr data equal to predicted values plus residuals.
9//
10// There are 1*16*16 + 2*8*8 + 1*4*4 coefficients per macroblock:
11//	- 1*16*16 luma DCT coefficients,
12//	- 2*8*8 chroma DCT coefficients, and
13//	- 1*4*4 luma WHT coefficients.
14// Coefficients are read in lots of 16, and the later coefficients in each lot
15// are often zero.
16//
17// The YCbCr data consists of 1*16*16 luma values and 2*8*8 chroma values,
18// plus previously decoded values along the top and left borders. The combined
19// values are laid out as a [1+16+1+8][32]uint8 so that vertically adjacent
20// samples are 32 bytes apart. In detail, the layout is:
21//
22//	0 1 2 3 4 5 6 7  8 9 0 1 2 3 4 5  6 7 8 9 0 1 2 3  4 5 6 7 8 9 0 1
23//	. . . . . . . a  b b b b b b b b  b b b b b b b b  c c c c . . . .	0
24//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	1
25//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	2
26//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	3
27//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  c c c c . . . .	4
28//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	5
29//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	6
30//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	7
31//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  c c c c . . . .	8
32//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	9
33//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	10
34//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	11
35//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  c c c c . . . .	12
36//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	13
37//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	14
38//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	15
39//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	16
40//	. . . . . . . e  f f f f f f f f  . . . . . . . g  h h h h h h h h	17
41//	. . . . . . . i  B B B B B B B B  . . . . . . . j  R R R R R R R R	18
42//	. . . . . . . i  B B B B B B B B  . . . . . . . j  R R R R R R R R	19
43//	. . . . . . . i  B B B B B B B B  . . . . . . . j  R R R R R R R R	20
44//	. . . . . . . i  B B B B B B B B  . . . . . . . j  R R R R R R R R	21
45//	. . . . . . . i  B B B B B B B B  . . . . . . . j  R R R R R R R R	22
46//	. . . . . . . i  B B B B B B B B  . . . . . . . j  R R R R R R R R	23
47//	. . . . . . . i  B B B B B B B B  . . . . . . . j  R R R R R R R R	24
48//	. . . . . . . i  B B B B B B B B  . . . . . . . j  R R R R R R R R	25
49//
50// Y, B and R are the reconstructed luma (Y) and chroma (B, R) values.
51// The Y values are predicted (either as one 16x16 region or 16 4x4 regions)
52// based on the row above's Y values (some combination of {abc} or {dYC}) and
53// the column left's Y values (either {ad} or {bY}). Similarly, B and R values
54// are predicted on the row above and column left of their respective 8x8
55// region: {efi} for B, {ghj} for R.
56//
57// For uppermost macroblocks (i.e. those with mby == 0), the {abcefgh} values
58// are initialized to 0x81. Otherwise, they are copied from the bottom row of
59// the macroblock above. The {c} values are then duplicated from row 0 to rows
60// 4, 8 and 12 of the ybr workspace.
61// Similarly, for leftmost macroblocks (i.e. those with mbx == 0), the {adeigj}
62// values are initialized to 0x7f. Otherwise, they are copied from the right
63// column of the macroblock to the left.
64// For the top-left macroblock (with mby == 0 && mbx == 0), {aeg} is 0x81.
65//
66// When moving from one macroblock to the next horizontally, the {adeigj}
67// values can simply be copied from the workspace to itself, shifted by 8 or
68// 16 columns. When moving from one macroblock to the next vertically,
69// filtering can occur and hence the row values have to be copied from the
70// post-filtered image instead of the pre-filtered workspace.
71
72const (
73	bCoeffBase   = 1*16*16 + 0*8*8
74	rCoeffBase   = 1*16*16 + 1*8*8
75	whtCoeffBase = 1*16*16 + 2*8*8
76)
77
78const (
79	ybrYX = 8
80	ybrYY = 1
81	ybrBX = 8
82	ybrBY = 18
83	ybrRX = 24
84	ybrRY = 18
85)
86
87// prepareYBR prepares the {abcdefghij} elements of ybr.
88func (d *Decoder) prepareYBR(mbx, mby int) {
89	if mbx == 0 {
90		for y := 0; y < 17; y++ {
91			d.ybr[y][7] = 0x81
92		}
93		for y := 17; y < 26; y++ {
94			d.ybr[y][7] = 0x81
95			d.ybr[y][23] = 0x81
96		}
97	} else {
98		for y := 0; y < 17; y++ {
99			d.ybr[y][7] = d.ybr[y][7+16]
100		}
101		for y := 17; y < 26; y++ {
102			d.ybr[y][7] = d.ybr[y][15]
103			d.ybr[y][23] = d.ybr[y][31]
104		}
105	}
106	if mby == 0 {
107		for x := 7; x < 28; x++ {
108			d.ybr[0][x] = 0x7f
109		}
110		for x := 7; x < 16; x++ {
111			d.ybr[17][x] = 0x7f
112		}
113		for x := 23; x < 32; x++ {
114			d.ybr[17][x] = 0x7f
115		}
116	} else {
117		for i := 0; i < 16; i++ {
118			d.ybr[0][8+i] = d.img.Y[(16*mby-1)*d.img.YStride+16*mbx+i]
119		}
120		for i := 0; i < 8; i++ {
121			d.ybr[17][8+i] = d.img.Cb[(8*mby-1)*d.img.CStride+8*mbx+i]
122		}
123		for i := 0; i < 8; i++ {
124			d.ybr[17][24+i] = d.img.Cr[(8*mby-1)*d.img.CStride+8*mbx+i]
125		}
126		if mbx == d.mbw-1 {
127			for i := 16; i < 20; i++ {
128				d.ybr[0][8+i] = d.img.Y[(16*mby-1)*d.img.YStride+16*mbx+15]
129			}
130		} else {
131			for i := 16; i < 20; i++ {
132				d.ybr[0][8+i] = d.img.Y[(16*mby-1)*d.img.YStride+16*mbx+i]
133			}
134		}
135	}
136	for y := 4; y < 16; y += 4 {
137		d.ybr[y][24] = d.ybr[0][24]
138		d.ybr[y][25] = d.ybr[0][25]
139		d.ybr[y][26] = d.ybr[0][26]
140		d.ybr[y][27] = d.ybr[0][27]
141	}
142}
143
144// btou converts a bool to a 0/1 value.
145func btou(b bool) uint8 {
146	if b {
147		return 1
148	}
149	return 0
150}
151
152// pack packs four 0/1 values into four bits of a uint32.
153func pack(x [4]uint8, shift int) uint32 {
154	u := uint32(x[0])<<0 | uint32(x[1])<<1 | uint32(x[2])<<2 | uint32(x[3])<<3
155	return u << uint(shift)
156}
157
158// unpack unpacks four 0/1 values from a four-bit value.
159var unpack = [16][4]uint8{
160	{0, 0, 0, 0},
161	{1, 0, 0, 0},
162	{0, 1, 0, 0},
163	{1, 1, 0, 0},
164	{0, 0, 1, 0},
165	{1, 0, 1, 0},
166	{0, 1, 1, 0},
167	{1, 1, 1, 0},
168	{0, 0, 0, 1},
169	{1, 0, 0, 1},
170	{0, 1, 0, 1},
171	{1, 1, 0, 1},
172	{0, 0, 1, 1},
173	{1, 0, 1, 1},
174	{0, 1, 1, 1},
175	{1, 1, 1, 1},
176}
177
178var (
179	// The mapping from 4x4 region position to band is specified in section 13.3.
180	bands = [17]uint8{0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 0}
181	// Category probabilties are specified in section 13.2.
182	// Decoding categories 1 and 2 are done inline.
183	cat3456 = [4][12]uint8{
184		{173, 148, 140, 0, 0, 0, 0, 0, 0, 0, 0, 0},
185		{176, 155, 140, 135, 0, 0, 0, 0, 0, 0, 0, 0},
186		{180, 157, 141, 134, 130, 0, 0, 0, 0, 0, 0, 0},
187		{254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0},
188	}
189	// The zigzag order is:
190	//	0  1  5  6
191	//	2  4  7 12
192	//	3  8 11 13
193	//	9 10 14 15
194	zigzag = [16]uint8{0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15}
195)
196
197// parseResiduals4 parses a 4x4 region of residual coefficients, as specified
198// in section 13.3, and returns a 0/1 value indicating whether there was at
199// least one non-zero coefficient.
200// r is the partition to read bits from.
201// plane and context describe which token probability table to use. context is
202// either 0, 1 or 2, and equals how many of the macroblock left and macroblock
203// above have non-zero coefficients.
204// quant are the DC/AC quantization factors.
205// skipFirstCoeff is whether the DC coefficient has already been parsed.
206// coeffBase is the base index of d.coeff to write to.
207func (d *Decoder) parseResiduals4(r *partition, plane int, context uint8, quant [2]uint16, skipFirstCoeff bool, coeffBase int) uint8 {
208	prob, n := &d.tokenProb[plane], 0
209	if skipFirstCoeff {
210		n = 1
211	}
212	p := prob[bands[n]][context]
213	if !r.readBit(p[0]) {
214		return 0
215	}
216	for n != 16 {
217		n++
218		if !r.readBit(p[1]) {
219			p = prob[bands[n]][0]
220			continue
221		}
222		var v uint32
223		if !r.readBit(p[2]) {
224			v = 1
225			p = prob[bands[n]][1]
226		} else {
227			if !r.readBit(p[3]) {
228				if !r.readBit(p[4]) {
229					v = 2
230				} else {
231					v = 3 + r.readUint(p[5], 1)
232				}
233			} else if !r.readBit(p[6]) {
234				if !r.readBit(p[7]) {
235					// Category 1.
236					v = 5 + r.readUint(159, 1)
237				} else {
238					// Category 2.
239					v = 7 + 2*r.readUint(165, 1) + r.readUint(145, 1)
240				}
241			} else {
242				// Categories 3, 4, 5 or 6.
243				b1 := r.readUint(p[8], 1)
244				b0 := r.readUint(p[9+b1], 1)
245				cat := 2*b1 + b0
246				tab := &cat3456[cat]
247				v = 0
248				for i := 0; tab[i] != 0; i++ {
249					v *= 2
250					v += r.readUint(tab[i], 1)
251				}
252				v += 3 + (8 << cat)
253			}
254			p = prob[bands[n]][2]
255		}
256		z := zigzag[n-1]
257		c := int32(v) * int32(quant[btou(z > 0)])
258		if r.readBit(uniformProb) {
259			c = -c
260		}
261		d.coeff[coeffBase+int(z)] = int16(c)
262		if n == 16 || !r.readBit(p[0]) {
263			return 1
264		}
265	}
266	return 1
267}
268
269// parseResiduals parses the residuals and returns whether inner loop filtering
270// should be skipped for this macroblock.
271func (d *Decoder) parseResiduals(mbx, mby int) (skip bool) {
272	partition := &d.op[mby&(d.nOP-1)]
273	plane := planeY1SansY2
274	quant := &d.quant[d.segment]
275
276	// Parse the DC coefficient of each 4x4 luma region.
277	if d.usePredY16 {
278		nz := d.parseResiduals4(partition, planeY2, d.leftMB.nzY16+d.upMB[mbx].nzY16, quant.y2, false, whtCoeffBase)
279		d.leftMB.nzY16 = nz
280		d.upMB[mbx].nzY16 = nz
281		d.inverseWHT16()
282		plane = planeY1WithY2
283	}
284
285	var (
286		nzDC, nzAC         [4]uint8
287		nzDCMask, nzACMask uint32
288		coeffBase          int
289	)
290
291	// Parse the luma coefficients.
292	lnz := unpack[d.leftMB.nzMask&0x0f]
293	unz := unpack[d.upMB[mbx].nzMask&0x0f]
294	for y := 0; y < 4; y++ {
295		nz := lnz[y]
296		for x := 0; x < 4; x++ {
297			nz = d.parseResiduals4(partition, plane, nz+unz[x], quant.y1, d.usePredY16, coeffBase)
298			unz[x] = nz
299			nzAC[x] = nz
300			nzDC[x] = btou(d.coeff[coeffBase] != 0)
301			coeffBase += 16
302		}
303		lnz[y] = nz
304		nzDCMask |= pack(nzDC, y*4)
305		nzACMask |= pack(nzAC, y*4)
306	}
307	lnzMask := pack(lnz, 0)
308	unzMask := pack(unz, 0)
309
310	// Parse the chroma coefficients.
311	lnz = unpack[d.leftMB.nzMask>>4]
312	unz = unpack[d.upMB[mbx].nzMask>>4]
313	for c := 0; c < 4; c += 2 {
314		for y := 0; y < 2; y++ {
315			nz := lnz[y+c]
316			for x := 0; x < 2; x++ {
317				nz = d.parseResiduals4(partition, planeUV, nz+unz[x+c], quant.uv, false, coeffBase)
318				unz[x+c] = nz
319				nzAC[y*2+x] = nz
320				nzDC[y*2+x] = btou(d.coeff[coeffBase] != 0)
321				coeffBase += 16
322			}
323			lnz[y+c] = nz
324		}
325		nzDCMask |= pack(nzDC, 16+c*2)
326		nzACMask |= pack(nzAC, 16+c*2)
327	}
328	lnzMask |= pack(lnz, 4)
329	unzMask |= pack(unz, 4)
330
331	// Save decoder state.
332	d.leftMB.nzMask = uint8(lnzMask)
333	d.upMB[mbx].nzMask = uint8(unzMask)
334	d.nzDCMask = nzDCMask
335	d.nzACMask = nzACMask
336
337	// Section 15.1 of the spec says that "Steps 2 and 4 [of the loop filter]
338	// are skipped... [if] there is no DCT coefficient coded for the whole
339	// macroblock."
340	return nzDCMask == 0 && nzACMask == 0
341}
342
343// reconstructMacroblock applies the predictor functions and adds the inverse-
344// DCT transformed residuals to recover the YCbCr data.
345func (d *Decoder) reconstructMacroblock(mbx, mby int) {
346	if d.usePredY16 {
347		p := checkTopLeftPred(mbx, mby, d.predY16)
348		predFunc16[p](d, 1, 8)
349		for j := 0; j < 4; j++ {
350			for i := 0; i < 4; i++ {
351				n := 4*j + i
352				y := 4*j + 1
353				x := 4*i + 8
354				mask := uint32(1) << uint(n)
355				if d.nzACMask&mask != 0 {
356					d.inverseDCT4(y, x, 16*n)
357				} else if d.nzDCMask&mask != 0 {
358					d.inverseDCT4DCOnly(y, x, 16*n)
359				}
360			}
361		}
362	} else {
363		for j := 0; j < 4; j++ {
364			for i := 0; i < 4; i++ {
365				n := 4*j + i
366				y := 4*j + 1
367				x := 4*i + 8
368				predFunc4[d.predY4[j][i]](d, y, x)
369				mask := uint32(1) << uint(n)
370				if d.nzACMask&mask != 0 {
371					d.inverseDCT4(y, x, 16*n)
372				} else if d.nzDCMask&mask != 0 {
373					d.inverseDCT4DCOnly(y, x, 16*n)
374				}
375			}
376		}
377	}
378	p := checkTopLeftPred(mbx, mby, d.predC8)
379	predFunc8[p](d, ybrBY, ybrBX)
380	if d.nzACMask&0x0f0000 != 0 {
381		d.inverseDCT8(ybrBY, ybrBX, bCoeffBase)
382	} else if d.nzDCMask&0x0f0000 != 0 {
383		d.inverseDCT8DCOnly(ybrBY, ybrBX, bCoeffBase)
384	}
385	predFunc8[p](d, ybrRY, ybrRX)
386	if d.nzACMask&0xf00000 != 0 {
387		d.inverseDCT8(ybrRY, ybrRX, rCoeffBase)
388	} else if d.nzDCMask&0xf00000 != 0 {
389		d.inverseDCT8DCOnly(ybrRY, ybrRX, rCoeffBase)
390	}
391}
392
393// reconstruct reconstructs one macroblock and returns whether inner loop
394// filtering should be skipped for it.
395func (d *Decoder) reconstruct(mbx, mby int) (skip bool) {
396	if d.segmentHeader.updateMap {
397		if !d.fp.readBit(d.segmentHeader.prob[0]) {
398			d.segment = int(d.fp.readUint(d.segmentHeader.prob[1], 1))
399		} else {
400			d.segment = int(d.fp.readUint(d.segmentHeader.prob[2], 1)) + 2
401		}
402	}
403	if d.useSkipProb {
404		skip = d.fp.readBit(d.skipProb)
405	}
406	// Prepare the workspace.
407	for i := range d.coeff {
408		d.coeff[i] = 0
409	}
410	d.prepareYBR(mbx, mby)
411	// Parse the predictor modes.
412	d.usePredY16 = d.fp.readBit(145)
413	if d.usePredY16 {
414		d.parsePredModeY16(mbx)
415	} else {
416		d.parsePredModeY4(mbx)
417	}
418	d.parsePredModeC8()
419	// Parse the residuals.
420	if !skip {
421		skip = d.parseResiduals(mbx, mby)
422	} else {
423		if d.usePredY16 {
424			d.leftMB.nzY16 = 0
425			d.upMB[mbx].nzY16 = 0
426		}
427		d.leftMB.nzMask = 0
428		d.upMB[mbx].nzMask = 0
429		d.nzDCMask = 0
430		d.nzACMask = 0
431	}
432	// Reconstruct the YCbCr data and copy it to the image.
433	d.reconstructMacroblock(mbx, mby)
434	for i, y := (mby*d.img.YStride+mbx)*16, 0; y < 16; i, y = i+d.img.YStride, y+1 {
435		copy(d.img.Y[i:i+16], d.ybr[ybrYY+y][ybrYX:ybrYX+16])
436	}
437	for i, y := (mby*d.img.CStride+mbx)*8, 0; y < 8; i, y = i+d.img.CStride, y+1 {
438		copy(d.img.Cb[i:i+8], d.ybr[ybrBY+y][ybrBX:ybrBX+8])
439		copy(d.img.Cr[i:i+8], d.ybr[ybrRY+y][ybrRX:ybrRX+8])
440	}
441	return skip
442}
443