1 //*@@@+++@@@@******************************************************************
2 //
// Copyright © Microsoft Corp.
4 // All rights reserved.
5 //
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions are met:
8 //
// • Redistributions of source code must retain the above copyright notice,
10 //   this list of conditions and the following disclaimer.
// • Redistributions in binary form must reproduce the above copyright notice,
12 //   this list of conditions and the following disclaimer in the documentation
13 //   and/or other materials provided with the distribution.
14 //
15 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
19 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 // POSSIBILITY OF SUCH DAMAGE.
26 //
27 //*@@@---@@@@******************************************************************
28 
29 #include "strTransform.h"
30 #include "encode.h"
31 
/**
    rotation by pi/8 (per the original author's note), realized as two integer
    lifting steps applied in place to the pair (a, b):
      ROTATE1:  b -= (a + 1) >> 1      then  a += (b + 1) >> 1
      ROTATE2:  b -= (3 * a + 4) >> 3  then  a += (3 * b + 4) >> 3
    Each macro expands to a bare comma expression and evaluates both arguments
    more than once, so the arguments must be side-effect-free lvalues and the
    macro should only be used as a full statement.
**/
#define ROTATE1(a, b) (b) -= (((a) + 1) >> 1), (a) += (((b) + 1) >> 1)  // lifting weight 1/2, rounded
#define ROTATE2(a, b) (b) -= (((a)*3 + 4) >> 3), (a) += (((b)*3 + 4) >> 3)  // lifting weight 3/8, rounded
35 
/** local functions: forward declarations of the file-scope helpers **/
static Void fwdOddOdd(PixelI *, PixelI *, PixelI *, PixelI *);      // Kron(Rotate(pi/8), Rotate(pi/8)); called from the 4x4 DCT stages
static Void fwdOddOddPre(PixelI *, PixelI *, PixelI *, PixelI *);   // Kron(Rotate(pi/8), Rotate(pi/8)); called from the 4x4 pre filters
static Void fwdOdd(PixelI *, PixelI *, PixelI *, PixelI *);         // Kron(Rotate(pi/8), [1 1; 1 -1]/sqrt(2))
static Void strDCT2x2alt(PixelI * a, PixelI * b, PixelI * c, PixelI * d);  // NOTE(review): no definition or call visible in this part of the file
static Void strHSTenc1(PixelI *, PixelI *);                         // second block of the Hadamard+Scale transform, works on two values
static Void strHSTenc(PixelI *, PixelI *, PixelI *, PixelI *);      // first block of the Hadamard+Scale transform, works on four values
static Void strHSTenc1_edge (PixelI *pa, PixelI *pd);               // two-value scaling operator called from strPre4
44 
45 //static Void scaleDownUp0(PixelI *, PixelI *);
46 //static Void scaleDownUp1(PixelI *, PixelI *);
47 //static Void scaleDownUp2(PixelI *, PixelI *);
48 //#define FOURBUTTERFLY_ENC_ALT(p, i00, i01, i02, i03, i10, i11, i12, i13,	\
49 //    i20, i21, i22, i23, i30, i31, i32, i33)		\
50 //    strHSTenc(&p[i00], &p[i01], &p[i02], &p[i03]);			\
51 //    strHSTenc(&p[i10], &p[i11], &p[i12], &p[i13]);			\
52 //    strHSTenc(&p[i20], &p[i21], &p[i22], &p[i23]);			\
53 //    strHSTenc(&p[i30], &p[i31], &p[i32], &p[i33]);          \
54 //    strHSTenc1(&p[i00], &p[i03]);			\
55 //    strHSTenc1(&p[i10], &p[i13]);			\
56 //    strHSTenc1(&p[i20], &p[i23]);			\
57 //    strHSTenc1(&p[i30], &p[i33])
58 
59 /** DCT stuff **/
60 /** data order before DCT **/
61 /**  0  1  2  3 **/
62 /**  4  5  6  7 **/
63 /**  8  9 10 11 **/
64 /** 12 13 14 15 **/
65 /** data order after DCT **/
66 /** 0  8  4  6 **/
67 /** 2 10 14 12 **/
68 /** 1 11 15 13 **/
69 /** 9  3  7  5 **/
70 /** reordering should be combined with zigzag scan **/
71 
strDCT4x4Stage1(PixelI * p)72 Void strDCT4x4Stage1(PixelI * p)
73 {
74     /** butterfly **/
75     //FOURBUTTERFLY(p, 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15);
76     FOURBUTTERFLY_HARDCODED1(p);
77 
78     /** top left corner, butterfly => butterfly **/
79     strDCT2x2up(&p[0], &p[1], &p[2], &p[3]);
80 
81     /** bottom right corner, pi/8 rotation => pi/8 rotation **/
82     fwdOddOdd(&p[15], &p[14], &p[13], &p[12]);
83 
84     /** top right corner, butterfly => pi/8 rotation **/
85     fwdOdd(&p[5], &p[4], &p[7], &p[6]);
86 
87     /** bottom left corner, pi/8 rotation => butterfly **/
88     fwdOdd(&p[10], &p[8], &p[11], &p[9]);
89 }
90 
strDCT4x4SecondStage(PixelI * p)91 Void strDCT4x4SecondStage(PixelI * p)
92 {
93     /** butterfly **/
94     FOURBUTTERFLY(p, 0, 192, 48, 240, 64, 128, 112, 176,16, 208, 32, 224,  80, 144, 96, 160);
95 
96     /** top left corner, butterfly => butterfly **/
97     strDCT2x2up(&p[0], &p[64], &p[16], &p[80]);
98 
99     /** bottom right corner, pi/8 rotation => pi/8 rotation **/
100     fwdOddOdd(&p[160], &p[224], &p[176], &p[240]);
101 
102     /** top right corner, butterfly => pi/8 rotation **/
103     fwdOdd(&p[128], &p[192], &p[144], &p[208]);
104 
105     /** bottom left corner, pi/8 rotation => butterfly **/
106     fwdOdd(&p[32], &p[48], &p[96], &p[112]);
107 }
108 
strNormalizeEnc(PixelI * p,Bool bChroma)109 Void strNormalizeEnc(PixelI* p, Bool bChroma)
110 {
111     int i;
112     if (!bChroma) {
113         //for (i = 0; i < 256; i += 16) {
114         //    p[i] = (p[i] + 1) >> 2;
115         //}
116     }
117     else {
118         for (i = 0; i < 256; i += 16) {
119             p[i] >>= 1;
120         }
121     }
122 }
123 
124 /** 2x2 DCT with pre-scaling - for use on encoder side **/
strDCT2x2dnEnc(PixelI * pa,PixelI * pb,PixelI * pc,PixelI * pd)125 Void strDCT2x2dnEnc(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
126 {
127     PixelI a, b, c, d, C, t;
128     a = (*pa + 0) >> 1;
129     b = (*pb + 0) >> 1;
130     C = (*pc + 0) >> 1;
131     d = (*pd + 0) >> 1;
132     //PixelI t1, t2;
133 
134     a += d;
135     b -= C;
136     t = ((a - b) >> 1);
137     c = t - d;
138     d = t - C;
139     a -= d;
140     b += c;
141 
142     *pa = a;
143     *pb = b;
144     *pc = c;
145     *pd = d;
146 }
147 
148 /** pre filter stuff **/
149 /** 2-point pre for boundaries **/
strPre2(PixelI * pa,PixelI * pb)150 Void strPre2(PixelI * pa, PixelI * pb)
151 {
152     PixelI a, b;
153     a = *pa;
154     b = *pb;
155 
156     /** rotate **/
157     b -= ((a + 2) >> 2);
158     a -= ((b + 1) >> 1);
159 
160     a -= (b >> 5);
161     a -= (b >> 9);
162     a -= (b >> 13);
163 
164     b -= ((a + 2) >> 2);
165 
166     *pa = a;
167     *pb = b;
168 }
169 
strPre2x2(PixelI * pa,PixelI * pb,PixelI * pc,PixelI * pd)170 Void strPre2x2(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
171 {
172     PixelI a, b, c, d;
173     a = *pa;
174     b = *pb;
175     c = *pc;
176     d = *pd;
177 
178     /** butterflies **/
179     a += d;
180     b += c;
181     d -= (a + 1) >> 1;
182     c -= (b + 1) >> 1;
183 
184     /** rotate **/
185     b -= ((a + 2) >> 2);
186     a -= ((b + 1) >> 1);
187     a -= (b >> 5);
188     a -= (b >> 9);
189     a -= (b >> 13);
190     b -= ((a + 2) >> 2);
191 
192     /** butterflies **/
193     d += (a + 1) >> 1;
194     c += (b + 1) >> 1;
195     a -= d;
196     b -= c;
197 
198     *pa = a;
199     *pb = b;
200     *pc = c;
201     *pd = d;
202 }
203 
204 /** 4-point pre for boundaries **/
strPre4(PixelI * pa,PixelI * pb,PixelI * pc,PixelI * pd)205 Void strPre4(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
206 {
207     PixelI a, b, c, d;
208     a = *pa;
209     b = *pb;
210     c = *pc;
211     d = *pd;
212 
213     a += d, b += c;
214     d -= ((a + 1) >> 1), c -= ((b + 1) >> 1);
215 
216     ROTATE1(c, d);
217 
218     strHSTenc1_edge(&a, &d); strHSTenc1_edge(&b, &c);
219 
220     d += ((a + 1) >> 1), c += ((b + 1) >> 1);
221     a -= d, b -= c;
222 
223     *pa = a;
224     *pb = b;
225     *pc = c;
226     *pd = d;
227 }
228 
229 /*****************************************************************************************
230   Input data offsets:
231   (15)(14)|(10+64)(11+64) p0 (15)(14)|(74)(75)
232   (13)(12)|( 8+64)( 9+64)    (13)(12)|(72)(73)
233   --------+--------------    --------+--------
234   ( 5)( 4)|( 0+64) (1+64) p1 ( 5)( 4)|(64)(65)
235   ( 7)( 6)|( 2+64) (3+64)    ( 7)( 6)|(66)(67)
236 *****************************************************************************************/
strPre4x4Stage1Split(PixelI * p0,PixelI * p1,Int iOffset)237 Void strPre4x4Stage1Split(PixelI *p0, PixelI *p1, Int iOffset)
238 {
239     PixelI *p2 = p0 + 72 - iOffset;
240     PixelI *p3 = p1 + 64 - iOffset;
241     p0 += 12;
242     p1 += 4;
243 
244     /** butterfly & scaling **/
245     strHSTenc(p0 + 0, p2 + 0, p1 + 0, p3 + 0);
246     strHSTenc(p0 + 1, p2 + 1, p1 + 1, p3 + 1);
247     strHSTenc(p0 + 2, p2 + 2, p1 + 2, p3 + 2);
248     strHSTenc(p0 + 3, p2 + 3, p1 + 3, p3 + 3);
249     strHSTenc1(p0 + 0, p3 + 0);
250     strHSTenc1(p0 + 1, p3 + 1);
251     strHSTenc1(p0 + 2, p3 + 2);
252     strHSTenc1(p0 + 3, p3 + 3);
253 
254     /** anti diagonal corners: rotation by pi/8 **/
255     ROTATE1(p1[2], p1[3]);
256     ROTATE1(p1[0], p1[1]);
257     ROTATE1(p2[1], p2[3]);
258     ROTATE1(p2[0], p2[2]);
259 
260     /** bottom right corner: pi/8 rotation => pi/8 rotation **/
261     fwdOddOddPre(p3 + 0, p3 + 1, p3 + 2, p3 + 3);
262 
263     /** butterfly **/
264     strDCT2x2dn(p0 + 0, p2 + 0, p1 + 0, p3 + 0);
265     strDCT2x2dn(p0 + 1, p2 + 1, p1 + 1, p3 + 1);
266     strDCT2x2dn(p0 + 2, p2 + 2, p1 + 2, p3 + 2);
267     strDCT2x2dn(p0 + 3, p2 + 3, p1 + 3, p3 + 3);
268 }
269 
strPre4x4Stage1(PixelI * p,Int iOffset)270 Void strPre4x4Stage1(PixelI* p, Int iOffset)
271 {
272     strPre4x4Stage1Split(p, p + 16, iOffset);
273 }
274 
275 /*****************************************************************************************
276   Input data offsets:
277   (15)(14)|(10+32)(11+32) p0 (15)(14)|(42)(43)
278   (13)(12)|( 8+32)( 9+32)    (13)(12)|(40)(41)
279   --------+--------------    --------+--------
280   ( 5)( 4)|( 0+32)( 1+32) p1 ( 5)( 4)|(32)(33)
281   ( 7)( 6)|( 2+32)( 3+32)    ( 7)( 6)|(34)(35)
282 *****************************************************************************************/
strPre4x4Stage2Split(PixelI * p0,PixelI * p1)283 Void strPre4x4Stage2Split(PixelI* p0, PixelI* p1)
284 {
285     /** butterfly **/
286     strHSTenc(p0 - 96, p0 +  96, p1 - 112, p1 + 80);
287     strHSTenc(p0 - 32, p0 +  32, p1 -  48, p1 + 16);
288     strHSTenc(p0 - 80, p0 + 112, p1 - 128, p1 + 64);
289     strHSTenc(p0 - 16, p0 +  48, p1 -  64, p1 +  0);
290     strHSTenc1(p0 - 96, p1 + 80);
291     strHSTenc1(p0 - 32, p1 + 16);
292     strHSTenc1(p0 - 80, p1 + 64);
293     strHSTenc1(p0 - 16, p1 +  0);
294 
295     /** anti diagonal corners: rotation **/
296     ROTATE1(p1[-48], p1[-112]);
297     ROTATE1(p1[-64], p1[-128]);
298     ROTATE1(p0[112], p0[  96]);
299     ROTATE1(p0[ 48], p0[  32]);
300 
301     /** bottom right corner: pi/8 rotation => pi/8 rotation **/
302     fwdOddOddPre(p1 + 0, p1 + 64, p1 + 16, p1 + 80);
303 
304     /** butterfly **/
305     strDCT2x2dn(p0 - 96, p1 - 112, p0 +  96, p1 + 80);
306     strDCT2x2dn(p0 - 32, p1 -  48, p0 +  32, p1 + 16);
307     strDCT2x2dn(p0 - 80, p1 - 128, p0 + 112, p1 + 64);
308     strDCT2x2dn(p0 - 16, p1 -  64, p0 +  48, p1 +  0);
309 }
310 
311 
312 /**
313     Hadamard+Scale transform
314     for some strange reason, breaking up the function into two blocks, strHSTenc1 and strHSTenc
315     seems to work faster
316 **/
strHSTenc(PixelI * pa,PixelI * pb,PixelI * pc,PixelI * pd)317 static Void strHSTenc(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
318 {
319     /** different realization : does rescaling as well! **/
320     PixelI a, b, c, d;
321     a = *pa;
322     b = *pb;
323     d = *pc;
324     c = *pd;
325 
326     a += c;
327     b -= d;
328     c = ((a - b) >> 1) - c;
329     d += (b >> 1);
330     b += c;
331 
332     a -= (d * 3 + 4) >> 3;
333 
334     *pa = a;
335     *pb = b;
336     *pc = c;
337     *pd = d;
338 }
339 
strHSTenc1(PixelI * pa,PixelI * pd)340 static Void strHSTenc1(PixelI *pa, PixelI *pd)
341 {
342     /** different realization : does rescaling as well! **/
343     PixelI a, d;
344     a = *pa;
345     d = *pd;
346 
347     d -= (a >> 7);
348     d += (a >> 10);
349 
350     //a -= (d * 3 + 4) >> 3;
351     d -= (a * 3 + 0) >> 4;
352     a -= (d * 3 + 0) >> 3;
353     d = (a >> 1) - d;
354     a -= d;
355 
356     *pa = a;
357     *pd = d;
358 }
359 
strHSTenc1_edge(PixelI * pa,PixelI * pd)360 static Void strHSTenc1_edge (PixelI *pa, PixelI *pd)
361 {
362     /** different realizion as compared to scaling operator for 2D case **/
363     PixelI a, d;
364     a = *pa;
365     d = -(*pd); // Negative sign needed here for 1D scaling case to ensure correct scaling.
366 
367     a -= d;
368     d += (a >> 1);
369     a -= (d * 3 + 4) >> 3;
370     // End new operations
371 
372     //Scaling modification of adding 7/1024 in two steps (without multiplication by 7).
373     d -= (a >> 7);
374     d += (a >> 10);
375 
376     d -= (a * 3 + 0) >> 4;
377     a -= (d * 3 + 0) >> 3;
378     d = (a >> 1) - d;
379     a -= d;
380 
381     *pa = a;
382     *pd = d;
383 }
384 
385 /** Kron(Rotate(pi/8), Rotate(pi/8)) **/\
fwdOddOdd(PixelI * pa,PixelI * pb,PixelI * pc,PixelI * pd)386 static Void fwdOddOdd(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
387 {
388     PixelI a, b, c, d, t1, t2;
389 
390     a = *pa;
391     b = -*pb;
392     c = -*pc;
393     d = *pd;
394 
395     /** butterflies **/
396     d += a;
397     c -= b;
398     a -= (t1 = d >> 1);
399     b += (t2 = c >> 1);
400 
401     /** rotate pi/4 **/
402     a += (b * 3 + 4) >> 3;
403     b -= (a * 3 + 3) >> 2;
404     a += (b * 3 + 3) >> 3;
405 
406     /** butterflies **/
407     b -= t2;
408     a += t1;
409     c += b;
410     d -= a;
411 
412     *pa = a;
413     *pb = b;
414     *pc = c;
415     *pd = d;
416 }
417 /** Kron(Rotate(pi/8), Rotate(pi/8)) **/
fwdOddOddPre(PixelI * pa,PixelI * pb,PixelI * pc,PixelI * pd)418 static Void fwdOddOddPre(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
419 {
420     PixelI a, b, c, d, t1, t2;
421     a = *pa;
422     b = *pb;
423     c = *pc;
424     d = *pd;
425 
426     /** butterflies **/
427     d += a;
428     c -= b;
429     a -= (t1 = d >> 1);
430     b += (t2 = c >> 1);
431 
432     /** rotate pi/4 **/
433     a += (b * 3 + 4) >> 3;
434     b -= (a * 3 + 2) >> 2;
435     a += (b * 3 + 6) >> 3;
436 
437     /** butterflies **/
438     b -= t2;
439     a += t1;
440     c += b;
441     d -= a;
442 
443     *pa = a;
444     *pb = b;
445     *pc = c;
446     *pd = d;
447 }
448 
449 /** Kron(Rotate(pi/8), [1 1; 1 -1]/sqrt(2)) **/
450 /** [a b c d] => [D C A B] **/
fwdOdd(PixelI * pa,PixelI * pb,PixelI * pc,PixelI * pd)451 Void fwdOdd(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
452 {
453     PixelI a, b, c, d;
454     a = *pa;
455     b = *pb;
456     c = *pc;
457     d = *pd;
458 
459     /** butterflies **/
460     b -= c;
461     a += d;
462     c += (b + 1) >> 1;
463     d = ((a + 1) >> 1) - d;
464 
465     /** rotate pi/8 **/
466     ROTATE2(a, b);
467     ROTATE2(c, d);
468 
469     /** butterflies **/
470     d += (b) >> 1;
471     c -= (a + 1) >> 1;
472     b -= d;
473     a += c;
474 
475     *pa = a;
476     *pb = b;
477     *pc = c;
478     *pd = d;
479 }
480 
481 /*************************************************************************
  Top-level function to transform (possibly part of) a macroblock
483 *************************************************************************/
transformMacroblock(CWMImageStrCodec * pSC)484 Void transformMacroblock(CWMImageStrCodec * pSC)
485 {
486     OVERLAP olOverlap = pSC->WMISCP.olOverlap;
487     COLORFORMAT cfColorFormat = pSC->m_param.cfColorFormat;
488     Bool left = (pSC->cColumn == 0), right = (pSC->cColumn == pSC->cmbWidth);
489     Bool top = (pSC->cRow == 0), bottom = (pSC->cRow == pSC->cmbHeight);
490     Bool leftORright = (left || right), topORbottom = (top || bottom);
491     Bool topORleft = (left || top);// rightORbottom = (right || bottom);
492     Bool leftAdjacentColumn = (pSC->cColumn == 1), rightAdjacentColumn = (pSC->cColumn == pSC->cmbWidth - 1);
493     // Bool topAdjacentRow =  (pSC->cRow == 1), bottomAdjacentRow = (pSC->cRow == pSC->cmbHeight - 1);
494     PixelI * p = NULL;// * pt = NULL;
495     Int i, j;
496     Int iNumChromaFullPlanes = (Int)((YUV_420 == cfColorFormat || YUV_422 == cfColorFormat) ?
497         1 : pSC->m_param.cNumChannels);
498 
499 #define mbX               pSC->mbX
500 #define mbY               pSC->mbY
501 #define tileX             pSC->tileX
502 #define tileY             pSC->tileY
503 #define bVertTileBoundary pSC->bVertTileBoundary
504 #define bHoriTileBoundary pSC->bHoriTileBoundary
505 #define bOneMBLeftVertTB  pSC->bOneMBLeftVertTB
506 #define bOneMBRightVertTB pSC->bOneMBRightVertTB
507 #define iPredBefore       pSC->iPredBefore
508 #define iPredAfter        pSC->iPredAfter
509 
510     if (pSC->WMISCP.bUseHardTileBoundaries) {
511         //Add tile location information
512         if (pSC->cColumn == 0) {
513             bVertTileBoundary = FALSE;
514             tileY = 0;
515         }
516         bOneMBLeftVertTB = bOneMBRightVertTB = FALSE;
517         if(tileY > 0 && tileY <= pSC->WMISCP.cNumOfSliceMinus1H && (pSC->cColumn - 1) == pSC->WMISCP.uiTileY[tileY])
518             bOneMBRightVertTB = TRUE;
519         if(tileY < pSC->WMISCP.cNumOfSliceMinus1H && pSC->cColumn == pSC->WMISCP.uiTileY[tileY + 1]) {
520             bVertTileBoundary = TRUE;
521             tileY++;
522         }
523         else
524             bVertTileBoundary = FALSE;
525         if(tileY < pSC->WMISCP.cNumOfSliceMinus1H && (pSC->cColumn + 1) == pSC->WMISCP.uiTileY[tileY + 1])
526             bOneMBLeftVertTB = TRUE;
527 
528         if (pSC->cRow == 0) {
529             bHoriTileBoundary = FALSE;
530             tileX = 0;
531         }
532         else if(mbY != pSC->cRow && tileX < pSC->WMISCP.cNumOfSliceMinus1V && pSC->cRow == pSC->WMISCP.uiTileX[tileX + 1]) {
533             bHoriTileBoundary = TRUE;
534             tileX++;
535         }
536         else if(mbY != pSC->cRow)
537             bHoriTileBoundary = FALSE;
538     }
539     else {
540         bVertTileBoundary = FALSE;
541         bHoriTileBoundary = FALSE;
542         bOneMBLeftVertTB = FALSE;
543         bOneMBRightVertTB = FALSE;
544     }
545     mbX = pSC->cColumn, mbY = pSC->cRow;
546 
547     //================================================================
548     // 400_Y, 444_YUV
549     for(i = 0; i < iNumChromaFullPlanes; ++i)
550     {
551         PixelI* const p0 = pSC->p0MBbuffer[i];//(0 == i ? pSC->pY0 : (1 == i ? pSC->pU0 : pSC->pV0));
552         PixelI* const p1 = pSC->p1MBbuffer[i];//(0 == i ? pSC->pY1 : (1 == i ? pSC->pU1 : pSC->pV1));
553 
554         //================================
555         // first level overlap
556         if(OL_NONE != olOverlap)
557         {
558             /* Corner operations */
559             if ((top || bHoriTileBoundary) && (left || bVertTileBoundary))
560                 strPre4(p1 + 0, p1 + 1, p1 + 2, p1 + 3);
561             if ((top || bHoriTileBoundary) && (right || bVertTileBoundary))
562                 strPre4(p1 - 59, p1 - 60, p1 - 57, p1 - 58);
563             if ((bottom || bHoriTileBoundary) && (left || bVertTileBoundary))
564                 strPre4(p0 + 48 + 10, p0 + 48 + 11, p0 + 48 + 8, p0 + 48 + 9);
565             if ((bottom || bHoriTileBoundary) && (right || bVertTileBoundary))
566                 strPre4(p0 - 1, p0 - 2, p0 - 3, p0 - 4);
567             if(!right && !bottom)
568             {
569                 if (top || bHoriTileBoundary)
570                 {
571 
572                     for (j = ((left || bVertTileBoundary) ? 0 : -64); j < 192; j += 64)
573                     {
574                         p = p1 + j;
575                         strPre4(p + 5, p + 4, p + 64, p + 65);
576                         strPre4(p + 7, p + 6, p + 66, p + 67);
577                         p = NULL;
578                     }
579                 }
580                 else
581                 {
582                     for (j = ((left || bVertTileBoundary) ? 0 : -64); j < 192; j += 64)
583                     {
584                         strPre4x4Stage1Split(p0 + 48 + j, p1 + j, 0);
585                     }
586                 }
587 
588                 if (left || bVertTileBoundary)
589                 {
590                     if (!top && !bHoriTileBoundary)
591                     {
592                         strPre4(p0 + 58, p0 + 56, p1 + 0, p1 + 2);
593                         strPre4(p0 + 59, p0 + 57, p1 + 1, p1 + 3);
594                     }
595 
596                     for (j = -64; j < -16; j += 16)
597                     {
598                         p = p1 + j;
599                         strPre4(p + 74, p + 72, p + 80, p + 82);
600                         strPre4(p + 75, p + 73, p + 81, p + 83);
601                         p = NULL;
602                     }
603                 }
604                 else
605                 {
606                     for (j = -64; j < -16; j += 16)
607                     {
608                         strPre4x4Stage1(p1 + j, 0);
609                     }
610                 }
611 
612                 strPre4x4Stage1(p1 +   0, 0);
613                 strPre4x4Stage1(p1 +  16, 0);
614                 strPre4x4Stage1(p1 +  32, 0);
615                 strPre4x4Stage1(p1 +  64, 0);
616                 strPre4x4Stage1(p1 +  80, 0);
617                 strPre4x4Stage1(p1 +  96, 0);
618                 strPre4x4Stage1(p1 + 128, 0);
619                 strPre4x4Stage1(p1 + 144, 0);
620                 strPre4x4Stage1(p1 + 160, 0);
621             }
622 
623             if (bottom || bHoriTileBoundary)
624             {
625                 for (j = ((left || bVertTileBoundary) ? 48 : -16); j < (right ? -16 : 240); j += 64)
626                 {
627                     p = p0 + j;
628                     strPre4(p + 15, p + 14, p + 74, p + 75);
629                     strPre4(p + 13, p + 12, p + 72, p + 73);
630                     p = NULL;
631                 }
632             }
633 
634             if ((right || bVertTileBoundary) && !bottom)
635             {
636                 if (!top && !bHoriTileBoundary)
637                 {
638                     strPre4(p0 - 1, p0 - 3, p1 - 59, p1 - 57);
639                     strPre4(p0 - 2, p0 - 4, p1 - 60, p1 - 58);
640                 }
641                 for (j = -64; j < -16; j += 16)
642                 {
643                     p = p1 + j;
644                     strPre4(p + 15, p + 13, p + 21, p + 23);
645                     strPre4(p + 14, p + 12, p + 20, p + 22);
646                     p = NULL;
647                 }
648             }
649         }
650 
651         //================================
652         // first level transform
653         if (!top)
654         {
655             for (j = (left ? 48 : -16); j < (right ? 48 : 240); j += 64)
656             {
657                 strDCT4x4Stage1(p0 + j);
658             }
659         }
660 
661         if (!bottom)
662         {
663             for (j = (left ? 0 : -64); j < (right ? 0 : 192); j += 64)
664             {
665                 strDCT4x4Stage1(p1 + j + 0);
666                 strDCT4x4Stage1(p1 + j + 16);
667                 strDCT4x4Stage1(p1 + j + 32);
668             }
669         }
670 
671         //================================
672         // second level overlap
673         if (OL_TWO == olOverlap)
674         {
675             /* Corner operations */
676             if ((top || bHoriTileBoundary) && (left || bVertTileBoundary))
677                 strPre4(p1 + 0, p1 + 64, p1 + 0 + 16, p1 + 64 + 16);
678             if ((top || bHoriTileBoundary) && (right || bVertTileBoundary))
679                 strPre4(p1 - 128, p1 - 64, p1 - 128 + 16, p1 - 64 + 16);
680             if ((bottom || bHoriTileBoundary) && (left || bVertTileBoundary))
681                 strPre4(p0 + 32, p0 + 96, p0 + 32 + 16, p0 + 96 + 16);
682             if ((bottom || bHoriTileBoundary) && (right || bVertTileBoundary))
683                 strPre4(p0 - 96, p0 - 32, p0 - 96 + 16, p0 - 32 + 16);
684             if ((leftORright || bVertTileBoundary) && (!topORbottom && !bHoriTileBoundary))
685             {
686                 if (left || bVertTileBoundary) {
687                     j = 0;
688                     strPre4(p0 + j + 32, p0 + j +  48, p1 + j +  0, p1 + j + 16);
689                     strPre4(p0 + j + 96, p0 + j + 112, p1 + j + 64, p1 + j + 80);
690                 }
691                 if (right || bVertTileBoundary) {
692                     j = -128;
693                     strPre4(p0 + j + 32, p0 + j +  48, p1 + j +  0, p1 + j + 16);
694                     strPre4(p0 + j + 96, p0 + j + 112, p1 + j + 64, p1 + j + 80);
695                 }
696             }
697 
698             if (!leftORright && !bVertTileBoundary)
699             {
700                 if (topORbottom || bHoriTileBoundary)
701                 {
702                     if (top || bHoriTileBoundary) {
703                         p = p1;
704                         strPre4(p - 128, p - 64, p +  0, p + 64);
705                         strPre4(p - 112, p - 48, p + 16, p + 80);
706                         p = NULL;
707                     }
708                     if (bottom || bHoriTileBoundary) {
709                         p = p0 + 32;
710                         strPre4(p - 128, p - 64, p +  0, p + 64);
711                         strPre4(p - 112, p - 48, p + 16, p + 80);
712                         p = NULL;
713                     }
714                 }
715                 else
716                 {
717                     strPre4x4Stage2Split(p0, p1);
718                 }
719             }
720         }
721 
722         //================================
723         // second level transform
724         if (!topORleft){
725             if (pSC->m_param.bScaledArith) {
726                 strNormalizeEnc(p0 - 256, (i != 0));
727             }
728             strDCT4x4SecondStage(p0 - 256);
729         }
730     }
731 
732     //================================================================
733     // 420_UV
734     for(i = 0; i < (YUV_420 == cfColorFormat? 2 : 0); ++i)
735     {
736         PixelI* const p0 = pSC->p0MBbuffer[1 + i];//(0 == i ? pSC->pU0 : pSC->pV0);
737         PixelI* const p1 = pSC->p1MBbuffer[1 + i];//(0 == i ? pSC->pU1 : pSC->pV1);
738 
739         //================================
740         // first level overlap (420_UV)
741         if (OL_NONE != olOverlap)
742         {
743             /* Corner operations */
744             if ((top || bHoriTileBoundary) && (left || bVertTileBoundary))
745                 strPre4(p1 + 0, p1 + 1, p1 + 2, p1 + 3);
746             if ((top || bHoriTileBoundary) && (right || bVertTileBoundary))
747                 strPre4(p1 - 27, p1 - 28, p1 - 25, p1 - 26);
748             if ((bottom || bHoriTileBoundary) && (left || bVertTileBoundary))
749                 strPre4(p0 + 16 + 10, p0 + 16 + 11, p0 + 16 + 8, p0 + 16 + 9);
750             if ((bottom || bHoriTileBoundary) && (right || bVertTileBoundary))
751                 strPre4(p0 - 1, p0 - 2, p0 - 3, p0 - 4);
752             if(!right && !bottom)
753             {
754                 if (top || bHoriTileBoundary)
755                 {
756 
757                     for (j = ((left || bVertTileBoundary) ? 0 : -32); j < 32; j += 32)
758                     {
759                         p = p1 + j;
760                         strPre4(p + 5, p + 4, p + 32, p + 33);
761                         strPre4(p + 7, p + 6, p + 34, p + 35);
762                         p = NULL;
763                     }
764                 }
765                 else
766                 {
767                     for (j = ((left || bVertTileBoundary) ? 0: -32); j < 32; j += 32)
768                     {
769                         strPre4x4Stage1Split(p0 + 16 + j, p1 + j, 32);
770                     }
771                 }
772 
773                 if (left || bVertTileBoundary)
774                 {
775                     if (!top && !bHoriTileBoundary)
776                     {
777                         strPre4(p0 + 26, p0 + 24, p1 + 0, p1 + 2);
778                         strPre4(p0 + 27, p0 + 25, p1 + 1, p1 + 3);
779                     }
780 
781                     strPre4(p1 + 10, p1 + 8, p1 + 16, p1 + 18);
782                     strPre4(p1 + 11, p1 + 9, p1 + 17, p1 + 19);
783                 }
784                 else if (!bVertTileBoundary)
785                 {
786                     strPre4x4Stage1(p1 - 32, 32);
787                 }
788 
789                 strPre4x4Stage1(p1, 32);
790             }
791 
792             if (bottom || bHoriTileBoundary)
793             {
794                 for (j = ((left || bVertTileBoundary) ? 16: -16); j < (right ? -16: 32); j += 32)
795                 {
796                     p = p0 + j;
797                     strPre4(p + 15, p + 14, p + 42, p + 43);
798                     strPre4(p + 13, p + 12, p + 40, p + 41);
799                     p = NULL;
800                 }
801             }
802 
803             if ((right || bVertTileBoundary) && !bottom)
804             {
805                 if (!top && !bHoriTileBoundary)
806                 {
807                     strPre4(p0 - 1, p0 - 3, p1 - 27, p1 - 25);
808                     strPre4(p0 - 2, p0 - 4, p1 - 28, p1 - 26);
809                 }
810 
811                 strPre4(p1 - 17, p1 - 19, p1 - 11, p1 -  9);
812                 strPre4(p1 - 18, p1 - 20, p1 - 12, p1 - 10);
813             }
814         }
815 
816         //================================
817         // first level transform (420_UV)
818         if (!top)
819         {
820             for (j = (left ? 16 : -16); j < (right ? 16 : 48); j += 32)
821             {
822                 strDCT4x4Stage1(p0 + j);
823             }
824         }
825 
826         if (!bottom)
827         {
828             for (j = (left ? 0 : -32); j < (right ? 0 : 32); j += 32)
829             {
830                 strDCT4x4Stage1(p1 + j);
831             }
832         }
833 
834         //================================
835         // second level overlap (420_UV)
836         if (OL_TWO == olOverlap)
837         {
838             if ((leftAdjacentColumn || bOneMBRightVertTB) && (top || bHoriTileBoundary))
839                 COMPUTE_CORNER_PRED_DIFF(p1 - 64 + 0, *(p1 - 64 + 32));
840 
841             if ((rightAdjacentColumn || bOneMBLeftVertTB) && (top || bHoriTileBoundary))
842                 iPredBefore[i][0] = *(p1 + 0);
843             if ((right || bVertTileBoundary) && (top || bHoriTileBoundary))
844                 COMPUTE_CORNER_PRED_DIFF(p1 - 64 + 32, iPredBefore[i][0]);
845 
846             if ((leftAdjacentColumn || bOneMBRightVertTB) && (bottom || bHoriTileBoundary))
847                 COMPUTE_CORNER_PRED_DIFF(p0 - 64 + 16, *(p0 - 64 + 48));
848 
849             if ((rightAdjacentColumn || bOneMBLeftVertTB) && (bottom || bHoriTileBoundary))
850                 iPredBefore[i][1] = *(p0 + 16);
851             if ((right || bVertTileBoundary) && (bottom || bHoriTileBoundary))
852                 COMPUTE_CORNER_PRED_DIFF(p0 - 64 + 48, iPredBefore[i][1]);
853 
854             if ((leftORright || bVertTileBoundary) && !topORbottom && !bHoriTileBoundary)
855             {
856                 if (left || bVertTileBoundary)
857                     strPre2(p0 + 0 + 16, p1 + 0);
858                 if (right || bVertTileBoundary)
859                     strPre2(p0 + -32 + 16, p1 + -32);
860             }
861 
862             if (!leftORright)
863             {
864                 if ((topORbottom || bHoriTileBoundary) && !bVertTileBoundary)
865                 {
866                     if (top || bHoriTileBoundary)
867                         strPre2(p1 - 32, p1);
868                     if (bottom || bHoriTileBoundary)
869                         strPre2(p0 + 16 - 32, p0 + 16);
870                 }
871                 else if (!topORbottom && !bHoriTileBoundary && !bVertTileBoundary)
872                     strPre2x2(p0 - 16, p0 + 16, p1 - 32, p1);
873             }
874             if ((leftAdjacentColumn || bOneMBRightVertTB) && (top || bHoriTileBoundary))
875                 COMPUTE_CORNER_PRED_ADD(p1 - 64 + 0, *(p1 - 64 + 32));
876             if ((rightAdjacentColumn || bOneMBLeftVertTB) && (top || bHoriTileBoundary))
877                 iPredAfter[i][0] = *(p1 + 0);
878             if ((right || bVertTileBoundary) && (top || bHoriTileBoundary))
879                 COMPUTE_CORNER_PRED_ADD(p1 - 64 + 32, iPredAfter[i][0]);
880             if ((leftAdjacentColumn || bOneMBRightVertTB) && (bottom || bHoriTileBoundary))
881                 COMPUTE_CORNER_PRED_ADD(p0 - 64 + 16, *(p0 - 64 + 48));
882             if ((rightAdjacentColumn || bOneMBLeftVertTB) && (bottom || bHoriTileBoundary))
883                 iPredAfter[i][1] = *(p0 + 16);
884             if ((right || bVertTileBoundary) && (bottom || bHoriTileBoundary))
885                 COMPUTE_CORNER_PRED_ADD(p0 - 64 + 48, iPredAfter[i][1]);
886         }
887 
888         //================================
889         // second level transform (420_UV)
890         if (!topORleft)
891         {
892             if (!pSC->m_param.bScaledArith) {
893                 strDCT2x2dn(p0 - 64, p0 - 32, p0 - 48, p0 - 16);
894             }
895             else {
896                 strDCT2x2dnEnc(p0 - 64, p0 - 32, p0 - 48, p0 - 16);
897             }
898         }
899     }
900 
901     //================================================================
902     //  422_UV
903     for(i = 0; i < (YUV_422 == cfColorFormat? 2 : 0); ++i)
904     {
905         PixelI* const p0 = pSC->p0MBbuffer[1 + i];//(0 == i ? pSC->pU0 : pSC->pV0);
906         PixelI* const p1 = pSC->p1MBbuffer[1 + i];//(0 == i ? pSC->pU1 : pSC->pV1);
907 
908         //================================
909         // first level overlap (422_UV)
910         if (OL_NONE != olOverlap)
911         {
912             /* Corner operations */
913             if ((top || bHoriTileBoundary) && (left || bVertTileBoundary))
914                 strPre4(p1 + 0, p1 + 1, p1 + 2, p1 + 3);
915             if ((top || bHoriTileBoundary) && (right || bVertTileBoundary))
916                 strPre4(p1 - 59, p1 - 60, p1 - 57, p1 - 58);
917             if ((bottom || bHoriTileBoundary) && (left || bVertTileBoundary))
918                 strPre4(p0 + 48 + 10, p0 + 48 + 11, p0 + 48 + 8, p0 + 48 + 9);
919             if ((bottom || bHoriTileBoundary) && (right || bVertTileBoundary))
920                 strPre4(p0 - 1, p0 - 2, p0 - 3, p0 - 4);
921             if(!right && !bottom)
922             {
923                 if (top || bHoriTileBoundary)
924                 {
925 
926                     for (j = ((left || bVertTileBoundary) ? 0 : -64); j < 64; j += 64)
927                     {
928                         p = p1 + j;
929                         strPre4(p + 5, p + 4, p + 64, p + 65);
930                         strPre4(p + 7, p + 6, p + 66, p + 67);
931                         p = NULL;
932                     }
933                 }
934                 else
935                 {
936                     for (j = ((left || bVertTileBoundary) ? 0: -64); j < 64; j += 64)
937                     {
938                         strPre4x4Stage1Split(p0 + 48 + j, p1 + j, 0);
939                     }
940                 }
941 
942                 if (left || bVertTileBoundary)
943                 {
944                     if (!top && !bHoriTileBoundary)
945                     {
946                         strPre4(p0 + 58, p0 + 56, p1 + 0, p1 + 2);
947                         strPre4(p0 + 59, p0 + 57, p1 + 1, p1 + 3);
948                     }
949 
950                     for (j = 0; j < 48; j += 16)
951                     {
952                         p = p1 + j;
953                         strPre4(p + 10, p + 8, p + 16, p + 18);
954                         strPre4(p + 11, p + 9, p + 17, p + 19);
955                         p = NULL;
956                     }
957                 }
958                 else if (!bVertTileBoundary)
959                 {
960                     for (j = -64; j < -16; j += 16)
961                     {
962                         strPre4x4Stage1(p1 + j, 0);
963                     }
964                 }
965 
966                 strPre4x4Stage1(p1 +  0, 0);
967                 strPre4x4Stage1(p1 + 16, 0);
968                 strPre4x4Stage1(p1 + 32, 0);
969             }
970 
971             if (bottom || bHoriTileBoundary)
972             {
973                 for (j = ((left || bVertTileBoundary) ? 48: -16); j < (right ? -16: 112); j += 64)
974                 {
975                     p = p0 + j;
976                     strPre4(p + 15, p + 14, p + 74, p + 75);
977                     strPre4(p + 13, p + 12, p + 72, p + 73);
978                     p = NULL;
979                 }
980             }
981 
982             if ((right || bVertTileBoundary) && !bottom)
983             {
984                 if (!top && !bHoriTileBoundary)
985                 {
986                     strPre4(p0 - 1, p0 - 3, p1 - 59, p1 - 57);
987                     strPre4(p0 - 2, p0 - 4, p1 - 60, p1 - 58);
988                 }
989 
990                 for (j = -64; j < -16; j += 16)
991                 {
992                     p = p1 + j;
993                     strPre4(p + 15, p + 13, p + 21, p + 23);
994                     strPre4(p + 14, p + 12, p + 20, p + 22);
995                     p = NULL;
996                 }
997             }
998         }
999 
1000         //================================
1001         // first level transform (422_UV)
1002         if (!top)
1003         {
1004             for (j = (left ? 48 : -16); j < (right ? 48 : 112); j += 64)
1005             {
1006                 strDCT4x4Stage1(p0 + j);
1007             }
1008         }
1009 
1010         if (!bottom)
1011         {
1012             for (j = (left ? 0 : -64); j < (right ? 0 : 64); j += 64)
1013             {
1014                 strDCT4x4Stage1(p1 + j + 0);
1015                 strDCT4x4Stage1(p1 + j + 16);
1016                 strDCT4x4Stage1(p1 + j + 32);
1017             }
1018         }
1019 
1020         //================================
1021         // second level overlap (422_UV)
1022         if (OL_TWO == olOverlap)
1023         {
1024             if ((leftAdjacentColumn || bOneMBRightVertTB) && (top || bHoriTileBoundary))
1025                 COMPUTE_CORNER_PRED_DIFF(p1 - 128 + 0, *(p1 - 128 + 64));
1026 
1027             if ((rightAdjacentColumn || bOneMBLeftVertTB) && (top || bHoriTileBoundary))
1028                 iPredBefore[i][0] = *(p1 + 0);
1029             if ((right || bVertTileBoundary) && (top || bHoriTileBoundary))
1030                 COMPUTE_CORNER_PRED_DIFF(p1 - 128 + 64, iPredBefore[i][0]);
1031 
1032             if ((leftAdjacentColumn || bOneMBRightVertTB) && (bottom || bHoriTileBoundary))
1033                 COMPUTE_CORNER_PRED_DIFF(p0 - 128 + 48, *(p0 - 128 + 112));
1034 
1035             if ((rightAdjacentColumn || bOneMBLeftVertTB) && (bottom || bHoriTileBoundary))
1036                 iPredBefore[i][1] = *(p0 + 48);
1037             if ((right || bVertTileBoundary) && (bottom || bHoriTileBoundary))
1038                 COMPUTE_CORNER_PRED_DIFF(p0 - 128 + 112, iPredBefore[i][1]);
1039 
1040             if (!bottom)
1041             {
1042                 if (leftORright || bVertTileBoundary)
1043                 {
1044                     if (!top && !bHoriTileBoundary)
1045                     {
1046                         if (left || bVertTileBoundary)
1047                             strPre2(p0 + 48 + 0, p1 + 0);
1048 
1049                         if (right || bVertTileBoundary)
1050                             strPre2(p0 + 48 + -64, p1 + -64);
1051                     }
1052 
1053                     if (left || bVertTileBoundary)
1054                         strPre2(p1 + 16, p1 + 16 + 16);
1055 
1056                     if (right || bVertTileBoundary)
1057                         strPre2(p1 + -48, p1 + -48 + 16);
1058                 }
1059 
1060                 if (!leftORright && !bVertTileBoundary)
1061                 {
1062                     if (top || bHoriTileBoundary)
1063                         strPre2(p1 - 64, p1);
1064                     else
1065                         strPre2x2(p0 - 16, p0 + 48, p1 - 64, p1);
1066 
1067                     strPre2x2(p1 - 48, p1 + 16, p1 - 32, p1 + 32);
1068                 }
1069             }
1070 
1071             if ((bottom || bHoriTileBoundary) && (!leftORright && !bVertTileBoundary))
1072                 strPre2(p0 - 16, p0 + 48);
1073 
1074             if ((leftAdjacentColumn || bOneMBRightVertTB) && (top || bHoriTileBoundary))
1075                 COMPUTE_CORNER_PRED_ADD(p1 - 128 + 0, *(p1 - 128 + 64));
1076 
1077             if ((rightAdjacentColumn || bOneMBLeftVertTB) && (top || bHoriTileBoundary))
1078                 iPredAfter[i][0] = *(p1 + 0);
1079             if ((right || bVertTileBoundary) && (top || bHoriTileBoundary))
1080                 COMPUTE_CORNER_PRED_ADD(p1 - 128 + 64, iPredAfter[i][0]);
1081 
1082             if ((leftAdjacentColumn || bOneMBRightVertTB) && (bottom || bHoriTileBoundary))
1083                 COMPUTE_CORNER_PRED_ADD(p0 - 128 + 48, *(p0 - 128 + 112));
1084 
1085             if ((rightAdjacentColumn || bOneMBLeftVertTB) && (bottom || bHoriTileBoundary))
1086                 iPredAfter[i][1] = *(p0 + 48);
1087             if ((right || bVertTileBoundary) && (bottom || bHoriTileBoundary))
1088                 COMPUTE_CORNER_PRED_ADD(p0 - 128 + 112, iPredAfter[i][1]);
1089         }
1090 
1091         //================================
1092         // second level transform (422_UV)
1093         if (!topORleft)
1094         {
1095             if (!pSC->m_param.bScaledArith) {
1096                 strDCT2x2dn(p0 - 128, p0 - 64, p0 - 112, p0 - 48);
1097                 strDCT2x2dn(p0 -  96, p0 - 32, p0 -  80, p0 - 16);
1098             }
1099             else {
1100                 strDCT2x2dnEnc(p0 - 128, p0 - 64, p0 - 112, p0 - 48);
1101                 strDCT2x2dnEnc(p0 -  96, p0 - 32, p0 -  80, p0 - 16);
1102             }
1103 
1104             // 1D lossless HT
1105             p0[- 96] -= p0[-128];
1106             p0[-128] += ((p0[-96] + 1) >> 1);
1107         }
1108     }
1109     assert(NULL == p);
1110 }
1111 
1112