1 //*@@@+++@@@@******************************************************************
2 //
3 // Copyright © Microsoft Corp.
4 // All rights reserved.
5 //
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions are met:
8 //
9 // • Redistributions of source code must retain the above copyright notice,
10 // this list of conditions and the following disclaimer.
11 // • Redistributions in binary form must reproduce the above copyright notice,
12 // this list of conditions and the following disclaimer in the documentation
13 // and/or other materials provided with the distribution.
14 //
15 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
19 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 // POSSIBILITY OF SUCH DAMAGE.
26 //
27 //*@@@---@@@@******************************************************************
28
29 #include "strTransform.h"
30 #include "encode.h"
31
/** rotation by pi/8 **/
/*
 * Both macros apply two lifting steps in place: first b is reduced by a
 * scaled, rounded copy of a, then a is increased by the same scaling of the
 * updated b.  ROTATE1 scales by 1/2, ROTATE2 by 3/8.
 *
 * Each argument is evaluated more than once, so only pass side-effect-free
 * lvalues.  The whole expansion is parenthesized so that the macro behaves as
 * a single expression wherever it is used; its value is the updated a.
 */
#define ROTATE1(a, b) ((b) -= (((a) + 1) >> 1), (a) += (((b) + 1) >> 1))
#define ROTATE2(a, b) ((b) -= (((a)*3 + 4) >> 3), (a) += (((b)*3 + 4) >> 3))
35
36 /** local functions **/
37 static Void fwdOddOdd(PixelI *, PixelI *, PixelI *, PixelI *);
38 static Void fwdOddOddPre(PixelI *, PixelI *, PixelI *, PixelI *);
39 static Void fwdOdd(PixelI *, PixelI *, PixelI *, PixelI *);
40 static Void strDCT2x2alt(PixelI * a, PixelI * b, PixelI * c, PixelI * d);
41 static Void strHSTenc1(PixelI *, PixelI *);
42 static Void strHSTenc(PixelI *, PixelI *, PixelI *, PixelI *);
43 static Void strHSTenc1_edge (PixelI *pa, PixelI *pd);
44
45 //static Void scaleDownUp0(PixelI *, PixelI *);
46 //static Void scaleDownUp1(PixelI *, PixelI *);
47 //static Void scaleDownUp2(PixelI *, PixelI *);
48 //#define FOURBUTTERFLY_ENC_ALT(p, i00, i01, i02, i03, i10, i11, i12, i13, \
49 // i20, i21, i22, i23, i30, i31, i32, i33) \
50 // strHSTenc(&p[i00], &p[i01], &p[i02], &p[i03]); \
51 // strHSTenc(&p[i10], &p[i11], &p[i12], &p[i13]); \
52 // strHSTenc(&p[i20], &p[i21], &p[i22], &p[i23]); \
53 // strHSTenc(&p[i30], &p[i31], &p[i32], &p[i33]); \
54 // strHSTenc1(&p[i00], &p[i03]); \
55 // strHSTenc1(&p[i10], &p[i13]); \
56 // strHSTenc1(&p[i20], &p[i23]); \
57 // strHSTenc1(&p[i30], &p[i33])
58
59 /** DCT stuff **/
60 /** data order before DCT **/
61 /** 0 1 2 3 **/
62 /** 4 5 6 7 **/
63 /** 8 9 10 11 **/
64 /** 12 13 14 15 **/
65 /** data order after DCT **/
66 /** 0 8 4 6 **/
67 /** 2 10 14 12 **/
68 /** 1 11 15 13 **/
69 /** 9 3 7 5 **/
70 /** reordering should be combined with zigzag scan **/
71
strDCT4x4Stage1(PixelI * p)72 Void strDCT4x4Stage1(PixelI * p)
73 {
74 /** butterfly **/
75 //FOURBUTTERFLY(p, 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15);
76 FOURBUTTERFLY_HARDCODED1(p);
77
78 /** top left corner, butterfly => butterfly **/
79 strDCT2x2up(&p[0], &p[1], &p[2], &p[3]);
80
81 /** bottom right corner, pi/8 rotation => pi/8 rotation **/
82 fwdOddOdd(&p[15], &p[14], &p[13], &p[12]);
83
84 /** top right corner, butterfly => pi/8 rotation **/
85 fwdOdd(&p[5], &p[4], &p[7], &p[6]);
86
87 /** bottom left corner, pi/8 rotation => butterfly **/
88 fwdOdd(&p[10], &p[8], &p[11], &p[9]);
89 }
90
strDCT4x4SecondStage(PixelI * p)91 Void strDCT4x4SecondStage(PixelI * p)
92 {
93 /** butterfly **/
94 FOURBUTTERFLY(p, 0, 192, 48, 240, 64, 128, 112, 176,16, 208, 32, 224, 80, 144, 96, 160);
95
96 /** top left corner, butterfly => butterfly **/
97 strDCT2x2up(&p[0], &p[64], &p[16], &p[80]);
98
99 /** bottom right corner, pi/8 rotation => pi/8 rotation **/
100 fwdOddOdd(&p[160], &p[224], &p[176], &p[240]);
101
102 /** top right corner, butterfly => pi/8 rotation **/
103 fwdOdd(&p[128], &p[192], &p[144], &p[208]);
104
105 /** bottom left corner, pi/8 rotation => butterfly **/
106 fwdOdd(&p[32], &p[48], &p[96], &p[112]);
107 }
108
strNormalizeEnc(PixelI * p,Bool bChroma)109 Void strNormalizeEnc(PixelI* p, Bool bChroma)
110 {
111 int i;
112 if (!bChroma) {
113 //for (i = 0; i < 256; i += 16) {
114 // p[i] = (p[i] + 1) >> 2;
115 //}
116 }
117 else {
118 for (i = 0; i < 256; i += 16) {
119 p[i] >>= 1;
120 }
121 }
122 }
123
124 /** 2x2 DCT with pre-scaling - for use on encoder side **/
strDCT2x2dnEnc(PixelI * pa,PixelI * pb,PixelI * pc,PixelI * pd)125 Void strDCT2x2dnEnc(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
126 {
127 PixelI a, b, c, d, C, t;
128 a = (*pa + 0) >> 1;
129 b = (*pb + 0) >> 1;
130 C = (*pc + 0) >> 1;
131 d = (*pd + 0) >> 1;
132 //PixelI t1, t2;
133
134 a += d;
135 b -= C;
136 t = ((a - b) >> 1);
137 c = t - d;
138 d = t - C;
139 a -= d;
140 b += c;
141
142 *pa = a;
143 *pb = b;
144 *pc = c;
145 *pd = d;
146 }
147
148 /** pre filter stuff **/
149 /** 2-point pre for boundaries **/
strPre2(PixelI * pa,PixelI * pb)150 Void strPre2(PixelI * pa, PixelI * pb)
151 {
152 PixelI a, b;
153 a = *pa;
154 b = *pb;
155
156 /** rotate **/
157 b -= ((a + 2) >> 2);
158 a -= ((b + 1) >> 1);
159
160 a -= (b >> 5);
161 a -= (b >> 9);
162 a -= (b >> 13);
163
164 b -= ((a + 2) >> 2);
165
166 *pa = a;
167 *pb = b;
168 }
169
strPre2x2(PixelI * pa,PixelI * pb,PixelI * pc,PixelI * pd)170 Void strPre2x2(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
171 {
172 PixelI a, b, c, d;
173 a = *pa;
174 b = *pb;
175 c = *pc;
176 d = *pd;
177
178 /** butterflies **/
179 a += d;
180 b += c;
181 d -= (a + 1) >> 1;
182 c -= (b + 1) >> 1;
183
184 /** rotate **/
185 b -= ((a + 2) >> 2);
186 a -= ((b + 1) >> 1);
187 a -= (b >> 5);
188 a -= (b >> 9);
189 a -= (b >> 13);
190 b -= ((a + 2) >> 2);
191
192 /** butterflies **/
193 d += (a + 1) >> 1;
194 c += (b + 1) >> 1;
195 a -= d;
196 b -= c;
197
198 *pa = a;
199 *pb = b;
200 *pc = c;
201 *pd = d;
202 }
203
204 /** 4-point pre for boundaries **/
strPre4(PixelI * pa,PixelI * pb,PixelI * pc,PixelI * pd)205 Void strPre4(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
206 {
207 PixelI a, b, c, d;
208 a = *pa;
209 b = *pb;
210 c = *pc;
211 d = *pd;
212
213 a += d, b += c;
214 d -= ((a + 1) >> 1), c -= ((b + 1) >> 1);
215
216 ROTATE1(c, d);
217
218 strHSTenc1_edge(&a, &d); strHSTenc1_edge(&b, &c);
219
220 d += ((a + 1) >> 1), c += ((b + 1) >> 1);
221 a -= d, b -= c;
222
223 *pa = a;
224 *pb = b;
225 *pc = c;
226 *pd = d;
227 }
228
229 /*****************************************************************************************
230 Input data offsets:
231 (15)(14)|(10+64)(11+64) p0 (15)(14)|(74)(75)
232 (13)(12)|( 8+64)( 9+64) (13)(12)|(72)(73)
233 --------+-------------- --------+--------
234 ( 5)( 4)|( 0+64) (1+64) p1 ( 5)( 4)|(64)(65)
235 ( 7)( 6)|( 2+64) (3+64) ( 7)( 6)|(66)(67)
236 *****************************************************************************************/
strPre4x4Stage1Split(PixelI * p0,PixelI * p1,Int iOffset)237 Void strPre4x4Stage1Split(PixelI *p0, PixelI *p1, Int iOffset)
238 {
239 PixelI *p2 = p0 + 72 - iOffset;
240 PixelI *p3 = p1 + 64 - iOffset;
241 p0 += 12;
242 p1 += 4;
243
244 /** butterfly & scaling **/
245 strHSTenc(p0 + 0, p2 + 0, p1 + 0, p3 + 0);
246 strHSTenc(p0 + 1, p2 + 1, p1 + 1, p3 + 1);
247 strHSTenc(p0 + 2, p2 + 2, p1 + 2, p3 + 2);
248 strHSTenc(p0 + 3, p2 + 3, p1 + 3, p3 + 3);
249 strHSTenc1(p0 + 0, p3 + 0);
250 strHSTenc1(p0 + 1, p3 + 1);
251 strHSTenc1(p0 + 2, p3 + 2);
252 strHSTenc1(p0 + 3, p3 + 3);
253
254 /** anti diagonal corners: rotation by pi/8 **/
255 ROTATE1(p1[2], p1[3]);
256 ROTATE1(p1[0], p1[1]);
257 ROTATE1(p2[1], p2[3]);
258 ROTATE1(p2[0], p2[2]);
259
260 /** bottom right corner: pi/8 rotation => pi/8 rotation **/
261 fwdOddOddPre(p3 + 0, p3 + 1, p3 + 2, p3 + 3);
262
263 /** butterfly **/
264 strDCT2x2dn(p0 + 0, p2 + 0, p1 + 0, p3 + 0);
265 strDCT2x2dn(p0 + 1, p2 + 1, p1 + 1, p3 + 1);
266 strDCT2x2dn(p0 + 2, p2 + 2, p1 + 2, p3 + 2);
267 strDCT2x2dn(p0 + 3, p2 + 3, p1 + 3, p3 + 3);
268 }
269
strPre4x4Stage1(PixelI * p,Int iOffset)270 Void strPre4x4Stage1(PixelI* p, Int iOffset)
271 {
272 strPre4x4Stage1Split(p, p + 16, iOffset);
273 }
274
275 /*****************************************************************************************
276 Input data offsets:
277 (15)(14)|(10+32)(11+32) p0 (15)(14)|(42)(43)
278 (13)(12)|( 8+32)( 9+32) (13)(12)|(40)(41)
279 --------+-------------- --------+--------
280 ( 5)( 4)|( 0+32)( 1+32) p1 ( 5)( 4)|(32)(33)
281 ( 7)( 6)|( 2+32)( 3+32) ( 7)( 6)|(34)(35)
282 *****************************************************************************************/
strPre4x4Stage2Split(PixelI * p0,PixelI * p1)283 Void strPre4x4Stage2Split(PixelI* p0, PixelI* p1)
284 {
285 /** butterfly **/
286 strHSTenc(p0 - 96, p0 + 96, p1 - 112, p1 + 80);
287 strHSTenc(p0 - 32, p0 + 32, p1 - 48, p1 + 16);
288 strHSTenc(p0 - 80, p0 + 112, p1 - 128, p1 + 64);
289 strHSTenc(p0 - 16, p0 + 48, p1 - 64, p1 + 0);
290 strHSTenc1(p0 - 96, p1 + 80);
291 strHSTenc1(p0 - 32, p1 + 16);
292 strHSTenc1(p0 - 80, p1 + 64);
293 strHSTenc1(p0 - 16, p1 + 0);
294
295 /** anti diagonal corners: rotation **/
296 ROTATE1(p1[-48], p1[-112]);
297 ROTATE1(p1[-64], p1[-128]);
298 ROTATE1(p0[112], p0[ 96]);
299 ROTATE1(p0[ 48], p0[ 32]);
300
301 /** bottom right corner: pi/8 rotation => pi/8 rotation **/
302 fwdOddOddPre(p1 + 0, p1 + 64, p1 + 16, p1 + 80);
303
304 /** butterfly **/
305 strDCT2x2dn(p0 - 96, p1 - 112, p0 + 96, p1 + 80);
306 strDCT2x2dn(p0 - 32, p1 - 48, p0 + 32, p1 + 16);
307 strDCT2x2dn(p0 - 80, p1 - 128, p0 + 112, p1 + 64);
308 strDCT2x2dn(p0 - 16, p1 - 64, p0 + 48, p1 + 0);
309 }
310
311
312 /**
313 Hadamard+Scale transform
314 for some strange reason, breaking up the function into two blocks, strHSTenc1 and strHSTenc
315 seems to work faster
316 **/
strHSTenc(PixelI * pa,PixelI * pb,PixelI * pc,PixelI * pd)317 static Void strHSTenc(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
318 {
319 /** different realization : does rescaling as well! **/
320 PixelI a, b, c, d;
321 a = *pa;
322 b = *pb;
323 d = *pc;
324 c = *pd;
325
326 a += c;
327 b -= d;
328 c = ((a - b) >> 1) - c;
329 d += (b >> 1);
330 b += c;
331
332 a -= (d * 3 + 4) >> 3;
333
334 *pa = a;
335 *pb = b;
336 *pc = c;
337 *pd = d;
338 }
339
strHSTenc1(PixelI * pa,PixelI * pd)340 static Void strHSTenc1(PixelI *pa, PixelI *pd)
341 {
342 /** different realization : does rescaling as well! **/
343 PixelI a, d;
344 a = *pa;
345 d = *pd;
346
347 d -= (a >> 7);
348 d += (a >> 10);
349
350 //a -= (d * 3 + 4) >> 3;
351 d -= (a * 3 + 0) >> 4;
352 a -= (d * 3 + 0) >> 3;
353 d = (a >> 1) - d;
354 a -= d;
355
356 *pa = a;
357 *pd = d;
358 }
359
strHSTenc1_edge(PixelI * pa,PixelI * pd)360 static Void strHSTenc1_edge (PixelI *pa, PixelI *pd)
361 {
362 /** different realizion as compared to scaling operator for 2D case **/
363 PixelI a, d;
364 a = *pa;
365 d = -(*pd); // Negative sign needed here for 1D scaling case to ensure correct scaling.
366
367 a -= d;
368 d += (a >> 1);
369 a -= (d * 3 + 4) >> 3;
370 // End new operations
371
372 //Scaling modification of adding 7/1024 in two steps (without multiplication by 7).
373 d -= (a >> 7);
374 d += (a >> 10);
375
376 d -= (a * 3 + 0) >> 4;
377 a -= (d * 3 + 0) >> 3;
378 d = (a >> 1) - d;
379 a -= d;
380
381 *pa = a;
382 *pd = d;
383 }
384
385 /** Kron(Rotate(pi/8), Rotate(pi/8)) **/\
fwdOddOdd(PixelI * pa,PixelI * pb,PixelI * pc,PixelI * pd)386 static Void fwdOddOdd(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
387 {
388 PixelI a, b, c, d, t1, t2;
389
390 a = *pa;
391 b = -*pb;
392 c = -*pc;
393 d = *pd;
394
395 /** butterflies **/
396 d += a;
397 c -= b;
398 a -= (t1 = d >> 1);
399 b += (t2 = c >> 1);
400
401 /** rotate pi/4 **/
402 a += (b * 3 + 4) >> 3;
403 b -= (a * 3 + 3) >> 2;
404 a += (b * 3 + 3) >> 3;
405
406 /** butterflies **/
407 b -= t2;
408 a += t1;
409 c += b;
410 d -= a;
411
412 *pa = a;
413 *pb = b;
414 *pc = c;
415 *pd = d;
416 }
417 /** Kron(Rotate(pi/8), Rotate(pi/8)) **/
fwdOddOddPre(PixelI * pa,PixelI * pb,PixelI * pc,PixelI * pd)418 static Void fwdOddOddPre(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
419 {
420 PixelI a, b, c, d, t1, t2;
421 a = *pa;
422 b = *pb;
423 c = *pc;
424 d = *pd;
425
426 /** butterflies **/
427 d += a;
428 c -= b;
429 a -= (t1 = d >> 1);
430 b += (t2 = c >> 1);
431
432 /** rotate pi/4 **/
433 a += (b * 3 + 4) >> 3;
434 b -= (a * 3 + 2) >> 2;
435 a += (b * 3 + 6) >> 3;
436
437 /** butterflies **/
438 b -= t2;
439 a += t1;
440 c += b;
441 d -= a;
442
443 *pa = a;
444 *pb = b;
445 *pc = c;
446 *pd = d;
447 }
448
449 /** Kron(Rotate(pi/8), [1 1; 1 -1]/sqrt(2)) **/
450 /** [a b c d] => [D C A B] **/
fwdOdd(PixelI * pa,PixelI * pb,PixelI * pc,PixelI * pd)451 Void fwdOdd(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
452 {
453 PixelI a, b, c, d;
454 a = *pa;
455 b = *pb;
456 c = *pc;
457 d = *pd;
458
459 /** butterflies **/
460 b -= c;
461 a += d;
462 c += (b + 1) >> 1;
463 d = ((a + 1) >> 1) - d;
464
465 /** rotate pi/8 **/
466 ROTATE2(a, b);
467 ROTATE2(c, d);
468
469 /** butterflies **/
470 d += (b) >> 1;
471 c -= (a + 1) >> 1;
472 b -= d;
473 a += c;
474
475 *pa = a;
476 *pb = b;
477 *pc = c;
478 *pd = d;
479 }
480
481 /*************************************************************************
482 Top-level function to tranform possible part of a macroblock
483 *************************************************************************/
transformMacroblock(CWMImageStrCodec * pSC)484 Void transformMacroblock(CWMImageStrCodec * pSC)
485 {
486 OVERLAP olOverlap = pSC->WMISCP.olOverlap;
487 COLORFORMAT cfColorFormat = pSC->m_param.cfColorFormat;
488 Bool left = (pSC->cColumn == 0), right = (pSC->cColumn == pSC->cmbWidth);
489 Bool top = (pSC->cRow == 0), bottom = (pSC->cRow == pSC->cmbHeight);
490 Bool leftORright = (left || right), topORbottom = (top || bottom);
491 Bool topORleft = (left || top);// rightORbottom = (right || bottom);
492 Bool leftAdjacentColumn = (pSC->cColumn == 1), rightAdjacentColumn = (pSC->cColumn == pSC->cmbWidth - 1);
493 // Bool topAdjacentRow = (pSC->cRow == 1), bottomAdjacentRow = (pSC->cRow == pSC->cmbHeight - 1);
494 PixelI * p = NULL;// * pt = NULL;
495 Int i, j;
496 Int iNumChromaFullPlanes = (Int)((YUV_420 == cfColorFormat || YUV_422 == cfColorFormat) ?
497 1 : pSC->m_param.cNumChannels);
498
499 #define mbX pSC->mbX
500 #define mbY pSC->mbY
501 #define tileX pSC->tileX
502 #define tileY pSC->tileY
503 #define bVertTileBoundary pSC->bVertTileBoundary
504 #define bHoriTileBoundary pSC->bHoriTileBoundary
505 #define bOneMBLeftVertTB pSC->bOneMBLeftVertTB
506 #define bOneMBRightVertTB pSC->bOneMBRightVertTB
507 #define iPredBefore pSC->iPredBefore
508 #define iPredAfter pSC->iPredAfter
509
510 if (pSC->WMISCP.bUseHardTileBoundaries) {
511 //Add tile location information
512 if (pSC->cColumn == 0) {
513 bVertTileBoundary = FALSE;
514 tileY = 0;
515 }
516 bOneMBLeftVertTB = bOneMBRightVertTB = FALSE;
517 if(tileY > 0 && tileY <= pSC->WMISCP.cNumOfSliceMinus1H && (pSC->cColumn - 1) == pSC->WMISCP.uiTileY[tileY])
518 bOneMBRightVertTB = TRUE;
519 if(tileY < pSC->WMISCP.cNumOfSliceMinus1H && pSC->cColumn == pSC->WMISCP.uiTileY[tileY + 1]) {
520 bVertTileBoundary = TRUE;
521 tileY++;
522 }
523 else
524 bVertTileBoundary = FALSE;
525 if(tileY < pSC->WMISCP.cNumOfSliceMinus1H && (pSC->cColumn + 1) == pSC->WMISCP.uiTileY[tileY + 1])
526 bOneMBLeftVertTB = TRUE;
527
528 if (pSC->cRow == 0) {
529 bHoriTileBoundary = FALSE;
530 tileX = 0;
531 }
532 else if(mbY != pSC->cRow && tileX < pSC->WMISCP.cNumOfSliceMinus1V && pSC->cRow == pSC->WMISCP.uiTileX[tileX + 1]) {
533 bHoriTileBoundary = TRUE;
534 tileX++;
535 }
536 else if(mbY != pSC->cRow)
537 bHoriTileBoundary = FALSE;
538 }
539 else {
540 bVertTileBoundary = FALSE;
541 bHoriTileBoundary = FALSE;
542 bOneMBLeftVertTB = FALSE;
543 bOneMBRightVertTB = FALSE;
544 }
545 mbX = pSC->cColumn, mbY = pSC->cRow;
546
547 //================================================================
548 // 400_Y, 444_YUV
549 for(i = 0; i < iNumChromaFullPlanes; ++i)
550 {
551 PixelI* const p0 = pSC->p0MBbuffer[i];//(0 == i ? pSC->pY0 : (1 == i ? pSC->pU0 : pSC->pV0));
552 PixelI* const p1 = pSC->p1MBbuffer[i];//(0 == i ? pSC->pY1 : (1 == i ? pSC->pU1 : pSC->pV1));
553
554 //================================
555 // first level overlap
556 if(OL_NONE != olOverlap)
557 {
558 /* Corner operations */
559 if ((top || bHoriTileBoundary) && (left || bVertTileBoundary))
560 strPre4(p1 + 0, p1 + 1, p1 + 2, p1 + 3);
561 if ((top || bHoriTileBoundary) && (right || bVertTileBoundary))
562 strPre4(p1 - 59, p1 - 60, p1 - 57, p1 - 58);
563 if ((bottom || bHoriTileBoundary) && (left || bVertTileBoundary))
564 strPre4(p0 + 48 + 10, p0 + 48 + 11, p0 + 48 + 8, p0 + 48 + 9);
565 if ((bottom || bHoriTileBoundary) && (right || bVertTileBoundary))
566 strPre4(p0 - 1, p0 - 2, p0 - 3, p0 - 4);
567 if(!right && !bottom)
568 {
569 if (top || bHoriTileBoundary)
570 {
571
572 for (j = ((left || bVertTileBoundary) ? 0 : -64); j < 192; j += 64)
573 {
574 p = p1 + j;
575 strPre4(p + 5, p + 4, p + 64, p + 65);
576 strPre4(p + 7, p + 6, p + 66, p + 67);
577 p = NULL;
578 }
579 }
580 else
581 {
582 for (j = ((left || bVertTileBoundary) ? 0 : -64); j < 192; j += 64)
583 {
584 strPre4x4Stage1Split(p0 + 48 + j, p1 + j, 0);
585 }
586 }
587
588 if (left || bVertTileBoundary)
589 {
590 if (!top && !bHoriTileBoundary)
591 {
592 strPre4(p0 + 58, p0 + 56, p1 + 0, p1 + 2);
593 strPre4(p0 + 59, p0 + 57, p1 + 1, p1 + 3);
594 }
595
596 for (j = -64; j < -16; j += 16)
597 {
598 p = p1 + j;
599 strPre4(p + 74, p + 72, p + 80, p + 82);
600 strPre4(p + 75, p + 73, p + 81, p + 83);
601 p = NULL;
602 }
603 }
604 else
605 {
606 for (j = -64; j < -16; j += 16)
607 {
608 strPre4x4Stage1(p1 + j, 0);
609 }
610 }
611
612 strPre4x4Stage1(p1 + 0, 0);
613 strPre4x4Stage1(p1 + 16, 0);
614 strPre4x4Stage1(p1 + 32, 0);
615 strPre4x4Stage1(p1 + 64, 0);
616 strPre4x4Stage1(p1 + 80, 0);
617 strPre4x4Stage1(p1 + 96, 0);
618 strPre4x4Stage1(p1 + 128, 0);
619 strPre4x4Stage1(p1 + 144, 0);
620 strPre4x4Stage1(p1 + 160, 0);
621 }
622
623 if (bottom || bHoriTileBoundary)
624 {
625 for (j = ((left || bVertTileBoundary) ? 48 : -16); j < (right ? -16 : 240); j += 64)
626 {
627 p = p0 + j;
628 strPre4(p + 15, p + 14, p + 74, p + 75);
629 strPre4(p + 13, p + 12, p + 72, p + 73);
630 p = NULL;
631 }
632 }
633
634 if ((right || bVertTileBoundary) && !bottom)
635 {
636 if (!top && !bHoriTileBoundary)
637 {
638 strPre4(p0 - 1, p0 - 3, p1 - 59, p1 - 57);
639 strPre4(p0 - 2, p0 - 4, p1 - 60, p1 - 58);
640 }
641 for (j = -64; j < -16; j += 16)
642 {
643 p = p1 + j;
644 strPre4(p + 15, p + 13, p + 21, p + 23);
645 strPre4(p + 14, p + 12, p + 20, p + 22);
646 p = NULL;
647 }
648 }
649 }
650
651 //================================
652 // first level transform
653 if (!top)
654 {
655 for (j = (left ? 48 : -16); j < (right ? 48 : 240); j += 64)
656 {
657 strDCT4x4Stage1(p0 + j);
658 }
659 }
660
661 if (!bottom)
662 {
663 for (j = (left ? 0 : -64); j < (right ? 0 : 192); j += 64)
664 {
665 strDCT4x4Stage1(p1 + j + 0);
666 strDCT4x4Stage1(p1 + j + 16);
667 strDCT4x4Stage1(p1 + j + 32);
668 }
669 }
670
671 //================================
672 // second level overlap
673 if (OL_TWO == olOverlap)
674 {
675 /* Corner operations */
676 if ((top || bHoriTileBoundary) && (left || bVertTileBoundary))
677 strPre4(p1 + 0, p1 + 64, p1 + 0 + 16, p1 + 64 + 16);
678 if ((top || bHoriTileBoundary) && (right || bVertTileBoundary))
679 strPre4(p1 - 128, p1 - 64, p1 - 128 + 16, p1 - 64 + 16);
680 if ((bottom || bHoriTileBoundary) && (left || bVertTileBoundary))
681 strPre4(p0 + 32, p0 + 96, p0 + 32 + 16, p0 + 96 + 16);
682 if ((bottom || bHoriTileBoundary) && (right || bVertTileBoundary))
683 strPre4(p0 - 96, p0 - 32, p0 - 96 + 16, p0 - 32 + 16);
684 if ((leftORright || bVertTileBoundary) && (!topORbottom && !bHoriTileBoundary))
685 {
686 if (left || bVertTileBoundary) {
687 j = 0;
688 strPre4(p0 + j + 32, p0 + j + 48, p1 + j + 0, p1 + j + 16);
689 strPre4(p0 + j + 96, p0 + j + 112, p1 + j + 64, p1 + j + 80);
690 }
691 if (right || bVertTileBoundary) {
692 j = -128;
693 strPre4(p0 + j + 32, p0 + j + 48, p1 + j + 0, p1 + j + 16);
694 strPre4(p0 + j + 96, p0 + j + 112, p1 + j + 64, p1 + j + 80);
695 }
696 }
697
698 if (!leftORright && !bVertTileBoundary)
699 {
700 if (topORbottom || bHoriTileBoundary)
701 {
702 if (top || bHoriTileBoundary) {
703 p = p1;
704 strPre4(p - 128, p - 64, p + 0, p + 64);
705 strPre4(p - 112, p - 48, p + 16, p + 80);
706 p = NULL;
707 }
708 if (bottom || bHoriTileBoundary) {
709 p = p0 + 32;
710 strPre4(p - 128, p - 64, p + 0, p + 64);
711 strPre4(p - 112, p - 48, p + 16, p + 80);
712 p = NULL;
713 }
714 }
715 else
716 {
717 strPre4x4Stage2Split(p0, p1);
718 }
719 }
720 }
721
722 //================================
723 // second level transform
724 if (!topORleft){
725 if (pSC->m_param.bScaledArith) {
726 strNormalizeEnc(p0 - 256, (i != 0));
727 }
728 strDCT4x4SecondStage(p0 - 256);
729 }
730 }
731
732 //================================================================
733 // 420_UV
734 for(i = 0; i < (YUV_420 == cfColorFormat? 2 : 0); ++i)
735 {
736 PixelI* const p0 = pSC->p0MBbuffer[1 + i];//(0 == i ? pSC->pU0 : pSC->pV0);
737 PixelI* const p1 = pSC->p1MBbuffer[1 + i];//(0 == i ? pSC->pU1 : pSC->pV1);
738
739 //================================
740 // first level overlap (420_UV)
741 if (OL_NONE != olOverlap)
742 {
743 /* Corner operations */
744 if ((top || bHoriTileBoundary) && (left || bVertTileBoundary))
745 strPre4(p1 + 0, p1 + 1, p1 + 2, p1 + 3);
746 if ((top || bHoriTileBoundary) && (right || bVertTileBoundary))
747 strPre4(p1 - 27, p1 - 28, p1 - 25, p1 - 26);
748 if ((bottom || bHoriTileBoundary) && (left || bVertTileBoundary))
749 strPre4(p0 + 16 + 10, p0 + 16 + 11, p0 + 16 + 8, p0 + 16 + 9);
750 if ((bottom || bHoriTileBoundary) && (right || bVertTileBoundary))
751 strPre4(p0 - 1, p0 - 2, p0 - 3, p0 - 4);
752 if(!right && !bottom)
753 {
754 if (top || bHoriTileBoundary)
755 {
756
757 for (j = ((left || bVertTileBoundary) ? 0 : -32); j < 32; j += 32)
758 {
759 p = p1 + j;
760 strPre4(p + 5, p + 4, p + 32, p + 33);
761 strPre4(p + 7, p + 6, p + 34, p + 35);
762 p = NULL;
763 }
764 }
765 else
766 {
767 for (j = ((left || bVertTileBoundary) ? 0: -32); j < 32; j += 32)
768 {
769 strPre4x4Stage1Split(p0 + 16 + j, p1 + j, 32);
770 }
771 }
772
773 if (left || bVertTileBoundary)
774 {
775 if (!top && !bHoriTileBoundary)
776 {
777 strPre4(p0 + 26, p0 + 24, p1 + 0, p1 + 2);
778 strPre4(p0 + 27, p0 + 25, p1 + 1, p1 + 3);
779 }
780
781 strPre4(p1 + 10, p1 + 8, p1 + 16, p1 + 18);
782 strPre4(p1 + 11, p1 + 9, p1 + 17, p1 + 19);
783 }
784 else if (!bVertTileBoundary)
785 {
786 strPre4x4Stage1(p1 - 32, 32);
787 }
788
789 strPre4x4Stage1(p1, 32);
790 }
791
792 if (bottom || bHoriTileBoundary)
793 {
794 for (j = ((left || bVertTileBoundary) ? 16: -16); j < (right ? -16: 32); j += 32)
795 {
796 p = p0 + j;
797 strPre4(p + 15, p + 14, p + 42, p + 43);
798 strPre4(p + 13, p + 12, p + 40, p + 41);
799 p = NULL;
800 }
801 }
802
803 if ((right || bVertTileBoundary) && !bottom)
804 {
805 if (!top && !bHoriTileBoundary)
806 {
807 strPre4(p0 - 1, p0 - 3, p1 - 27, p1 - 25);
808 strPre4(p0 - 2, p0 - 4, p1 - 28, p1 - 26);
809 }
810
811 strPre4(p1 - 17, p1 - 19, p1 - 11, p1 - 9);
812 strPre4(p1 - 18, p1 - 20, p1 - 12, p1 - 10);
813 }
814 }
815
816 //================================
817 // first level transform (420_UV)
818 if (!top)
819 {
820 for (j = (left ? 16 : -16); j < (right ? 16 : 48); j += 32)
821 {
822 strDCT4x4Stage1(p0 + j);
823 }
824 }
825
826 if (!bottom)
827 {
828 for (j = (left ? 0 : -32); j < (right ? 0 : 32); j += 32)
829 {
830 strDCT4x4Stage1(p1 + j);
831 }
832 }
833
834 //================================
835 // second level overlap (420_UV)
836 if (OL_TWO == olOverlap)
837 {
838 if ((leftAdjacentColumn || bOneMBRightVertTB) && (top || bHoriTileBoundary))
839 COMPUTE_CORNER_PRED_DIFF(p1 - 64 + 0, *(p1 - 64 + 32));
840
841 if ((rightAdjacentColumn || bOneMBLeftVertTB) && (top || bHoriTileBoundary))
842 iPredBefore[i][0] = *(p1 + 0);
843 if ((right || bVertTileBoundary) && (top || bHoriTileBoundary))
844 COMPUTE_CORNER_PRED_DIFF(p1 - 64 + 32, iPredBefore[i][0]);
845
846 if ((leftAdjacentColumn || bOneMBRightVertTB) && (bottom || bHoriTileBoundary))
847 COMPUTE_CORNER_PRED_DIFF(p0 - 64 + 16, *(p0 - 64 + 48));
848
849 if ((rightAdjacentColumn || bOneMBLeftVertTB) && (bottom || bHoriTileBoundary))
850 iPredBefore[i][1] = *(p0 + 16);
851 if ((right || bVertTileBoundary) && (bottom || bHoriTileBoundary))
852 COMPUTE_CORNER_PRED_DIFF(p0 - 64 + 48, iPredBefore[i][1]);
853
854 if ((leftORright || bVertTileBoundary) && !topORbottom && !bHoriTileBoundary)
855 {
856 if (left || bVertTileBoundary)
857 strPre2(p0 + 0 + 16, p1 + 0);
858 if (right || bVertTileBoundary)
859 strPre2(p0 + -32 + 16, p1 + -32);
860 }
861
862 if (!leftORright)
863 {
864 if ((topORbottom || bHoriTileBoundary) && !bVertTileBoundary)
865 {
866 if (top || bHoriTileBoundary)
867 strPre2(p1 - 32, p1);
868 if (bottom || bHoriTileBoundary)
869 strPre2(p0 + 16 - 32, p0 + 16);
870 }
871 else if (!topORbottom && !bHoriTileBoundary && !bVertTileBoundary)
872 strPre2x2(p0 - 16, p0 + 16, p1 - 32, p1);
873 }
874 if ((leftAdjacentColumn || bOneMBRightVertTB) && (top || bHoriTileBoundary))
875 COMPUTE_CORNER_PRED_ADD(p1 - 64 + 0, *(p1 - 64 + 32));
876 if ((rightAdjacentColumn || bOneMBLeftVertTB) && (top || bHoriTileBoundary))
877 iPredAfter[i][0] = *(p1 + 0);
878 if ((right || bVertTileBoundary) && (top || bHoriTileBoundary))
879 COMPUTE_CORNER_PRED_ADD(p1 - 64 + 32, iPredAfter[i][0]);
880 if ((leftAdjacentColumn || bOneMBRightVertTB) && (bottom || bHoriTileBoundary))
881 COMPUTE_CORNER_PRED_ADD(p0 - 64 + 16, *(p0 - 64 + 48));
882 if ((rightAdjacentColumn || bOneMBLeftVertTB) && (bottom || bHoriTileBoundary))
883 iPredAfter[i][1] = *(p0 + 16);
884 if ((right || bVertTileBoundary) && (bottom || bHoriTileBoundary))
885 COMPUTE_CORNER_PRED_ADD(p0 - 64 + 48, iPredAfter[i][1]);
886 }
887
888 //================================
889 // second level transform (420_UV)
890 if (!topORleft)
891 {
892 if (!pSC->m_param.bScaledArith) {
893 strDCT2x2dn(p0 - 64, p0 - 32, p0 - 48, p0 - 16);
894 }
895 else {
896 strDCT2x2dnEnc(p0 - 64, p0 - 32, p0 - 48, p0 - 16);
897 }
898 }
899 }
900
901 //================================================================
902 // 422_UV
903 for(i = 0; i < (YUV_422 == cfColorFormat? 2 : 0); ++i)
904 {
905 PixelI* const p0 = pSC->p0MBbuffer[1 + i];//(0 == i ? pSC->pU0 : pSC->pV0);
906 PixelI* const p1 = pSC->p1MBbuffer[1 + i];//(0 == i ? pSC->pU1 : pSC->pV1);
907
908 //================================
909 // first level overlap (422_UV)
910 if (OL_NONE != olOverlap)
911 {
912 /* Corner operations */
913 if ((top || bHoriTileBoundary) && (left || bVertTileBoundary))
914 strPre4(p1 + 0, p1 + 1, p1 + 2, p1 + 3);
915 if ((top || bHoriTileBoundary) && (right || bVertTileBoundary))
916 strPre4(p1 - 59, p1 - 60, p1 - 57, p1 - 58);
917 if ((bottom || bHoriTileBoundary) && (left || bVertTileBoundary))
918 strPre4(p0 + 48 + 10, p0 + 48 + 11, p0 + 48 + 8, p0 + 48 + 9);
919 if ((bottom || bHoriTileBoundary) && (right || bVertTileBoundary))
920 strPre4(p0 - 1, p0 - 2, p0 - 3, p0 - 4);
921 if(!right && !bottom)
922 {
923 if (top || bHoriTileBoundary)
924 {
925
926 for (j = ((left || bVertTileBoundary) ? 0 : -64); j < 64; j += 64)
927 {
928 p = p1 + j;
929 strPre4(p + 5, p + 4, p + 64, p + 65);
930 strPre4(p + 7, p + 6, p + 66, p + 67);
931 p = NULL;
932 }
933 }
934 else
935 {
936 for (j = ((left || bVertTileBoundary) ? 0: -64); j < 64; j += 64)
937 {
938 strPre4x4Stage1Split(p0 + 48 + j, p1 + j, 0);
939 }
940 }
941
942 if (left || bVertTileBoundary)
943 {
944 if (!top && !bHoriTileBoundary)
945 {
946 strPre4(p0 + 58, p0 + 56, p1 + 0, p1 + 2);
947 strPre4(p0 + 59, p0 + 57, p1 + 1, p1 + 3);
948 }
949
950 for (j = 0; j < 48; j += 16)
951 {
952 p = p1 + j;
953 strPre4(p + 10, p + 8, p + 16, p + 18);
954 strPre4(p + 11, p + 9, p + 17, p + 19);
955 p = NULL;
956 }
957 }
958 else if (!bVertTileBoundary)
959 {
960 for (j = -64; j < -16; j += 16)
961 {
962 strPre4x4Stage1(p1 + j, 0);
963 }
964 }
965
966 strPre4x4Stage1(p1 + 0, 0);
967 strPre4x4Stage1(p1 + 16, 0);
968 strPre4x4Stage1(p1 + 32, 0);
969 }
970
971 if (bottom || bHoriTileBoundary)
972 {
973 for (j = ((left || bVertTileBoundary) ? 48: -16); j < (right ? -16: 112); j += 64)
974 {
975 p = p0 + j;
976 strPre4(p + 15, p + 14, p + 74, p + 75);
977 strPre4(p + 13, p + 12, p + 72, p + 73);
978 p = NULL;
979 }
980 }
981
982 if ((right || bVertTileBoundary) && !bottom)
983 {
984 if (!top && !bHoriTileBoundary)
985 {
986 strPre4(p0 - 1, p0 - 3, p1 - 59, p1 - 57);
987 strPre4(p0 - 2, p0 - 4, p1 - 60, p1 - 58);
988 }
989
990 for (j = -64; j < -16; j += 16)
991 {
992 p = p1 + j;
993 strPre4(p + 15, p + 13, p + 21, p + 23);
994 strPre4(p + 14, p + 12, p + 20, p + 22);
995 p = NULL;
996 }
997 }
998 }
999
1000 //================================
1001 // first level transform (422_UV)
1002 if (!top)
1003 {
1004 for (j = (left ? 48 : -16); j < (right ? 48 : 112); j += 64)
1005 {
1006 strDCT4x4Stage1(p0 + j);
1007 }
1008 }
1009
1010 if (!bottom)
1011 {
1012 for (j = (left ? 0 : -64); j < (right ? 0 : 64); j += 64)
1013 {
1014 strDCT4x4Stage1(p1 + j + 0);
1015 strDCT4x4Stage1(p1 + j + 16);
1016 strDCT4x4Stage1(p1 + j + 32);
1017 }
1018 }
1019
1020 //================================
1021 // second level overlap (422_UV)
1022 if (OL_TWO == olOverlap)
1023 {
1024 if ((leftAdjacentColumn || bOneMBRightVertTB) && (top || bHoriTileBoundary))
1025 COMPUTE_CORNER_PRED_DIFF(p1 - 128 + 0, *(p1 - 128 + 64));
1026
1027 if ((rightAdjacentColumn || bOneMBLeftVertTB) && (top || bHoriTileBoundary))
1028 iPredBefore[i][0] = *(p1 + 0);
1029 if ((right || bVertTileBoundary) && (top || bHoriTileBoundary))
1030 COMPUTE_CORNER_PRED_DIFF(p1 - 128 + 64, iPredBefore[i][0]);
1031
1032 if ((leftAdjacentColumn || bOneMBRightVertTB) && (bottom || bHoriTileBoundary))
1033 COMPUTE_CORNER_PRED_DIFF(p0 - 128 + 48, *(p0 - 128 + 112));
1034
1035 if ((rightAdjacentColumn || bOneMBLeftVertTB) && (bottom || bHoriTileBoundary))
1036 iPredBefore[i][1] = *(p0 + 48);
1037 if ((right || bVertTileBoundary) && (bottom || bHoriTileBoundary))
1038 COMPUTE_CORNER_PRED_DIFF(p0 - 128 + 112, iPredBefore[i][1]);
1039
1040 if (!bottom)
1041 {
1042 if (leftORright || bVertTileBoundary)
1043 {
1044 if (!top && !bHoriTileBoundary)
1045 {
1046 if (left || bVertTileBoundary)
1047 strPre2(p0 + 48 + 0, p1 + 0);
1048
1049 if (right || bVertTileBoundary)
1050 strPre2(p0 + 48 + -64, p1 + -64);
1051 }
1052
1053 if (left || bVertTileBoundary)
1054 strPre2(p1 + 16, p1 + 16 + 16);
1055
1056 if (right || bVertTileBoundary)
1057 strPre2(p1 + -48, p1 + -48 + 16);
1058 }
1059
1060 if (!leftORright && !bVertTileBoundary)
1061 {
1062 if (top || bHoriTileBoundary)
1063 strPre2(p1 - 64, p1);
1064 else
1065 strPre2x2(p0 - 16, p0 + 48, p1 - 64, p1);
1066
1067 strPre2x2(p1 - 48, p1 + 16, p1 - 32, p1 + 32);
1068 }
1069 }
1070
1071 if ((bottom || bHoriTileBoundary) && (!leftORright && !bVertTileBoundary))
1072 strPre2(p0 - 16, p0 + 48);
1073
1074 if ((leftAdjacentColumn || bOneMBRightVertTB) && (top || bHoriTileBoundary))
1075 COMPUTE_CORNER_PRED_ADD(p1 - 128 + 0, *(p1 - 128 + 64));
1076
1077 if ((rightAdjacentColumn || bOneMBLeftVertTB) && (top || bHoriTileBoundary))
1078 iPredAfter[i][0] = *(p1 + 0);
1079 if ((right || bVertTileBoundary) && (top || bHoriTileBoundary))
1080 COMPUTE_CORNER_PRED_ADD(p1 - 128 + 64, iPredAfter[i][0]);
1081
1082 if ((leftAdjacentColumn || bOneMBRightVertTB) && (bottom || bHoriTileBoundary))
1083 COMPUTE_CORNER_PRED_ADD(p0 - 128 + 48, *(p0 - 128 + 112));
1084
1085 if ((rightAdjacentColumn || bOneMBLeftVertTB) && (bottom || bHoriTileBoundary))
1086 iPredAfter[i][1] = *(p0 + 48);
1087 if ((right || bVertTileBoundary) && (bottom || bHoriTileBoundary))
1088 COMPUTE_CORNER_PRED_ADD(p0 - 128 + 112, iPredAfter[i][1]);
1089 }
1090
1091 //================================
1092 // second level transform (422_UV)
1093 if (!topORleft)
1094 {
1095 if (!pSC->m_param.bScaledArith) {
1096 strDCT2x2dn(p0 - 128, p0 - 64, p0 - 112, p0 - 48);
1097 strDCT2x2dn(p0 - 96, p0 - 32, p0 - 80, p0 - 16);
1098 }
1099 else {
1100 strDCT2x2dnEnc(p0 - 128, p0 - 64, p0 - 112, p0 - 48);
1101 strDCT2x2dnEnc(p0 - 96, p0 - 32, p0 - 80, p0 - 16);
1102 }
1103
1104 // 1D lossless HT
1105 p0[- 96] -= p0[-128];
1106 p0[-128] += ((p0[-96] + 1) >> 1);
1107 }
1108 }
1109 assert(NULL == p);
1110 }
1111
1112