1 //*@@@+++@@@@******************************************************************
2 //
3 // Copyright � Microsoft Corp.
4 // All rights reserved.
5 //
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions are met:
8 //
9 // � Redistributions of source code must retain the above copyright notice,
10 //   this list of conditions and the following disclaimer.
11 // � Redistributions in binary form must reproduce the above copyright notice,
12 //   this list of conditions and the following disclaimer in the documentation
13 //   and/or other materials provided with the distribution.
14 //
15 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
19 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 // POSSIBILITY OF SUCH DAMAGE.
26 //
27 //*@@@---@@@@******************************************************************
28 #include "strcodec.h"
29 #include "encode.h"
30 #include "strTransform.h"
31 #include <math.h>
32 #include "perfTimer.h"
33 
34 #ifdef MEM_TRACE
35 #define TRACE_MALLOC    1
36 #define TRACE_NEW       0
37 #define TRACE_HEAP      0
38 #include "memtrace.h"
39 #endif
40 
41 #ifdef ADI_SYS_OPT
42 extern char L1WW[];
43 #endif
44 
45 #ifdef X86OPT_INLINE
46 #define _FORCEINLINE __forceinline
47 #else // X86OPT_INLINE
48 #define _FORCEINLINE
49 #endif // X86OPT_INLINE
50 
51 Int inputMBRow(CWMImageStrCodec *);
52 
53 #if defined(WMP_OPT_SSE2) || defined(WMP_OPT_CC_ENC) || defined(WMP_OPT_TRFM_ENC)
54 void StrEncOpt(CWMImageStrCodec* pSC);
55 #endif // OPT defined
56 
57 #define MINIMUM_PACKET_LENGTH 4  // as long as packet header - skipped if data is not accessed (happens only for flexbits)
58 
writeQuantizer(CWMIQuantizer * pQuantizer[MAX_CHANNELS],BitIOInfo * pIO,U8 cChMode,size_t cChannel,size_t iPos)59 Void writeQuantizer(CWMIQuantizer * pQuantizer[MAX_CHANNELS], BitIOInfo * pIO, U8 cChMode, size_t cChannel, size_t iPos)
60 {
61     if(cChMode > 2)
62         cChMode = 2;
63 
64     if(cChannel > 1)
65         putBit16(pIO, cChMode, 2); // Channel mode
66     else
67         cChMode = 0;
68 
69     putBit16(pIO, pQuantizer[0][iPos].iIndex, 8); // Y
70 
71     if(cChMode == 1)  // MIXED
72         putBit16(pIO, pQuantizer[1][iPos].iIndex, 8); // UV
73     else if(cChMode > 0){ // INDEPENDENT
74         size_t i;
75 
76         for(i = 1; i < cChannel; i ++)
77             putBit16(pIO, pQuantizer[i][iPos].iIndex, 8); // UV
78     }
79 }
80 
81 // packet header: 00000000 00000000 00000001 ?????xxx
82 // xxx:           000(spatial) 001(DC) 010(AD) 011(AC) 100(FL) 101-111(reserved)
83 // ?????:         (iTileY * cNumOfSliceV + iTileX)
writePacketHeader(BitIOInfo * pIO,U8 ptPacketType,U8 pID)84 Void writePacketHeader(BitIOInfo * pIO, U8 ptPacketType, U8 pID)
85 {
86     putBit16(pIO, 0, 8);
87     putBit16(pIO, 0, 8);
88     putBit16(pIO, 1, 8);
89     putBit16(pIO, (pID << 3) + (ptPacketType & 7), 8);
90 }
91 
writeTileHeaderDC(CWMImageStrCodec * pSC,BitIOInfo * pIO)92 Int writeTileHeaderDC(CWMImageStrCodec * pSC, BitIOInfo * pIO)
93 {
94     size_t iTile, j = (pSC->m_pNextSC == NULL ? 1U : 2U);
95 
96     for(; j > 0; j --){
97         if((pSC->m_param.uQPMode & 1) != 0){ // not DC uniform
98             CWMITile * pTile = pSC->pTile + pSC->cTileColumn;
99             size_t i;
100 
101             pTile->cChModeDC = (U8)(rand() & 3); // channel mode, just for concept proofing!
102 
103             if(pSC->cTileRow + pSC->cTileColumn == 0) // allocate DC QP info
104                 for(iTile = 0; iTile <= pSC->WMISCP.cNumOfSliceMinus1V; iTile ++)
105                     if(allocateQuantizer(pSC->pTile[iTile].pQuantizerDC, pSC->m_param.cNumChannels, 1) != ICERR_OK)
106                         return ICERR_ERROR;
107 
108             for(i = 0; i < pSC->m_param.cNumChannels; i ++)
109                 pTile->pQuantizerDC[i]->iIndex = (U8)((rand() & 0x2f) + 1); // QP indexes, just for concept proofing!
110 
111             formatQuantizer(pTile->pQuantizerDC, pTile->cChModeDC, pSC->m_param.cNumChannels, 0, TRUE, pSC->m_param.bScaledArith);
112 
113             for(i = 0; i < pSC->m_param.cNumChannels; i ++)
114                 pTile->pQuantizerDC[i]->iOffset = (pTile->pQuantizerDC[i]->iQP >> 1);
115 
116             writeQuantizer(pTile->pQuantizerDC, pIO, pTile->cChModeDC, pSC->m_param.cNumChannels, 0);
117         }
118 
119         pSC = pSC->m_pNextSC;
120     }
121 
122     return ICERR_OK;
123 }
124 
writeTileHeaderLP(CWMImageStrCodec * pSC,BitIOInfo * pIO)125 Int writeTileHeaderLP(CWMImageStrCodec * pSC, BitIOInfo * pIO)
126 {
127     size_t k = (pSC->m_pNextSC == NULL ? 1U : 2U);
128 
129     for(; k > 0; k --){
130         if(pSC->WMISCP.sbSubband != SB_DC_ONLY && (pSC->m_param.uQPMode & 2) != 0){ // not LP uniform
131             CWMITile * pTile = pSC->pTile + pSC->cTileColumn;
132             U8 i, j;
133 
134             pTile->bUseDC = ((rand() & 1) == 0 ? TRUE : FALSE); // use DC quantizer?
135             putBit16(pIO, pTile->bUseDC == TRUE ? 1 : 0, 1);
136             pTile->cBitsLP = 0;
137 
138             pTile->cNumQPLP = (pTile->bUseDC == TRUE ? 1 : (U8)((rand() & 0xf) + 1)); // # of LP QPs
139 
140             if(pSC->cTileRow > 0)
141                 freeQuantizer(pTile->pQuantizerLP);
142 
143             if(allocateQuantizer(pTile->pQuantizerLP, pSC->m_param.cNumChannels, pTile->cNumQPLP) != ICERR_OK)
144                 return ICERR_ERROR;
145 
146             if(pTile->bUseDC == TRUE)
147                 useDCQuantizer(pSC, pSC->cTileColumn);
148             else{
149                 putBit16(pIO, pTile->cNumQPLP - 1, 4);
150 
151                 pTile->cBitsLP = dquantBits(pTile->cNumQPLP);
152 
153                 for(i = 0; i < pTile->cNumQPLP; i ++){
154                     pTile->cChModeLP[i] = (U8)(rand() & 3); // channel mode, just for concept proofing!
155 
156                     for(j = 0; j < pSC->m_param.cNumChannels; j ++)
157                         pTile->pQuantizerLP[j][i].iIndex = (U8)((rand() & 0xfe) + 1); // QP indexes, just for concept proofing!
158                     formatQuantizer(pTile->pQuantizerLP, pTile->cChModeLP[i], pSC->m_param.cNumChannels, i, TRUE, pSC->m_param.bScaledArith);
159                     writeQuantizer(pTile->pQuantizerLP, pIO, pTile->cChModeLP[i], pSC->m_param.cNumChannels, i);
160                 }
161             }
162         }
163         pSC = pSC->m_pNextSC;
164     }
165 
166     return ICERR_OK;
167 }
168 
writeTileHeaderHP(CWMImageStrCodec * pSC,BitIOInfo * pIO)169 Int writeTileHeaderHP(CWMImageStrCodec * pSC, BitIOInfo * pIO)
170 {
171     size_t k = (pSC->m_pNextSC == NULL ? 1U : 2U);
172 
173     for(; k > 0; k --){
174         if(pSC->WMISCP.sbSubband != SB_DC_ONLY && pSC->WMISCP.sbSubband != SB_NO_HIGHPASS && (pSC->m_param.uQPMode & 4) != 0){ // not HP uniform
175             CWMITile * pTile = pSC->pTile + pSC->cTileColumn;
176             U8 i, j;
177 
178             pTile->bUseLP = ((rand() & 1) == 0 ? TRUE : FALSE); // use LP quantizer?
179             putBit16(pIO, pTile->bUseLP == TRUE ? 1 : 0, 1);
180             pTile->cBitsHP = 0;
181 
182             pTile->cNumQPHP = (pTile->bUseLP == TRUE ? pTile->cNumQPLP : (U8)((rand() & 0xf) + 1)); // # of LP QPs
183 
184             if(pSC->cTileRow > 0)
185                 freeQuantizer(pTile->pQuantizerHP);
186 
187             if(allocateQuantizer(pTile->pQuantizerHP, pSC->m_param.cNumChannels, pTile->cNumQPHP) != ICERR_OK)
188                 return ICERR_ERROR;
189 
190             if(pTile->bUseLP == TRUE)
191                 useLPQuantizer(pSC, pTile->cNumQPHP, pSC->cTileColumn);
192             else{
193                 putBit16(pIO, pTile->cNumQPHP - 1, 4);
194                 pTile->cBitsHP = dquantBits(pTile->cNumQPHP);
195 
196                 for(i = 0; i < pTile->cNumQPHP; i ++){
197                     pTile->cChModeHP[i] = (U8)(rand() & 3); // channel mode, just for concept proofing!
198 
199                     for(j = 0; j < pSC->m_param.cNumChannels; j ++)
200                         pTile->pQuantizerHP[j][i].iIndex = (U8)((rand() & 0xfe) + 1); // QP indexes, just for concept proofing!
201                     formatQuantizer(pTile->pQuantizerHP, pTile->cChModeHP[i], pSC->m_param.cNumChannels, i, FALSE, pSC->m_param.bScaledArith);
202                     writeQuantizer(pTile->pQuantizerHP, pIO, pTile->cChModeHP[i], pSC->m_param.cNumChannels, i);
203                 }
204             }
205         }
206         pSC = pSC->m_pNextSC;
207     }
208 
209     return ICERR_OK;
210 }
211 
// Encodes the macroblock at (iMBX, iMBY) into the bitstreams of the current
// tile column.
//   1. When both context flags (m_bCtxLeft, m_bCtxTop) are set, the codec is
//      not the secondary one and we are not transcoding, the packet header(s)
//      and tile headers are written first.
//   2. The DC band is encoded, then the lowpass and highpass bands as far as
//      the configured subband allows.
//   3. At the last macroblock of a horizontal slice the index table is
//      updated with the current stream offsets and, unless this was the last
//      macroblock row, all coding contexts are reset.
// Returns ICERR_OK on success; ICERR_ERROR if a tile header writer or one of
// the band encoders fails.
Int encodeMB(CWMImageStrCodec * pSC, Int iMBX, Int iMBY)
{
    CCodingContext * pContext = &pSC->m_pCodingContext[pSC->cTileColumn];

    if(pSC->m_bCtxLeft && pSC->m_bCtxTop && pSC->m_bSecondary == FALSE && pSC->m_param.bTranscode == FALSE){ // write packet headers
        // packet id: tile index, truncated to 5 bits
        U8 pID = (U8)((pSC->cTileRow * (pSC->WMISCP.cNumOfSliceMinus1V + 1) + pSC->cTileColumn) & 0x1F);

        if(pSC->WMISCP.bfBitstreamFormat == SPATIAL) {
            // spatial layout: a single packet carries every band
            writePacketHeader(pContext->m_pIODC, 0, pID);
            if (pSC->m_param.bTrimFlexbitsFlag)
                putBit16(pContext->m_pIODC, pContext->m_iTrimFlexBits, 4);

            // the tile header writers fail when a quantizer cannot be
            // allocated; stop here instead of encoding with missing quantizers
            if(writeTileHeaderDC(pSC, pContext->m_pIODC) != ICERR_OK)
                return ICERR_ERROR;
            if(writeTileHeaderLP(pSC, pContext->m_pIODC) != ICERR_OK)
                return ICERR_ERROR;
            if(writeTileHeaderHP(pSC, pContext->m_pIODC) != ICERR_OK)
                return ICERR_ERROR;
        }
        else{
            // frequency layout: one packet per band, each on its own bit writer
            writePacketHeader(pContext->m_pIODC, 1, pID);
            if(writeTileHeaderDC(pSC, pContext->m_pIODC) != ICERR_OK)
                return ICERR_ERROR;

            if(pSC->cSB > 1){
                writePacketHeader(pContext->m_pIOLP, 2, pID);
                if(writeTileHeaderLP(pSC, pContext->m_pIOLP) != ICERR_OK)
                    return ICERR_ERROR;
            }
            if(pSC->cSB > 2){
                writePacketHeader(pContext->m_pIOAC, 3, pID);
                if(writeTileHeaderHP(pSC, pContext->m_pIOAC) != ICERR_OK)
                    return ICERR_ERROR;
            }
            if(pSC->cSB > 3) {
                writePacketHeader(pContext->m_pIOFL, 4, pID);
                if (pSC->m_param.bTrimFlexbitsFlag)
                    putBit16(pContext->m_pIOFL, pContext->m_iTrimFlexBits, 4);
            }
        }
    }

    if(EncodeMacroblockDC(pSC, pContext, iMBX, iMBY) != ICERR_OK)
        return ICERR_ERROR;

    if(pSC->WMISCP.sbSubband != SB_DC_ONLY)
        if(EncodeMacroblockLowpass(pSC, pContext, iMBX, iMBY) != ICERR_OK)
            return ICERR_ERROR;

    if(pSC->WMISCP.sbSubband != SB_DC_ONLY && pSC->WMISCP.sbSubband != SB_NO_HIGHPASS)
        if(EncodeMacroblockHighpass(pSC, pContext, iMBX, iMBY) != ICERR_OK)
            return ICERR_ERROR;

    // last macroblock of a row that closes either the image or the current tile row
    if(iMBX + 1 == (int) pSC->cmbWidth && (iMBY + 1 == (int) pSC->cmbHeight ||
        (pSC->cTileRow < pSC->WMISCP.cNumOfSliceMinus1H && iMBY == (int) pSC->WMISCP.uiTileY[pSC->cTileRow + 1] - 1)))
    { // end of a horizontal slice
        size_t k, l;

        // record the running offset of every stream (stream position plus
        // bytes still held by the bit writer) in this tile row's table slots
        if (pSC->m_pNextSC == NULL || pSC->m_bSecondary) {
            for(k = 0; k < pSC->cNumBitIO; k ++){
                fillToByte(pSC->m_ppBitIO[k]);
                pSC->ppWStream[k]->GetPos(pSC->ppWStream[k], &l);
                pSC->pIndexTable[pSC->cNumBitIO * pSC->cTileRow + k] = l + getSizeWrite(pSC->m_ppBitIO[k]); // offset
            }
        }

        // reset coding contexts, except after the very last macroblock row
        if(iMBY + 1 != (int) pSC->cmbHeight){
            for(k = 0; k <= pSC->WMISCP.cNumOfSliceMinus1V; k ++)
                ResetCodingContextEnc(&pSC->m_pCodingContext[k]);
        }
    }

    return ICERR_OK;
}
280 
281 /*************************************************************************
282     Top level function for processing a macroblock worth of input
283 *************************************************************************/
processMacroblock(CWMImageStrCodec * pSC)284 Int processMacroblock(CWMImageStrCodec *pSC)
285 {
286     Bool topORleft = (pSC->cColumn == 0 || pSC->cRow == 0);
287     ERR_CODE result = ICERR_OK;
288     size_t j, jend = (pSC->m_pNextSC != NULL);
289 
290     for (j = 0; j <= jend; j++) {
291         transformMacroblock(pSC);
292         if(!topORleft){
293             getTilePos(pSC, (Int)pSC->cColumn - 1, (Int)pSC->cRow - 1);
294             if(jend){
295                 pSC->m_pNextSC->cTileRow = pSC->cTileRow;
296                 pSC->m_pNextSC->cTileColumn = pSC->cTileColumn;
297             }
298             if ((result = encodeMB(pSC, (Int)pSC->cColumn - 1, (Int)pSC->cRow - 1)) != ICERR_OK)
299                 return result;
300         }
301 
302         if (jend) {
303             pSC->m_pNextSC->cRow = pSC->cRow;
304             pSC->m_pNextSC->cColumn = pSC->cColumn;
305             pSC = pSC->m_pNextSC;
306         }
307     }
308 
309     return ICERR_OK;
310 }
311 
312 /*************************************************************************
313   forwardRGBE: forward conversion from RGBE to RGB
314 *************************************************************************/
forwardRGBE(PixelI RGB,PixelI E)315 static _FORCEINLINE PixelI forwardRGBE (PixelI RGB, PixelI E)
316 {
317     PixelI iResult = 0, iAppend = 1;
318 
319     if (E == 0)
320         return 0;
321 
322     assert (E!=0);
323 
324     E--;
325     while (((RGB & 0x80) == 0) && (E > 0)) {
326         RGB = (RGB << 1) + iAppend;
327         iAppend = 0;
328         E--;
329     }
330 
331     // result will always be one of 3 cases
332     // E  RGB       convert to
333     // 0  [0.x]      [0   x]
334     // 0  [1.x]      [1   x]
335     // e  [1.x]      [e+1 x]
336     if (E == 0) {
337         iResult = RGB;
338     }
339     else {
340         E++;
341         iResult = (RGB & 0x7f) + (E << 7);
342     }
343 
344     return iResult;
345 }
346 
347 /*************************************************************************
348   convert float-32 into float with (c, lm)!!
349 *************************************************************************/
float2pixel(float f,const char _c,const unsigned char _lm)350 static _FORCEINLINE PixelI float2pixel (float f, const char _c, const unsigned char _lm)
351 {
352     union uif
353     {
354         I32   i;
355         float f;
356     } x;
357 
358     PixelI _h, e, e1, m, s;
359 
360     if (f == 0)
361     {
362         _h = 0;
363     }
364     else
365     {
366         x.f = f;
367 
368         e = (x.i >> 23) & 0x000000ff;//here set e as e, not s! e includes s: [s e] 9 bits [31..23]
369         m = (x.i & 0x007fffff) | 0x800000; // actual mantissa, with normalizer
370         if (e == 0) { // denormal-land
371             m ^= 0x800000;  // actual mantissa, removing normalizer
372             e++; // actual exponent -126
373         }
374 
375         e1 = e - 127 + _c;  // this is basically a division or quantization to a different exponent
376                             // note: _c cannot be greater than 127, so e1 cannot be greater than e
377         //assert (_c <= 127);
378         if (e1 <= 1) {  // denormal-land
379             if (e1 < 1)
380                 m >>= (1 - e1);  // shift mantissa right to make exponent 1
381             e1 = 1;
382             if ((m & 0x800000) == 0) // if denormal, set e1 to zero else to 1
383                 e1 = 0;
384         }
385         m &= 0x007fffff;
386 
387         //for float-22:
388         _h = (e1 << _lm) + ((m + (1 << (23 - _lm - 1))) >> (23 - _lm));//take 23-bit m, shift (23-lm), get lm-bit m for float22
389         s = ((PixelI) x.i) >> 31;
390         //padding to int-32:
391         _h = (_h ^ s) - s;
392     }
393 
394     return _h;
395 }
396 
397 /*************************************************************************
398   convert Half-16 to internal format, only need to handle sign bit
399 *************************************************************************/
forwardHalf(PixelI hHalf)400 static _FORCEINLINE PixelI forwardHalf (PixelI hHalf)
401 {
402     PixelI s;
403     s = hHalf >> 31;
404     hHalf = ((hHalf & 0x7fff) ^ s) - s;
405     return hHalf;
406 }
407 
408 
409 //================================================================
410 // Color Conversion
411 // functions to get image data from input buffer
// this includes necessary color conversion and boundary padding
413 //================================================================
// In-place forward color transforms; every argument must be a modifiable lvalue.
//   _CC(r, g, b):         b -= r;  r += ((b + 1) >> 1) - g;  g += (r >> 1)
//   _CC_CMYK(c, m, y, k): y -= c;  c += ((y + 1) >> 1) - m;
//                         m += (c >> 1) - k;  k += ((m + 1) >> 1)
// Each step uses the values already updated by the steps before it.
#define _CC(r, g, b) \
    ((b) -= (r), (r) += (((b) + 1) >> 1) - (g), (g) += (((r) + 0) >> 1))
#define _CC_CMYK(c, m, y, k) \
    ((y) -= (c), (c) += (((y) + 1) >> 1) - (m), (m) += ((c) >> 1) - (k), (k) += (((m) + 1) >> 1))
416 
417 //================================================================
418 // BitIOInfo init/term for encoding
419 const size_t MAX_MEMORY_SIZE_IN_WORDS = 64 << 20; // 1 << 20 \approx 1 million
420 
StrIOEncInit(CWMImageStrCodec * pSC)421 Int StrIOEncInit(CWMImageStrCodec* pSC)
422 {
423     pSC->m_param.bIndexTable = !(pSC->WMISCP.bfBitstreamFormat == SPATIAL && pSC->WMISCP.cNumOfSliceMinus1H + pSC->WMISCP.cNumOfSliceMinus1V == 0);
424     if(allocateBitIOInfo(pSC) != ICERR_OK){
425         return ICERR_ERROR;
426     }
427 
428     attachISWrite(pSC->pIOHeader, pSC->WMISCP.pWStream);
429 
430     if(pSC->cNumBitIO > 0){
431         size_t i;
432 #if defined(_WINDOWS_) || defined(UNDER_CE)  // tmpnam does not exist in VS2005 WinCE CRT
433         TCHAR szPath[MAX_PATH];
434         DWORD cSize, j, k;
435 #endif
436         char * pFilename;
437 
438         pSC->ppWStream = (struct WMPStream **)malloc(pSC->cNumBitIO * sizeof(struct WMPStream *));
439         if(pSC->ppWStream == NULL) return ICERR_ERROR;
440         memset(pSC->ppWStream, 0, pSC->cNumBitIO * sizeof(struct WMPStream *));
441 
442         if (pSC->cmbHeight * pSC->cmbWidth * pSC->WMISCP.cChannel >= MAX_MEMORY_SIZE_IN_WORDS) {
443 #ifdef _WINDOWS_
444             pSC->ppTempFile = (TCHAR **)malloc(pSC->cNumBitIO * sizeof(TCHAR *));
445             if(pSC->ppTempFile == NULL) return ICERR_ERROR;
446             memset(pSC->ppTempFile, 0, pSC->cNumBitIO * sizeof(TCHAR *));
447 #else
448             pSC->ppTempFile = (char **)malloc(pSC->cNumBitIO * sizeof(char *));
449             if(pSC->ppTempFile == NULL) return ICERR_ERROR;
450             memset(pSC->ppTempFile, 0, pSC->cNumBitIO * sizeof(char *));
451 #endif
452         }
453 
454         for(i = 0; i < pSC->cNumBitIO; i ++){
455             if (pSC->cmbHeight * pSC->cmbWidth * pSC->WMISCP.cChannel >= MAX_MEMORY_SIZE_IN_WORDS) {
456 #if defined(_WINDOWS_) || defined(UNDER_CE)  // tmpnam does not exist in VS2005 WinCE CRT
457                 Bool bUnicode = sizeof(TCHAR) == 2;
458                 pSC->ppTempFile[i] = (TCHAR *)malloc(MAX_PATH * sizeof(TCHAR));
459                 if(pSC->ppTempFile[i] == NULL) return ICERR_ERROR;
460 
461                 pFilename = (char *)pSC->ppTempFile[i];
462 
463                 cSize = GetTempPath(MAX_PATH, szPath);
464                 if(cSize == 0 || cSize >= MAX_PATH)
465                     return ICERR_ERROR;
466                 if(!GetTempFileName(szPath, TEXT("wdp"), 0, pSC->ppTempFile[i]))
467                     return ICERR_ERROR;
468 
469                 if(bUnicode){ // unicode file name
470                     for(k = j = cSize = 0; cSize < MAX_PATH; cSize ++, j += 2){
471                         if(pSC->ppTempFile[i][cSize] == '\0')
472                             break;
473                         if(pFilename[j] != '\0')
474                             pFilename[k ++] = pFilename[j];
475                         if(pFilename[j + 1] != '\0')
476                             pFilename[k ++] = pFilename[j + 1];
477                     }
478                     pFilename[cSize] = '\0';
479                 }
480 
481 #else //DPK needs to support ANSI
482                 pSC->ppTempFile[i] = (char *)malloc(FILENAME_MAX * sizeof(char));
483                 if(pSC->ppTempFile[i] == NULL) return ICERR_ERROR;
484 
485                 if ((pFilename = tmpnam(NULL)) == NULL)
486                     return ICERR_ERROR;
487                 strcpy(pSC->ppTempFile[i], pFilename);
488 #endif
489                 if(CreateWS_File(pSC->ppWStream + i, pFilename, "w+b") != ICERR_OK) return ICERR_ERROR;
490 
491             }
492             else {
493                 if(CreateWS_List(pSC->ppWStream + i) != ICERR_OK) return ICERR_ERROR;
494             }
495             attachISWrite(pSC->m_ppBitIO[i], pSC->ppWStream[i]);
496         }
497     }
498 
499     return ICERR_OK;
500 }
501 
502 #define PUTBITS putBit16
503 /*************************************************************************
504     Write variable length byte aligned integer
505 *************************************************************************/
PutVLWordEsc(BitIOInfo * pIO,Int iEscape,size_t s)506 static Void PutVLWordEsc(BitIOInfo* pIO, Int iEscape, size_t s)
507 {
508     if (iEscape) {
509         assert(iEscape <= 0xff && iEscape > 0xfc); // fd,fe,ff are the only valid escapes
510         PUTBITS(pIO, iEscape, 8);
511     }
512     else if (s < 0xfb00) {
513         PUTBITS(pIO, (U32) s, 16);
514     }
515     else {
516         size_t t = s >> 16;
517         if ((t >> 16) == 0) {
518             PUTBITS(pIO, 0xfb, 8);
519         }
520         else {
521             t >>= 16;
522             PUTBITS(pIO, 0xfc, 8);
523             PUTBITS(pIO, (U32)(t >> 16) & 0xffff, 16);
524             PUTBITS(pIO, (U32) t & 0xffff, 16);
525         }
526         PUTBITS(pIO, (U32) t & 0xffff, 16);
527         PUTBITS(pIO, (U32) s & 0xffff, 16);
528     }
529 }
530 
531 /*************************************************************************
532     Write index table at start (null index table)
533 *************************************************************************/
writeIndexTableNull(CWMImageStrCodec * pSC)534 Int writeIndexTableNull(CWMImageStrCodec * pSC)
535 {
536     if(pSC->cNumBitIO == 0){
537         BitIOInfo* pIO = pSC->pIOHeader;
538         fillToByte(pIO);
539 
540         /* Profile / Level info */
541         PutVLWordEsc(pIO, 0, 4);    // 4 bytes
542         PUTBITS(pIO, 111, 8);       // default profile idc
543         PUTBITS(pIO, 255, 8);       // default level idc
544         PUTBITS(pIO, 1, 16);        // LAST_FLAG
545     }
546 
547     return ICERR_OK;
548 }
549 
550 /*************************************************************************
551     Write index table
552 *************************************************************************/
writeIndexTable(CWMImageStrCodec * pSC)553 Int writeIndexTable(CWMImageStrCodec * pSC)
554 {
555     if(pSC->cNumBitIO > 0){
556         BitIOInfo* pIO = pSC->pIOHeader;
557         size_t *pTable = pSC->pIndexTable, iSize[4] = { 0 };
558         I32 iEntry = (I32)pSC->cNumBitIO * (pSC->WMISCP.cNumOfSliceMinus1H + 1), i, k, l;
559 
560         // write index table header [0x0001] - 2 bytes
561         PUTBITS(pIO, 1, 16);
562 
563         for(i = pSC->WMISCP.cNumOfSliceMinus1H; i>= 0 && pSC->bTileExtraction == FALSE; i --){
564             for(k = 0; k < (int)pSC->cNumBitIO; ){
565                 for(l = 0; l < (pSC->WMISCP.bfBitstreamFormat == FREQUENCY && pSC->WMISCP.bProgressiveMode ? pSC->cSB : 1); l ++, k ++)
566                 {
567                 if (i > 0)
568                 pTable[pSC->cNumBitIO * i + k] -= pSC->pIndexTable[pSC->cNumBitIO * (i - 1) + k]; // packet length
569                 iSize[l] += pTable[pSC->cNumBitIO * i + k];
570                 }
571             }
572         }
573 
574         iSize[3] = iSize[2] + iSize[1] + iSize[0];
575         iSize[2] = iSize[1] + iSize[0];
576         iSize[1] = iSize[0];
577         iSize[0] = 0;
578 
579         for(i = 0; i < iEntry; ){
580         for(l = 0; l < (pSC->WMISCP.bfBitstreamFormat == FREQUENCY && pSC->WMISCP.bProgressiveMode ? pSC->cSB : 1); l ++, i ++)
581         {
582             writeIS_L1(pSC, pIO);
583             PutVLWordEsc(pIO, (pTable[i] <= MINIMUM_PACKET_LENGTH) ? 0xff : 0, iSize[l]);
584             iSize[l] += (pTable[i] <= MINIMUM_PACKET_LENGTH) ? 0 : pTable[i];
585         }
586         }
587 
588         writeIS_L1(pSC, pIO);
589         PutVLWordEsc(pIO, 0xff, 0); // escape to end
590         fillToByte(pIO);
591     }
592 
593     return ICERR_OK;
594 }
595 
copyTo(struct WMPStream * pSrc,struct WMPStream * pDst,size_t iBytes)596 Int copyTo(struct WMPStream * pSrc, struct WMPStream * pDst, size_t iBytes)
597 {
598     char pData[PACKETLENGTH];
599 
600     if (iBytes <= MINIMUM_PACKET_LENGTH){
601         pSrc->Read(pSrc, pData, iBytes);
602         return ICERR_OK;
603     }
604 
605     while(iBytes > PACKETLENGTH){
606         pSrc->Read(pSrc, pData, PACKETLENGTH);
607         pDst->Write(pDst, pData, PACKETLENGTH);
608         iBytes -= PACKETLENGTH;
609     }
610     pSrc->Read(pSrc, pData, iBytes);
611     pDst->Write(pDst, pData, iBytes);
612 
613     return ICERR_OK;
614 }
615 
StrIOEncTerm(CWMImageStrCodec * pSC)616 Int StrIOEncTerm(CWMImageStrCodec* pSC)
617 {
618     BitIOInfo * pIO = pSC->pIOHeader;
619 
620     fillToByte(pIO);
621 
622     if(pSC->WMISCP.bVerbose){
623         U32 i, j;
624 
625         printf("\n%d horizontal tiles:\n", pSC->WMISCP.cNumOfSliceMinus1H + 1);
626         for(i = 0; i <= pSC->WMISCP.cNumOfSliceMinus1H; i ++){
627             printf("    offset of tile %d in MBs: %d\n", i, pSC->WMISCP.uiTileY[i]);
628         }
629 
630         printf("\n%d vertical tiles:\n", pSC->WMISCP.cNumOfSliceMinus1V + 1);
631         for(i = 0; i <= pSC->WMISCP.cNumOfSliceMinus1V; i ++){
632             printf("    offset of tile %d in MBs: %d\n", i, pSC->WMISCP.uiTileX[i]);
633         }
634 
635         if(pSC->WMISCP.bfBitstreamFormat == SPATIAL){
636             printf("\nSpatial order bitstream\n");
637         }
638         else{
639             printf("\nFrequency order bitstream\n");
640         }
641 
642         if(!pSC->m_param.bIndexTable){
643             printf("\nstreaming mode, no index table.\n");
644         }
645         else if(pSC->WMISCP.bfBitstreamFormat == SPATIAL){
646             for(j = 0; j <= pSC->WMISCP.cNumOfSliceMinus1H; j ++){
647                 for(i = 0; i <= pSC->WMISCP.cNumOfSliceMinus1V; i ++){
648                     printf("bitstream size for tile (%d, %d): %d.\n", j, i, (int) pSC->pIndexTable[j * (pSC->WMISCP.cNumOfSliceMinus1V + 1) + i]);
649                 }
650             }
651         }
652         else{
653             for(j = 0; j <= pSC->WMISCP.cNumOfSliceMinus1H; j ++){
654                 for(i = 0; i <= pSC->WMISCP.cNumOfSliceMinus1V; i ++){
655                     size_t * p = &pSC->pIndexTable[(j * (pSC->WMISCP.cNumOfSliceMinus1V + 1) + i) * 4];
656                     printf("bitstream size of (DC, LP, AC, FL) for tile (%d, %d): %d %d %d %d.\n", j, i,
657                         (int) p[0], (int) p[1], (int) p[2], (int) p[3]);
658                 }
659             }
660         }
661     }
662 
663     writeIndexTable(pSC); // write index table to the header
664 
665     detachISWrite(pSC, pIO);
666 
667     if(pSC->cNumBitIO > 0){
668         size_t i, j, k, l;
669         struct WMPStream * pDst = pSC->WMISCP.pWStream;
670         size_t * pTable = pSC->pIndexTable;
671 
672         for(i = 0; i < pSC->cNumBitIO; i ++){
673             detachISWrite(pSC, pSC->m_ppBitIO[i]);
674         }
675 
676         for(i = 0; i < pSC->cNumBitIO; i ++){
677             pSC->ppWStream[i]->SetPos(pSC->ppWStream[i], 0); // seek back for read
678         }
679 
680         for(l = 0; l < (size_t)(pSC->WMISCP.bfBitstreamFormat == FREQUENCY && pSC->WMISCP.bProgressiveMode ? pSC->cSB : 1); l ++){
681 			for(i = 0, k = l; i <= pSC->WMISCP.cNumOfSliceMinus1H; i ++){ // loop through tiles
682 				for(j = 0; j <= pSC->WMISCP.cNumOfSliceMinus1V; j ++){
683 
684 					if(pSC->WMISCP.bfBitstreamFormat == SPATIAL)
685 						copyTo(pSC->ppWStream[j], pDst, pTable[k ++]);
686 					else if (!pSC->WMISCP.bProgressiveMode){
687 						copyTo(pSC->ppWStream[j * pSC->cSB + 0], pDst, pTable[k ++]);
688 						if(pSC->cSB > 1)
689 							copyTo(pSC->ppWStream[j * pSC->cSB + 1], pDst, pTable[k ++]);
690 						if(pSC->cSB > 2)
691 							copyTo(pSC->ppWStream[j * pSC->cSB + 2], pDst, pTable[k ++]);
692 						if(pSC->cSB > 3)
693 							copyTo(pSC->ppWStream[j * pSC->cSB + 3], pDst, pTable[k ++]);
694 					}
695 					else{
696 						copyTo(pSC->ppWStream[j * pSC->cSB + l], pDst, pTable[k]);
697 						k += pSC->cSB;
698 					}
699 				}
700 			}
701         }
702 
703         if (pSC->cmbHeight * pSC->cmbWidth * pSC->WMISCP.cChannel >= MAX_MEMORY_SIZE_IN_WORDS){
704             for(i = 0; i < pSC->cNumBitIO; i ++){
705                 if(pSC->ppWStream && pSC->ppWStream[i]){
706                     if((*(pSC->ppWStream + i))->state.file.pFile){
707                         fclose((*(pSC->ppWStream + i))->state.file.pFile);
708 #ifdef _WINDOWS_
709                         if(DeleteFileA((LPCSTR)pSC->ppTempFile[i]) == 0)
710                             return ICERR_ERROR;
711 #else
712                         if (remove(pSC->ppTempFile[i]) == -1)
713                             return ICERR_ERROR;
714 #endif
715                     }
716 
717                     if (*(pSC->ppWStream + i))
718                         free(*(pSC->ppWStream + i));
719                 }
720                 if(pSC->ppTempFile){
721                     if(pSC->ppTempFile[i])
722                         free(pSC->ppTempFile[i]);
723                 }
724             }
725 
726             if(pSC->ppTempFile)
727                 free(pSC->ppTempFile);
728         }
729         else{
730             for(i = 0; i < pSC->cNumBitIO; i ++){
731                 if(pSC->ppWStream && pSC->ppWStream[i])
732                     pSC->ppWStream[i]->Close(pSC->ppWStream + i);
733             }
734         }
735 
736         free(pSC->ppWStream);
737 
738         free(pSC->m_ppBitIO);
739         free(pSC->pIndexTable);
740     }
741 
742     return 0;
743 }
744 
745 /*************************************************************************
746     Write header of image plane
747 *************************************************************************/
WriteImagePlaneHeader(CWMImageStrCodec * pSC)748 Int WriteImagePlaneHeader(CWMImageStrCodec * pSC)
749 {
750     CWMImageInfo * pII = &pSC->WMII;
751     CWMIStrCodecParam * pSCP = &pSC->WMISCP;
752     BitIOInfo* pIO = pSC->pIOHeader;
753 
754     PUTBITS(pIO, (Int) pSC->m_param.cfColorFormat, 3); // internal color format
755     PUTBITS(pIO, (Int) pSC->m_param.bScaledArith, 1); // lossless mode
756 
757 // subbands
758     PUTBITS(pIO, (U32)pSCP->sbSubband, 4);
759 
760 // color parameters
761     switch (pSC->m_param.cfColorFormat) {
762         case YUV_420:
763         case YUV_422:
764         case YUV_444:
765             PUTBITS(pIO, 0, 4);
766             PUTBITS(pIO, 0, 4);
767             break;
768         case NCOMPONENT:
769             PUTBITS(pIO, (Int) pSC->m_param.cNumChannels - 1, 4);
770             PUTBITS(pIO, 0, 4);
771             break;
772         default:
773             break;
774     }
775 
776 // float and 32s additional parameters
777     switch (pII->bdBitDepth) {
778         case BD_16:
779         case BD_16S:
780             PUTBITS(pIO, pSCP->nLenMantissaOrShift, 8);
781             break;
782         case BD_32:
783         case BD_32S:
784             if(pSCP->nLenMantissaOrShift == 0)
785                 pSCP->nLenMantissaOrShift = 10;//default
786             PUTBITS(pIO, pSCP->nLenMantissaOrShift, 8);
787             break;
788         case BD_32F:
789             if(pSCP->nLenMantissaOrShift == 0)
790                 pSCP->nLenMantissaOrShift = 13;//default
791             PUTBITS(pIO, pSCP->nLenMantissaOrShift, 8);//float conversion parameters
792             PUTBITS(pIO, pSCP->nExpBias, 8);
793             break;
794         default:
795             break;
796     }
797 
798         // quantization
799     PUTBITS(pIO, (pSC->m_param.uQPMode & 1) == 1 ? 0 : 1, 1); // DC frame uniform quantization?
800     if((pSC->m_param.uQPMode & 1) == 0)
801         writeQuantizer(pSC->pTile[0].pQuantizerDC, pIO, (pSC->m_param.uQPMode >> 3) & 3, pSC->m_param.cNumChannels, 0);
802     if(pSC->WMISCP.sbSubband != SB_DC_ONLY){
803         PUTBITS(pIO, (pSC->m_param.uQPMode & 0x200) == 0 ? 1 : 0, 1); // use DC quantization?
804         if((pSC->m_param.uQPMode & 0x200) != 0){
805             PUTBITS(pIO, (pSC->m_param.uQPMode & 2) == 2 ? 0 : 1, 1); // LP frame uniform quantization?
806             if((pSC->m_param.uQPMode & 2) == 0)
807                 writeQuantizer(pSC->pTile[0].pQuantizerLP, pIO, (pSC->m_param.uQPMode >> 5) & 3,  pSC->m_param.cNumChannels, 0);
808         }
809 
810         if(pSC->WMISCP.sbSubband != SB_NO_HIGHPASS){
811             PUTBITS(pIO, (pSC->m_param.uQPMode & 0x400) == 0 ? 1 : 0, 1); // use LP quantization?
812             if((pSC->m_param.uQPMode & 0x400) != 0){
813                 PUTBITS(pIO, (pSC->m_param.uQPMode & 4) == 4 ? 0 : 1, 1); // HP frame uniform quantization?
814                 if((pSC->m_param.uQPMode & 4) == 0)
815                     writeQuantizer(pSC->pTile[0].pQuantizerHP, pIO, (pSC->m_param.uQPMode >> 7) & 3,  pSC->m_param.cNumChannels, 0);
816             }
817         }
818     }
819 
820     fillToByte(pIO);  // remove this later
821     return ICERR_OK;
822 }
823 
824 /*************************************************************************
825     Write header to buffer
826 *************************************************************************/
WriteWMIHeader(CWMImageStrCodec * pSC)827 Int WriteWMIHeader(CWMImageStrCodec * pSC)
828 {
829     CWMImageInfo * pII = &pSC->WMII;
830     CWMIStrCodecParam * pSCP = &pSC->WMISCP;
831     CCoreParameters * pCoreParam = &pSC->m_param;
832     BitIOInfo* pIO = pSC->pIOHeader;
833     U32 /*iSizeOfSize = 2,*/ i;
834     // temporary assignments / reserved words
835     // const Int HEADERSIZE = 0;
836     Bool bInscribed = FALSE;
837     Bool bAbbreviatedHeader = (((pII->cWidth + 15) / 16 > 255 || (pII->cHeight + 15) / 16 > 255) ? FALSE : TRUE);
838 
839     if(pCoreParam->bTranscode == FALSE)
840         pCoreParam->cExtraPixelsTop = pCoreParam->cExtraPixelsLeft = pCoreParam->cExtraPixelsRight = pCoreParam->cExtraPixelsBottom = 0;
841 
842     // num of extra boundary pixels due to compressed domain processing
843     bInscribed = (pCoreParam->cExtraPixelsTop || pCoreParam->cExtraPixelsLeft || pCoreParam->cExtraPixelsBottom || pCoreParam->cExtraPixelsRight);
844 
845 // 0
846     /** signature **/
847     for (i = 0; i < 8; PUTBITS(pSC->pIOHeader, gGDISignature[i++], 8));
848 
849 // 8
850     /** codec version and subversion **/
851     PUTBITS(pIO, CODEC_VERSION, 4);  // this should be changed to "profile" in RTM
852     if (pSC->WMISCP.bUseHardTileBoundaries)
853         PUTBITS(pIO, CODEC_SUBVERSION_NEWSCALING_HARD_TILES, 4);
854     else
855         PUTBITS(pIO, CODEC_SUBVERSION_NEWSCALING_SOFT_TILES, 4);
856 
857 // 9 primary parameters
858     PUTBITS(pIO, (pSCP->cNumOfSliceMinus1V || pSCP->cNumOfSliceMinus1H) ? 1 : 0, 1); // tiling present
859     PUTBITS(pIO, (Int) pSCP->bfBitstreamFormat, 1); // bitstream layout
860     PUTBITS(pIO, pII->oOrientation, 3);        // m_iRotateFlip
861     PUTBITS(pIO, pSC->m_param.bIndexTable, 1); // index table present
862     PUTBITS(pIO, pSCP->olOverlap, 2); // overlap
863 
864 // 10
865     PUTBITS(pIO, bAbbreviatedHeader, 1); // short words for size and tiles
866     PUTBITS(pIO, 1, 1); // long word length (use intelligence later)
867     PUTBITS(pIO, bInscribed, 1); // windowing
868     PUTBITS(pIO, pSC->m_param.bTrimFlexbitsFlag, 1); // trim flexbits flag sent
869     PUTBITS(pIO, 0, 1); // tile stretching parameters (not enabled)
870     PUTBITS(pIO, 0, 2); // reserved bits
871     PUTBITS(pIO, (Int) pSC->m_param.bAlphaChannel, 1); // alpha channel present
872 
873 // 11 - informational
874     PUTBITS(pIO, (Int) pII->cfColorFormat, 4); // source color format
875     if(BD_1 == pII->bdBitDepth && pSCP->bBlackWhite)
876         PUTBITS(pIO, (Int) BD_1alt, 4); // source bit depth
877     else
878         PUTBITS(pIO, (Int) pII->bdBitDepth, 4); // source bit depth
879 
880 // 12 - Variable length fields
881 // size
882     putBit32(pIO, (U32)(pII->cWidth - 1), bAbbreviatedHeader ? 16 : 32);
883     putBit32(pIO, (U32)(pII->cHeight - 1), bAbbreviatedHeader ? 16 : 32);
884 
885 // tiling
886     if (pSCP->cNumOfSliceMinus1V || pSCP->cNumOfSliceMinus1H) {
887         PUTBITS(pIO, pSCP->cNumOfSliceMinus1V, LOG_MAX_TILES); // # of vertical slices
888         PUTBITS(pIO, pSCP->cNumOfSliceMinus1H, LOG_MAX_TILES); // # of horizontal slices
889     }
890 
891 // tile sizes
892     for(i = 0; i < pSCP->cNumOfSliceMinus1V; i ++){ // width in MB of vertical slices, not needed for last slice!
893         PUTBITS(pIO, pSCP->uiTileX[i + 1] - pSCP->uiTileX[i], bAbbreviatedHeader ? 8 : 16);
894     }
895     for(i = 0; i < pSCP->cNumOfSliceMinus1H; i ++){ // width in MB of horizontal slices, not needed for last slice!
896         PUTBITS(pIO, pSCP->uiTileY[i + 1] - pSCP->uiTileY[i], bAbbreviatedHeader ? 8 : 16);
897     }
898 
899 // window due to compressed domain processing
900     if (bInscribed) {
901         PUTBITS(pIO, (U32)pCoreParam->cExtraPixelsTop, 6);
902         PUTBITS(pIO, (U32)pCoreParam->cExtraPixelsLeft, 6);
903         PUTBITS(pIO, (U32)pCoreParam->cExtraPixelsBottom, 6);
904         PUTBITS(pIO, (U32)pCoreParam->cExtraPixelsRight, 6);
905     }
906     fillToByte(pIO);  // redundant
907 
908     // write image plane headers
909     WriteImagePlaneHeader(pSC);
910 
911     return ICERR_OK;
912 }
913 
914 // streaming codec init/term
StrEncInit(CWMImageStrCodec * pSC)915 Int StrEncInit(CWMImageStrCodec* pSC)
916 {
917     COLORFORMAT cf = pSC->m_param.cfColorFormat;
918     COLORFORMAT cfE = pSC->WMII.cfColorFormat;
919     U16 iQPIndexY = 0, iQPIndexYLP = 0, iQPIndexYHP = 0;
920 	U16 iQPIndexU = 0, iQPIndexULP = 0, iQPIndexUHP = 0;
921     U16 iQPIndexV = 0, iQPIndexVLP = 0, iQPIndexVHP = 0;
922     size_t i;
923     Bool b32bit = sizeof(size_t) == 4;
924 
925     /** color transcoding with resolution change **/
926     pSC->m_bUVResolutionChange = (((cfE == CF_RGB || cfE == YUV_444 || cfE == CMYK || cfE == CF_RGBE) &&
927 								   (cf == YUV_422 || cf == YUV_420))
928 								  || (cfE == YUV_422 && cf == YUV_420)) && !pSC->WMISCP.bYUVData;
929 
930     if(pSC->m_bUVResolutionChange){
931         size_t cSize = ((cfE == YUV_422 ? 128 : 256) + (cf == YUV_420 ? 32 : 0)) * pSC->cmbWidth + 256;
932 
933         if(b32bit){ // integer overlow/underflow check for 32-bit system
934             if(((pSC->cmbWidth >> 16) * ((cfE == YUV_422 ? 128 : 256) + (cf == YUV_420 ? 32 : 0))) & 0xffff0000)
935                 return ICERR_ERROR;
936             if(cSize >= 0x3fffffff)
937                 return ICERR_ERROR;
938         }
939         pSC->pResU = (PixelI *)malloc(cSize * sizeof(PixelI));
940         pSC->pResV = (PixelI *)malloc(cSize * sizeof(PixelI));
941         if(pSC->pResU == NULL || pSC->pResV == NULL){
942             return ICERR_ERROR;
943         }
944     }
945 
946     pSC->cTileColumn = pSC->cTileRow = 0;
947 
948     if(allocateTileInfo(pSC) != ICERR_OK)
949         return ICERR_ERROR;
950 
951     if(pSC->m_param.bTranscode == FALSE){
952         pSC->m_param.uQPMode = 0x150;   // 101010 000
953                                         // 000    == uniform (not per tile) DC, LP, HP
954                                         // 101010 == cChMode == 2 == independent (not same) DC, LP, HP
955 
956         /** lossless or Y component lossless condition: all subbands present, uniform quantization with QPIndex 1 **/
957         pSC->m_param.bScaledArith = !((pSC->m_param.uQPMode & 7) == 0 &&
958 									  1 == pSC->WMISCP.uiDefaultQPIndex <= 1 &&
959 									  pSC->WMISCP.sbSubband == SB_ALL &&
960 									  pSC->m_bUVResolutionChange == FALSE) &&
961                                      !pSC->WMISCP.bUnscaledArith;
962         if (BD_32 == pSC->WMII.bdBitDepth || BD_32S == pSC->WMII.bdBitDepth || BD_32F == pSC->WMII.bdBitDepth) {
963             pSC->m_param.bScaledArith = FALSE;
964         }
965         pSC->m_param.uQPMode |= 0x600;  // don't use DC QP for LP, LP QP for HP
966 
967         // default QPs
968         iQPIndexY = pSC->m_param.bAlphaChannel && pSC->m_param.cNumChannels == 1?
969             pSC->WMISCP.uiDefaultQPIndexAlpha : pSC->WMISCP.uiDefaultQPIndex;
970 
971 		// determine the U,V index
972         iQPIndexU = pSC->WMISCP.uiDefaultQPIndexU!=0?
973 			pSC->WMISCP.uiDefaultQPIndexU: iQPIndexY;
974         iQPIndexV = pSC->WMISCP.uiDefaultQPIndexV!=0?
975 			pSC->WMISCP.uiDefaultQPIndexV: iQPIndexY;
976 
977 		// determine the QPIndexYLP
978         iQPIndexYLP = pSC->m_param.bAlphaChannel && pSC->m_param.cNumChannels == 1 ?
979             pSC->WMISCP.uiDefaultQPIndexAlpha :
980             (pSC->WMISCP.uiDefaultQPIndexYLP == 0 ?
981 			 pSC->WMISCP.uiDefaultQPIndex : pSC->WMISCP.uiDefaultQPIndexYLP); // default to QPIndex if not set
982 
983 		// determine the QPIndexYHP
984         iQPIndexYHP = pSC->m_param.bAlphaChannel && pSC->m_param.cNumChannels == 1 ?
985             pSC->WMISCP.uiDefaultQPIndexAlpha :
986             (pSC->WMISCP.uiDefaultQPIndexYHP == 0 ?
987 			 pSC->WMISCP.uiDefaultQPIndex : pSC->WMISCP.uiDefaultQPIndexYHP); // default to QPIndex if not set
988 
989 		// determine the U,V LP index
990         iQPIndexULP = pSC->WMISCP.uiDefaultQPIndexULP!=0?
991 			pSC->WMISCP.uiDefaultQPIndexULP: iQPIndexU;
992         iQPIndexVLP = pSC->WMISCP.uiDefaultQPIndexVLP!=0?
993 			pSC->WMISCP.uiDefaultQPIndexVLP: iQPIndexV;
994 
995 		// determine the U,V HP index
996         iQPIndexUHP = pSC->WMISCP.uiDefaultQPIndexUHP!=0?
997 			pSC->WMISCP.uiDefaultQPIndexUHP: iQPIndexU;
998         iQPIndexVHP = pSC->WMISCP.uiDefaultQPIndexVHP!=0?
999 			pSC->WMISCP.uiDefaultQPIndexVHP: iQPIndexV;
1000 
1001 		// clamp the QPIndex - 0 is lossless mode
1002         if(iQPIndexY < 2)
1003             iQPIndexY = 0;
1004         if (iQPIndexYLP < 2)
1005             iQPIndexYLP = 0;
1006         if (iQPIndexYHP < 2)
1007             iQPIndexYHP = 0;
1008 		if(iQPIndexU < 2)
1009             iQPIndexU = 0;
1010         if (iQPIndexULP < 2)
1011             iQPIndexULP = 0;
1012         if (iQPIndexUHP < 2)
1013             iQPIndexUHP = 0;
1014 		if(iQPIndexV < 2)
1015             iQPIndexV = 0;
1016 		if (iQPIndexVLP < 2)
1017             iQPIndexVLP = 0;
1018 		if (iQPIndexVHP < 2)
1019             iQPIndexVHP = 0;
1020     }
1021 
1022     if((pSC->m_param.uQPMode & 1) == 0){ // DC frame uniform quantization
1023         if(allocateQuantizer(pSC->pTile[0].pQuantizerDC, pSC->m_param.cNumChannels, 1) != ICERR_OK)
1024             return ICERR_ERROR;
1025         setUniformQuantizer(pSC, 0);
1026         for(i = 0; i < pSC->m_param.cNumChannels; i ++)
1027             if(pSC->m_param.bTranscode)
1028                 pSC->pTile[0].pQuantizerDC[i]->iIndex = pSC->m_param.uiQPIndexDC[i];
1029             else
1030                 pSC->pTile[0].pQuantizerDC[i]->iIndex = pSC->m_param.uiQPIndexDC[i] = (U8)(((i == 0 ? iQPIndexY : (i == 1) ? iQPIndexU: iQPIndexV)) & 0xff);
1031         formatQuantizer(pSC->pTile[0].pQuantizerDC, (pSC->m_param.uQPMode >> 3) & 3, pSC->m_param.cNumChannels, 0, TRUE, pSC->m_param.bScaledArith);
1032 
1033         for(i = 0; i < pSC->m_param.cNumChannels; i ++)
1034             pSC->pTile[0].pQuantizerDC[i]->iOffset = (pSC->pTile[0].pQuantizerDC[i]->iQP >> 1);
1035     }
1036 
1037     if(pSC->WMISCP.sbSubband != SB_DC_ONLY){
1038         if((pSC->m_param.uQPMode & 2) == 0){ // LP frame uniform quantization
1039             if(allocateQuantizer(pSC->pTile[0].pQuantizerLP, pSC->m_param.cNumChannels, 1) != ICERR_OK)
1040                 return ICERR_ERROR;
1041             setUniformQuantizer(pSC, 1);
1042             for(i = 0; i < pSC->m_param.cNumChannels; i ++)
1043                 if(pSC->m_param.bTranscode)
1044                     pSC->pTile[0].pQuantizerLP[i]->iIndex = pSC->m_param.uiQPIndexLP[i];
1045                 else
1046                     pSC->pTile[0].pQuantizerLP[i]->iIndex = pSC->m_param.uiQPIndexLP[i] = (U8)(((i == 0 ? iQPIndexYLP : (i == 1) ? iQPIndexULP: iQPIndexVLP)) & 0xff);
1047             formatQuantizer(pSC->pTile[0].pQuantizerLP, (pSC->m_param.uQPMode >> 5) & 3, pSC->m_param.cNumChannels, 0, TRUE, pSC->m_param.bScaledArith);
1048         }
1049 
1050         if(pSC->WMISCP.sbSubband != SB_NO_HIGHPASS){
1051             if((pSC->m_param.uQPMode & 4) == 0){ // HP frame uniform quantization
1052                 if(allocateQuantizer(pSC->pTile[0].pQuantizerHP, pSC->m_param.cNumChannels, 1) != ICERR_OK)
1053                     return ICERR_ERROR;
1054                 setUniformQuantizer(pSC, 2);
1055                 for(i = 0; i < pSC->m_param.cNumChannels; i ++)
1056                     if(pSC->m_param.bTranscode)
1057                         pSC->pTile[0].pQuantizerHP[i]->iIndex = pSC->m_param.uiQPIndexHP[i];
1058                     else
1059                         pSC->pTile[0].pQuantizerHP[i]->iIndex = pSC->m_param.uiQPIndexHP[i] = (U8)(((i == 0 ? iQPIndexYHP : (i == 1) ? iQPIndexUHP: iQPIndexVHP)) & 0xff);
1060                 formatQuantizer(pSC->pTile[0].pQuantizerHP, (pSC->m_param.uQPMode >> 7) & 3, pSC->m_param.cNumChannels, 0, FALSE, pSC->m_param.bScaledArith);
1061             }
1062         }
1063     }
1064 
1065     if(allocatePredInfo(pSC) != ICERR_OK){
1066         return ICERR_ERROR;
1067     }
1068 
1069     if(pSC->WMISCP.cNumOfSliceMinus1V >= MAX_TILES || AllocateCodingContextEnc (pSC, pSC->WMISCP.cNumOfSliceMinus1V + 1, pSC->WMISCP.uiTrimFlexBits) != ICERR_OK){
1070         return ICERR_ERROR;
1071     }
1072 
1073     if (pSC->m_bSecondary) {
1074         pSC->pIOHeader = pSC->m_pNextSC->pIOHeader;
1075         pSC->m_ppBitIO = pSC->m_pNextSC->m_ppBitIO;
1076         pSC->cNumBitIO = pSC->m_pNextSC->cNumBitIO;
1077         pSC->cSB = pSC->m_pNextSC->cSB;
1078         pSC->ppWStream = pSC->m_pNextSC->ppWStream;
1079         pSC->pIndexTable = pSC->m_pNextSC->pIndexTable;
1080         setBitIOPointers(pSC);
1081     }
1082     else {
1083         StrIOEncInit(pSC);
1084         setBitIOPointers(pSC);
1085         WriteWMIHeader(pSC);
1086     }
1087 
1088     return ICERR_OK;
1089 }
1090 
StrEncTerm(CTXSTRCODEC ctxSC)1091 static Int StrEncTerm(CTXSTRCODEC ctxSC)
1092 {
1093     CWMImageStrCodec* pSC = (CWMImageStrCodec*)ctxSC;
1094     size_t j, jend = (pSC->m_pNextSC != NULL);
1095 
1096     for (j = 0; j <= jend; j++) {
1097         if (sizeof(*pSC) != pSC->cbStruct) {
1098             return ICERR_ERROR;
1099         }
1100 
1101         if(pSC->m_bUVResolutionChange){
1102             if(pSC->pResU != NULL)
1103                 free(pSC->pResU);
1104             if(pSC->pResV != NULL)
1105                 free(pSC->pResV);
1106         }
1107 
1108         freePredInfo(pSC);
1109 
1110         if (j == 0)
1111             StrIOEncTerm(pSC);
1112 
1113         FreeCodingContextEnc(pSC);
1114 
1115         freeTileInfo(pSC);
1116 
1117         pSC->WMISCP.nExpBias -= 128; // reset
1118 
1119         pSC = pSC->m_pNextSC;
1120     }
1121 
1122     return 0;
1123 }
1124 
/*
 * Splits cNumMB macroblocks into tiles of (nearly) equal size.
 *
 * cNumTile is first increased until ceil(cNumMB / cNumTile) no longer
 * exceeds 65535.  The sizes of all tiles except the last one are then
 * stored in pTile[0 .. cNumTile-2]; the last entry is left untouched.
 *
 * cNumTile must be non-zero.  Returns the (possibly increased) tile count.
 */
U32 setUniformTiling(U32 * pTile, U32 cNumTile, U32 cNumMB)
{
    U32 iTile;
    U32 cLeftMB = cNumMB;   // macroblocks not yet assigned to a tile

    // too few tiles
    while ((cNumMB + cNumTile - 1) / cNumTile > 65535) {
        ++cNumTile;
    }

    for (iTile = 0; iTile + 1 < cNumTile; ++iTile) {
        const U32 cLeftTiles = cNumTile - iTile;

        // each tile takes the rounded-up share of what is still unassigned
        pTile[iTile] = (cLeftMB + cLeftTiles - 1) / cLeftTiles;
        cLeftMB -= pTile[iTile];
    }

    return cNumTile;
}
1139 
/*
 * Sanitises a tiling description in place.
 *
 * On entry pTile[0 .. cNumTile-2] holds the sizes (in macroblocks) of all
 * tiles but the last; on return pTile[0 .. n-1] holds the start offset of
 * each of the n tiles, with pTile[0] == 0, and n is returned.
 *
 * A tile count of 0 or one larger than cNumMB becomes 1, and the count is
 * capped at MAX_TILES.  A tile size of 0 or above 65535, or an implied last
 * tile above 65536, switches to setUniformTiling().  If the explicit sizes
 * reach cNumMB early, the tile count is truncated at that point.
 */
U32 validateTiling(U32 * pTile, U32 cNumTile, U32 cNumMB)
{
    U32 k;
    U32 cCovered = 0;   // macroblocks covered by the explicit tile sizes
    U32 uStart = 0;     // running start offset for the final conversion

    if (cNumTile == 0 || cNumTile > cNumMB) { // none given, or too many tiles
        cNumTile = 1;
    }
    if (cNumTile > MAX_TILES) {
        cNumTile = MAX_TILES;
    }

    for (k = 0; k + 1 < cNumTile; ++k) {
        if (pTile[k] == 0 || pTile[k] > 65535) {
            // invalid tile setting, resetting to uniform tiling
            cNumTile = setUniformTiling(pTile, cNumTile, cNumMB);
            break;
        }

        cCovered += pTile[k];

        if (cCovered >= cNumMB) {
            cNumTile = k + 1;
            break;
        }
    }

    // last tile (the subtraction is unsigned, so an overshoot also lands here)
    if (cNumMB - cCovered > 65536) {
        cNumTile = setUniformTiling(pTile, cNumTile, cNumMB);
    }

    // turn sizes into start offsets: entry k becomes the sum of sizes 0..k-1
    for (k = 0; k < cNumTile; ++k) {
        const U32 cSize = pTile[k];

        pTile[k] = uStart;
        uStart += cSize;
    }

    return cNumTile;
}
1177 
1178 /*************************************************************************
1179   Validate and adjust input params here
1180 *************************************************************************/
ValidateArgs(CWMImageInfo * pII,CWMIStrCodecParam * pSCP)1181 Int ValidateArgs(CWMImageInfo* pII, CWMIStrCodecParam *pSCP)
1182 {
1183     int i;
1184     Bool bTooNarrowTile = FALSE;
1185 
1186     if(pII->cWidth > (1 << 28) || pII->cHeight > (1 << 28) || pII->cWidth == 0 || pII->cHeight == 0){
1187         printf("Unsurpported image size!\n");
1188         return ICERR_ERROR; // unsurpported image size
1189     }
1190 
1191     if (((pSCP->cfColorFormat == YUV_420) || (pSCP->cfColorFormat == YUV_422)) && (pSCP->olOverlap == OL_TWO) && ((Int)(((U32)pII->cWidth + 15) >> 4) < 2)) {
1192         printf("Image width must be at least 2 MB wide for subsampled chroma and two levels of overlap!\n");
1193         return ICERR_ERROR;
1194     }
1195 
1196     if(pSCP->sbSubband == SB_ISOLATED || pSCP->sbSubband >= SB_MAX) // not allowed
1197         pSCP->sbSubband = SB_ALL;
1198 
1199     if(pII->bdBitDepth == BD_5 && (pII->cfColorFormat != CF_RGB || pII->cBitsPerUnit != 16 || pII->cLeadingPadding != 0)){
1200         printf("Unsupported BD_5 image format!\n");
1201         return ICERR_ERROR; // BD_5 must be compact RGB!
1202     }
1203     if(pII->bdBitDepth == BD_565 && (pII->cfColorFormat != CF_RGB || pII->cBitsPerUnit != 16 || pII->cLeadingPadding != 0)){
1204         printf("Unsupported BD_565 image format!\n");
1205         return ICERR_ERROR; // BD_5 must be compact RGB!
1206     }
1207     if(pII->bdBitDepth == BD_10 && (pII->cfColorFormat != CF_RGB || pII->cBitsPerUnit != 32 || pII->cLeadingPadding != 0)){
1208         printf("Unsupported BD_10 image format!\n");
1209         return ICERR_ERROR; // BD_10 must be compact RGB!
1210     }
1211 
1212     if((pII->bdBitDepth == BD_5 || pII->bdBitDepth == BD_565 || pII->bdBitDepth == BD_10) &&
1213         (pSCP->cfColorFormat != YUV_420 && pSCP->cfColorFormat != YUV_422 && pSCP->cfColorFormat != Y_ONLY))
1214             pSCP->cfColorFormat = YUV_444;
1215 
1216     if(BD_1 == pII->bdBitDepth){ // binary image
1217         if(pII->cfColorFormat != Y_ONLY){
1218             printf("BD_1 image must be black-and white!\n");
1219             return ICERR_ERROR;
1220         }
1221         pSCP->cfColorFormat = Y_ONLY; // can only be black white
1222     }
1223 
1224     if(pSCP->bdBitDepth != BD_LONG)
1225         pSCP->bdBitDepth = BD_LONG; // currently only support 32 bit internally
1226 
1227     if(pSCP->uAlphaMode > 1 && (pII->cfColorFormat == YUV_420 || pII->cfColorFormat == YUV_422
1228 								|| pII->bdBitDepth == BD_5 || pII->bdBitDepth == BD_10
1229 								|| pII->bdBitDepth == BD_1))
1230     {
1231         printf("Alpha is not supported for this pixel format!\n");
1232         return ICERR_ERROR;
1233     }
1234 
1235     if((pSCP->cfColorFormat == YUV_420 || pSCP->cfColorFormat == YUV_422) && (pII->bdBitDepth == BD_16F || pII->bdBitDepth == BD_32F || pII->cfColorFormat == CF_RGBE))
1236     {
1237         printf("Float or RGBE images must be encoded with YUV 444!\n");
1238         return ICERR_ERROR;
1239     }
1240 
1241     // adjust tiling
1242     pSCP->cNumOfSliceMinus1V = validateTiling(pSCP->uiTileX, pSCP->cNumOfSliceMinus1V + 1, (((U32)pII->cWidth + 15) >> 4)) - 1;
1243     pSCP->cNumOfSliceMinus1H = validateTiling(pSCP->uiTileY, pSCP->cNumOfSliceMinus1H + 1, (((U32)pII->cHeight + 15) >> 4)) - 1;
1244 
1245     if (pSCP->bUseHardTileBoundaries && ((pSCP->cfColorFormat == YUV_420) || (pSCP->cfColorFormat == YUV_422)) && (pSCP->olOverlap == OL_TWO)) {
1246         for (i = 1; i < (int) (pSCP->cNumOfSliceMinus1H + 1); i++) {
1247             if ((Int)(pSCP->uiTileY[i] - pSCP->uiTileY[i - 1]) < 2) {
1248                 bTooNarrowTile = TRUE;
1249                 break;
1250             }
1251         }
1252         if ((Int)((((U32)pII->cWidth + 15) >> 4) - pSCP->uiTileY[pSCP->cNumOfSliceMinus1H]) < 2)
1253             bTooNarrowTile = TRUE;
1254     }
1255     if (bTooNarrowTile) {
1256         printf("Tile width must be at least 2 MB wide for hard tiles, subsampled chroma, and two levels of overlap!\n");
1257         return ICERR_ERROR;
1258     }
1259 
1260     if(pSCP->cChannel > MAX_CHANNELS)
1261         return ICERR_ERROR;
1262 
1263     /** supported color transcoding **/
1264     /** ARGB, RGB => YUV_444, YUV_422, YUV_420, Y_ONLY **/
1265     /** YUV_444   =>          YUV_422, YUV_420, Y_ONLY **/
1266     /** YUV_422   =>                   YUV_420, Y_ONLY **/
1267     /** YUV_420   =>                            Y_ONLY **/
1268 
1269     /** unsupported color transcoding       **/
1270     /** Y_ONLY, YUV_420, YUV_422 => YUV_444 **/
1271     /** Y_ONLY, YUV_420          => YUV_422 **/
1272     /** Y_ONLY                   => YUV_420 **/
1273     if((pII->cfColorFormat == Y_ONLY &&  pSCP->cfColorFormat != Y_ONLY) ||
1274         (pSCP->cfColorFormat == YUV_422 && (pII->cfColorFormat == YUV_420 || pII->cfColorFormat == Y_ONLY)) ||
1275         (pSCP->cfColorFormat == YUV_444 && (pII->cfColorFormat == YUV_422 || pII->cfColorFormat == YUV_420 || pII->cfColorFormat == Y_ONLY))){
1276 		pSCP->cfColorFormat = pII->cfColorFormat; // force not to do color transcoding!
1277     }
1278     else if (pII->cfColorFormat == NCOMPONENT) {
1279 		pSCP->cfColorFormat = NCOMPONENT; // force not to do color transcoding!
1280     }
1281     if (CMYK == pII->cfColorFormat && pSCP->cfColorFormat == NCOMPONENT)
1282     {
1283         pSCP->cfColorFormat = CMYK;
1284     }
1285 
1286     if(pSCP->cfColorFormat != NCOMPONENT){
1287         if(pSCP->cfColorFormat == Y_ONLY)
1288             pSCP->cChannel = 1;
1289         else if(pSCP->cfColorFormat == CMYK)
1290             pSCP->cChannel = 4;
1291         else
1292             pSCP->cChannel = 3;
1293     }
1294 
1295     if(pSCP->sbSubband >= SB_MAX)
1296         pSCP->sbSubband = SB_ALL;
1297 
1298 
1299     pII->cChromaCenteringX = 0;
1300     pII->cChromaCenteringY = 0;
1301 
1302     return ICERR_OK;
1303 }
1304 
1305 /*************************************************************************
1306   Initialization of CWMImageStrCodec struct
1307 *************************************************************************/
InitializeStrEnc(CWMImageStrCodec * pSC,const CWMImageInfo * pII,const CWMIStrCodecParam * pSCP)1308 static Void InitializeStrEnc(CWMImageStrCodec *pSC,
1309     const CWMImageInfo* pII, const CWMIStrCodecParam *pSCP)
1310 {
1311     pSC->cbStruct = sizeof(*pSC);
1312     pSC->WMII = *pII;
1313     pSC->WMISCP = *pSCP;
1314 
1315     // set nExpBias
1316     if (pSC->WMISCP.nExpBias == 0)
1317         pSC->WMISCP.nExpBias = 4 + 128;//default
1318     pSC->WMISCP.nExpBias += 128; // rollover arithmetic
1319 
1320     pSC->cRow = 0;
1321     pSC->cColumn = 0;
1322 
1323     pSC->cmbWidth = (pSC->WMII.cWidth + 15) / 16;
1324     pSC->cmbHeight = (pSC->WMII.cHeight + 15) / 16;
1325 
1326     pSC->Load = inputMBRow;
1327     pSC->Quantize = quantizeMacroblock;
1328     pSC->ProcessTopLeft = processMacroblock;
1329     pSC->ProcessTop = processMacroblock;
1330     pSC->ProcessTopRight = processMacroblock;
1331     pSC->ProcessLeft = processMacroblock;
1332     pSC->ProcessCenter = processMacroblock;
1333     pSC->ProcessRight = processMacroblock;
1334     pSC->ProcessBottomLeft = processMacroblock;
1335     pSC->ProcessBottom = processMacroblock;
1336     pSC->ProcessBottomRight = processMacroblock;
1337 
1338     pSC->m_pNextSC = NULL;
1339     pSC->m_bSecondary = FALSE;
1340 }
1341 
1342 /*************************************************************************
1343    Streaming API init
1344 *************************************************************************/
ImageStrEncInit(CWMImageInfo * pII,CWMIStrCodecParam * pSCP,CTXSTRCODEC * pctxSC)1345 Int ImageStrEncInit(
1346     CWMImageInfo* pII,
1347     CWMIStrCodecParam *pSCP,
1348     CTXSTRCODEC* pctxSC)
1349 {
1350     static size_t cbChannels[BD_MAX] = {2, 4};
1351 
1352     size_t cbChannel = 0, cblkChroma = 0, i;
1353     size_t cbMacBlockStride = 0, cbMacBlockChroma = 0, cMacBlock = 0;
1354 
1355     CWMImageStrCodec* pSC = NULL, *pNextSC = NULL;
1356     char* pb = NULL;
1357     size_t cb = 0;
1358     Bool b32bit = sizeof(size_t) == 4;
1359 
1360     Int err;
1361 
1362     if(ValidateArgs(pII, pSCP) != ICERR_OK){
1363         goto ErrorExit;
1364     }
1365 
1366     //================================================
1367     *pctxSC = NULL;
1368 
1369     //================================================
1370     cbChannel = cbChannels[pSCP->bdBitDepth];
1371     cblkChroma = cblkChromas[pSCP->cfColorFormat];
1372     cbMacBlockStride = cbChannel * 16 * 16;
1373     cbMacBlockChroma = cbChannel * 16 * cblkChroma;
1374     cMacBlock = (pII->cWidth + 15) / 16;
1375 
1376     //================================================
1377     cb = sizeof(*pSC) + (128 - 1) + (PACKETLENGTH * 4 - 1) + (PACKETLENGTH * 2 ) + sizeof(*pSC->pIOHeader);
1378     i = cbMacBlockStride + cbMacBlockChroma * (pSCP->cChannel - 1);
1379     if(b32bit) // integer overlow/underflow check for 32-bit system
1380         if(((cMacBlock >> 15) * i) & 0xffff0000)
1381             return ICERR_ERROR;
1382     i *= cMacBlock * 2;
1383     cb += i;
1384 
1385     pb = malloc(cb);
1386     if (NULL == pb)
1387     {
1388         goto ErrorExit;
1389     }
1390     memset(pb, 0, cb);
1391 
1392     //================================================
1393     pSC = (CWMImageStrCodec*)pb; pb += sizeof(*pSC);
1394 
1395     // Set up perf timers
1396     PERFTIMER_ONLY(pSC->m_fMeasurePerf = pSCP->fMeasurePerf);
1397     PERFTIMER_NEW(pSC->m_fMeasurePerf, &pSC->m_ptEndToEndPerf);
1398     PERFTIMER_NEW(pSC->m_fMeasurePerf, &pSC->m_ptEncDecPerf);
1399     PERFTIMER_START(pSC->m_fMeasurePerf, pSC->m_ptEndToEndPerf);
1400     PERFTIMER_START(pSC->m_fMeasurePerf, pSC->m_ptEncDecPerf);
1401     PERFTIMER_COPYSTARTTIME(pSC->m_fMeasurePerf, pSC->m_ptEncDecPerf, pSC->m_ptEndToEndPerf);
1402 
1403     pSC->m_param.cfColorFormat = pSCP->cfColorFormat;
1404     pSC->m_param.bAlphaChannel = (pSCP->uAlphaMode == 3);
1405     pSC->m_param.cNumChannels = pSCP->cChannel;
1406     pSC->m_param.cExtraPixelsTop = pSC->m_param.cExtraPixelsBottom
1407         = pSC->m_param.cExtraPixelsLeft = pSC->m_param.cExtraPixelsRight = 0;
1408 
1409     pSC->cbChannel = cbChannel;
1410 
1411     pSC->m_param.bTranscode = pSC->bTileExtraction = FALSE;
1412 
1413     //================================================
1414     InitializeStrEnc(pSC, pII, pSCP);
1415 
1416     //================================================
1417     // 2 Macro Row buffers for each channel
1418     pb = ALIGNUP(pb, 128);
1419     for (i = 0; i < pSC->m_param.cNumChannels; i++) {
1420         pSC->a0MBbuffer[i] = (PixelI*)pb; pb += cbMacBlockStride * pSC->cmbWidth;
1421         pSC->a1MBbuffer[i] = (PixelI*)pb; pb += cbMacBlockStride * pSC->cmbWidth;
1422         cbMacBlockStride = cbMacBlockChroma;
1423     }
1424 
1425     //================================================
1426     // lay 2 aligned IO buffers just below pIO struct
1427     pb = (char*)ALIGNUP(pb, PACKETLENGTH * 4) + PACKETLENGTH * 2;
1428     pSC->pIOHeader = (BitIOInfo*)pb;
1429 
1430     //================================================
1431     err = StrEncInit(pSC);
1432     if (ICERR_OK != err)
1433         goto ErrorExit;
1434 
1435     // if interleaved alpha is needed
1436     if (pSC->m_param.bAlphaChannel) {
1437         cbMacBlockStride = cbChannel * 16 * 16;
1438         // 1. allocate new pNextSC info
1439         //================================================
1440         cb = sizeof(*pNextSC) + (128 - 1) + cbMacBlockStride * cMacBlock * 2;
1441         pb = malloc(cb);
1442         if (NULL == pb)
1443         {
1444             goto ErrorExit;
1445         }
1446         memset(pb, 0, cb);
1447         //================================================
1448         pNextSC = (CWMImageStrCodec*)pb; pb += sizeof(*pNextSC);
1449 
1450         // 2. initialize pNextSC
1451         pNextSC->m_param.cfColorFormat = Y_ONLY;
1452         pNextSC->m_param.cNumChannels = 1;
1453         pNextSC->m_param.bAlphaChannel = TRUE;
1454         pNextSC->cbChannel = cbChannel;
1455         //================================================
1456 
1457         // 3. initialize arrays
1458         InitializeStrEnc(pNextSC, pII, pSCP);
1459         //================================================
1460 
1461         // 2 Macro Row buffers for each channel
1462         pb = ALIGNUP(pb, 128);
1463         pNextSC->a0MBbuffer[0] = (PixelI*)pb; pb += cbMacBlockStride * pNextSC->cmbWidth;
1464         pNextSC->a1MBbuffer[0] = (PixelI*)pb; pb += cbMacBlockStride * pNextSC->cmbWidth;
1465         //================================================
1466         pNextSC->pIOHeader = pSC->pIOHeader;
1467         //================================================
1468 
1469         // 4. link pSC->pNextSC = pNextSC
1470         pNextSC->m_pNextSC = pSC;
1471         pNextSC->m_bSecondary = TRUE;
1472 
1473         // 5. StrEncInit
1474         StrEncInit(pNextSC);
1475 
1476         // 6. Write header of image plane
1477         WriteImagePlaneHeader(pNextSC);
1478     }
1479 
1480     pSC->m_pNextSC = pNextSC;
1481     //================================================
1482     *pctxSC = (CTXSTRCODEC)pSC;
1483 
1484     writeIndexTableNull(pSC);
1485 #if defined(WMP_OPT_SSE2) || defined(WMP_OPT_CC_ENC) || defined(WMP_OPT_TRFM_ENC)
1486     StrEncOpt(pSC);
1487 #endif // OPT defined
1488 
1489     PERFTIMER_STOP(pSC->m_fMeasurePerf, pSC->m_ptEncDecPerf);
1490     return ICERR_OK;
1491 
1492 ErrorExit:
1493     return ICERR_ERROR;
1494 }
1495 
1496 /*************************************************************************
1497    Streaming API encode
1498 *************************************************************************/
ImageStrEncEncode(CTXSTRCODEC ctxSC,const CWMImageBufferInfo * pBI)1499 Int ImageStrEncEncode(
1500     CTXSTRCODEC ctxSC,
1501     const CWMImageBufferInfo* pBI)
1502 {
1503     CWMImageStrCodec* pSC = (CWMImageStrCodec*)ctxSC;
1504     CWMImageStrCodec* pNextSC = pSC->m_pNextSC;
1505     ImageDataProc ProcessLeft, ProcessCenter, ProcessRight;
1506 
1507     if (sizeof(*pSC) != pSC->cbStruct)
1508     {
1509         return ICERR_ERROR;
1510     }
1511 
1512     //================================
1513     PERFTIMER_START(pSC->m_fMeasurePerf, pSC->m_ptEncDecPerf);
1514 
1515     pSC->WMIBI = *pBI;
1516     pSC->cColumn = 0;
1517     initMRPtr(pSC);
1518     if (pNextSC)
1519         pNextSC->WMIBI = *pBI;
1520 
1521     if (0 == pSC->cRow) {
1522         ProcessLeft = pSC->ProcessTopLeft;
1523         ProcessCenter = pSC->ProcessTop;
1524         ProcessRight = pSC->ProcessTopRight;
1525     }
1526     else {
1527         ProcessLeft = pSC->ProcessLeft;
1528         ProcessCenter = pSC->ProcessCenter;
1529         ProcessRight = pSC->ProcessRight;
1530     }
1531 
1532     if( pSC->Load(pSC) != ICERR_OK )
1533 		return ICERR_ERROR;
1534     if(ProcessLeft(pSC) != ICERR_OK)
1535         return ICERR_ERROR;
1536     advanceMRPtr(pSC);
1537 
1538     //================================
1539     for (pSC->cColumn = 1; pSC->cColumn < pSC->cmbWidth; ++pSC->cColumn) {
1540         if(ProcessCenter(pSC) != ICERR_OK)
1541             return ICERR_ERROR;
1542         advanceMRPtr(pSC);
1543     }
1544 
1545     //================================
1546     if(ProcessRight(pSC) != ICERR_OK)
1547         return ICERR_ERROR;
1548     if (pSC->cRow)
1549         advanceOneMBRow(pSC);
1550 
1551     ++pSC->cRow;
1552     swapMRPtr(pSC);
1553 
1554     PERFTIMER_STOP(pSC->m_fMeasurePerf, pSC->m_ptEncDecPerf);
1555     return ICERR_OK;
1556 }
1557 
1558 /*************************************************************************
1559    Streaming API term
1560 *************************************************************************/
ImageStrEncTerm(CTXSTRCODEC ctxSC)1561 Int ImageStrEncTerm(
1562     CTXSTRCODEC ctxSC)
1563 {
1564     CWMImageStrCodec* pSC = (CWMImageStrCodec*)ctxSC;
1565     // CWMImageStrCodec *pNextSC = pSC->m_pNextSC;
1566 
1567     if (sizeof(*pSC) != pSC->cbStruct)
1568     {
1569         return ICERR_ERROR;
1570     }
1571 
1572     //================================
1573     PERFTIMER_START(pSC->m_fMeasurePerf, pSC->m_ptEncDecPerf);
1574     pSC->cColumn = 0;
1575     initMRPtr(pSC);
1576 
1577     pSC->ProcessBottomLeft(pSC);
1578     advanceMRPtr(pSC);
1579 
1580     //================================
1581     for (pSC->cColumn = 1; pSC->cColumn < pSC->cmbWidth; ++pSC->cColumn) {
1582         pSC->ProcessBottom(pSC);
1583         advanceMRPtr(pSC);
1584     }
1585 
1586     //================================
1587     pSC->ProcessBottomRight(pSC);
1588 
1589     //================================
1590     StrEncTerm(pSC);
1591 
1592     PERFTIMER_STOP(pSC->m_fMeasurePerf, pSC->m_ptEncDecPerf);
1593     PERFTIMER_STOP(pSC->m_fMeasurePerf, pSC->m_ptEndToEndPerf);
1594     PERFTIMER_REPORT(pSC->m_fMeasurePerf, pSC);
1595     PERFTIMER_DELETE(pSC->m_fMeasurePerf, pSC->m_ptEncDecPerf);
1596     PERFTIMER_DELETE(pSC->m_fMeasurePerf, pSC->m_ptEndToEndPerf);
1597 
1598     free(pSC);
1599     return ICERR_OK;
1600 }
1601 
// centralized UV downsampling
//
// DF_ODD is a 5-tap low-pass filter evaluated on the locals d0..d4:
//     (d0 + 4*d1 + 6*d2 + 4*d3 + d4 + 8) >> 4
// d2 is the center tap; "+ 8" rounds before the divide-by-16 shift. The macro
// picks up d0..d4 by name, so it can only be used where those locals exist
// and have been loaded.
#define DF_ODD ((((d1 + d2 + d3) << 2) + (d2 << 1) + d0 + d4 + 8) >> 4)
// Downsample the two chroma planes (channels 1 and 2) held in pSC->pResU and
// pSC->pResV for one macroblock row.
//
//   1. Horizontal 2:1 pass, skipped when the external format is YUV_422.
//      Output goes to p1MBbuffer[iChannel] when the internal format is
//      YUV_422, otherwise it is written back in place at the even columns of
//      the source plane.
//   2. Vertical 2:1 pass, only when the internal format is YUV_420. Output
//      goes to p1MBbuffer[iChannel]; the last output row of each macroblock
//      needs the first source row of the following MB row, so it is produced
//      one call later and stored into p0MBbuffer[iChannel].
//
// Both passes mirror the samples around the image boundary (left/right,
// top/bottom) to fill the filter taps that fall outside the image.
// NOTE(review): relies on pResU/pResV (including the scratch rows behind
// them, see pBuf below) keeping their contents between calls — confirm
// against the allocation code.
Void downsampleUV(CWMImageStrCodec * pSC)
{
    const COLORFORMAT cfInt = pSC->m_param.cfColorFormat;
    const COLORFORMAT cfExt = pSC->WMII.cfColorFormat;
    PixelI * pSrc, * pDst;
    PixelI d0, d1, d2, d3, d4; // filter window consumed by DF_ODD
    size_t iChannel, iRow, iColumn;

    for(iChannel = 1; iChannel < 3; iChannel ++){
        if(cfExt != YUV_422){ // need to do horizontal downsampling, 444 => 422
            // cShift == 1 packs the output into a half-width layout (128
            // samples per macroblock); cShift == 0 keeps the 256-sample
            // source layout and leaves the results at the even columns.
            const size_t cShift = (cfInt == YUV_422 ? 1 : 0);

            pSrc = (iChannel == 1 ? pSC->pResU : pSC->pResV);
            pDst = (cfInt == YUV_422 ? pSC->p1MBbuffer[iChannel] : pSrc);

            for(iRow = 0; iRow < 16; iRow ++){
                // window centered on column 0; columns -1 and -2 are mirrored from 1 and 2
                d0 = d4 = pSrc[idxCC[iRow][2]], d1 = d3 = pSrc[idxCC[iRow][1]], d2 = pSrc[idxCC[iRow][0]]; // left boundary

                // every even column except the last one; reads always run
                // ahead of the write position, which is what makes the
                // in-place case (pDst == pSrc) safe
                for(iColumn = 0; iColumn + 2 < pSC->cmbWidth * 16; iColumn += 2){
                    pDst[((iColumn >> 4) << (8 - cShift)) + idxCC[iRow][(iColumn & 15) >> cShift]] = DF_ODD;
                    // slide the window two columns to the right
                    d0 = d2, d1 = d3, d2 = d4;
                    d3 = pSrc[(((iColumn + 3) >> 4) << 8) + idxCC[iRow][(iColumn + 3) & 0xf]];
                    // NOTE(review): on the final iteration iColumn + 4 equals
                    // cmbWidth * 16, i.e. this load addresses the first sample
                    // of the macroblock after the row; the value is discarded
                    // by "d4 = d2" below, but the read itself still happens.
                    d4 = pSrc[(((iColumn + 4) >> 4) << 8) + idxCC[iRow][(iColumn + 4) & 0xf]];
                }

                // last even column: the tap beyond the right edge mirrors d2's left neighbour
                d4 = d2; // right boundary
                pDst[((iColumn >> 4) << (8 - cShift)) + idxCC[iRow][(iColumn & 15) >> cShift]] = DF_ODD;
            }
        }

        if(cfInt == YUV_420){ // need to do vertical downsampling
            // cShift == 1: source still has the 256-sample layout with valid
            // data at even columns (result of the in-place horizontal pass);
            // cShift == 0: source is already half width (external YUV_422).
            const size_t cShift = (cfExt == YUV_422 ? 0 : 1);
            // four scratch rows of cmbWidth * 8 samples each, located right
            // behind the cmbWidth macroblocks of the source plane; they carry
            // source rows 12..15 of this MB row over to the next call
            PixelI * pBuf[4];
            size_t mbOff, pxOff;

            pDst = pSC->p1MBbuffer[iChannel];
            pSrc = (iChannel == 1 ? pSC->pResU : pSC->pResV);
            pBuf[0] = pSrc + (pSC->cmbWidth << (cfExt == YUV_422 ? 7 : 8));
            pBuf[1] = pBuf[0] + pSC->cmbWidth * 8, pBuf[2] = pBuf[1] + pSC->cmbWidth * 8, pBuf[3] = pBuf[2] + pSC->cmbWidth * 8;

            // one pass per (half-resolution) chroma column
            for(iColumn = 0; iColumn < pSC->cmbWidth * 8; iColumn ++){
                mbOff = (iColumn >> 3) << (7 + cShift); // start of the source macroblock
                pxOff = (iColumn & 7) << cShift;        // column inside that macroblock

                if(pSC->cRow == 0) // top image boundary
                    // window centered on row 0; rows -1 and -2 are mirrored from 1 and 2
                    d0 = d4 = pSrc[mbOff + idxCC[2][pxOff]], d1 = d3 = pSrc[mbOff + idxCC[1][pxOff]], d2 = pSrc[mbOff + idxCC[0][pxOff]]; // top MB boundary
                else{
                    // last row of previous MB row
                    // window = saved rows 12..15 plus row 0 of this MB row,
                    // centered on saved row 14; the result is output row 7 of
                    // the previous MB row, hence the store into p0MBbuffer
                    d0 = pBuf[0][iColumn], d1 = pBuf[1][iColumn], d2 = pBuf[2][iColumn], d3 = pBuf[3][iColumn], d4 = pSrc[mbOff + idxCC[0][pxOff]];
                    pSC->p0MBbuffer[iChannel][((iColumn >> 3) << 6) + idxCC_420[7][iColumn & 7]] = DF_ODD;

                    // for first row of current MB
                    // window centered on row 0, upper taps taken from saved rows 14 and 15
                    d0 = pBuf[2][iColumn], d1 = pBuf[3][iColumn];
                    d2 = pSrc[mbOff + idxCC[0][pxOff]], d3 = pSrc[mbOff + idxCC[1][pxOff]], d4 = pSrc[mbOff + idxCC[2][pxOff]];
                }

                // output rows 0..5 (centered on source rows 0, 2, .., 10);
                // after each store the window slides down by two rows
                for(iRow = 0; iRow < 12; iRow += 2){
                    pDst[((iColumn >> 3) << 6) + idxCC_420[iRow >> 1][iColumn & 7]] = DF_ODD;
                    d0 = d2, d1 = d3, d2 = d4;
                    d3 = pSrc[mbOff + idxCC[iRow + 3][pxOff]];
                    d4 = pSrc[mbOff + idxCC[iRow + 4][pxOff]];
                }

                //last row of current MB
                // iRow is 12 here: the window is centered on source row 12 and
                // yields output row 6; then slide once more so that d2 is row
                // 14 and d3 is row 15 (iRow + 3)
                pDst[((iColumn >> 3) << 6) + idxCC_420[6][iColumn & 7]] = DF_ODD;
                d0 = d2, d1 = d3, d2 = d4;
                d3 = pSrc[mbOff + idxCC[iRow + 3][pxOff]];

                if(pSC->cRow + 1 == pSC->cmbHeight){ // bottom image boundary
                    // no further MB row: mirror the missing tap and emit output row 7 now
                    d4 = d2;
                    pDst[((iColumn >> 3) << 6) + idxCC_420[7][iColumn & 7]] = DF_ODD;
                }
                else{
                    // output row 7 needs the next MB row; stash source rows
                    // 12..15 so the next call can finish it
                    for(iRow = 0; iRow < 4; iRow ++)
                        pBuf[iRow][iColumn] = pSrc[mbOff + idxCC[iRow + 12][pxOff]];
                }
            }
        }
    }
}
1684 
1685 // centralized horizontal padding
padHorizontally(CWMImageStrCodec * pSC)1686 Void padHorizontally(CWMImageStrCodec * pSC)
1687 {
1688     if(pSC->WMII.cWidth != pSC->cmbWidth * 16){ // horizontal padding is necessary!
1689         const COLORFORMAT cfExt = pSC->WMISCP.bYUVData ?
1690             pSC->m_param.cfColorFormat : pSC->WMII.cfColorFormat;
1691         size_t cFullChannel = pSC->WMISCP.cChannel;
1692         size_t iLast = pSC->WMII.cWidth - 1;
1693         PixelI * pCh[16];
1694         size_t iChannel, iColumn, iRow;
1695 
1696         if(cfExt == YUV_420 || cfExt == YUV_422 || cfExt == Y_ONLY)
1697             cFullChannel = 1;
1698 
1699         assert(cFullChannel <= 16);
1700 
1701         assert(pSC->WMISCP.cChannel <= 16);
1702         for(iChannel = 0; iChannel < pSC->WMISCP.cChannel; iChannel ++)
1703             pCh[iChannel & 15] = pSC->p1MBbuffer[iChannel & 15];
1704 
1705         if(pSC->m_bUVResolutionChange)
1706             pCh[1] = pSC->pResU, pCh[2] = pSC->pResV;
1707 
1708         // pad full resoluton channels
1709         for(iRow = 0; iRow < 16; iRow ++){
1710             const size_t iPosLast = ((iLast >> 4) << 8) + idxCC[iRow][iLast & 0xf];
1711             for(iColumn = iLast + 1; iColumn < pSC->cmbWidth * 16; iColumn ++){
1712                 const size_t iPos = ((iColumn >> 4) << 8) + idxCC[iRow][iColumn & 0xf];
1713                 for(iChannel = 0; iChannel < cFullChannel; iChannel ++)
1714                     pCh[iChannel & 15][iPos] = pCh[iChannel & 15][iPosLast];
1715             }
1716         }
1717 
1718         if(cfExt == YUV_422) // pad YUV_422 UV
1719             for(iLast >>= 1, iRow = 0; iRow < 16; iRow ++){
1720                 const size_t iPosLast = ((iLast >> 3) << 7) + idxCC[iRow][iLast & 7];
1721                 for(iColumn = iLast + 1; iColumn < pSC->cmbWidth * 8; iColumn ++){
1722                     const size_t iPos = ((iColumn >> 3) << 7) + idxCC[iRow][iColumn & 7];
1723                     for(iChannel = 1; iChannel < 3; iChannel ++)
1724                         pCh[iChannel][iPos] = pCh[iChannel][iPosLast];
1725                 }
1726             }
1727         else if(cfExt == YUV_420) // pad YUV_420 UV
1728             for(iLast >>= 1, iRow = 0; iRow < 8; iRow ++){
1729                 const size_t iPosLast = ((iLast >> 3) << 6) + idxCC_420[iRow][iLast & 7];
1730                 for(iColumn = iLast + 1; iColumn < pSC->cmbWidth * 8; iColumn ++){
1731                     const size_t iPos = ((iColumn >> 3) << 6) + idxCC_420[iRow][iColumn & 7];
1732                     for(iChannel = 1; iChannel < 3; iChannel ++)
1733                         pCh[iChannel][iPos] = pCh[iChannel][iPosLast];
1734                 }
1735             }
1736     }
1737 }
1738 
1739 // centralized alpha channel color conversion, small perf penalty
inputMBRowAlpha(CWMImageStrCodec * pSC)1740 Int inputMBRowAlpha(CWMImageStrCodec* pSC)
1741 {
1742     if(pSC->m_bSecondary == FALSE && pSC->m_pNextSC != NULL){ // alpha channel is present
1743         const size_t cShift = (pSC->m_pNextSC->m_param.bScaledArith ? (SHIFTZERO + QPFRACBITS) : 0);
1744         const BITDEPTH_BITS bdExt = pSC->WMII.bdBitDepth;
1745         const size_t iAlphaPos = pSC->WMII.cLeadingPadding + (pSC->WMII.cfColorFormat == CMYK ? 4 : 3);//only RGB and CMYK may have interleaved alpha
1746         const size_t cRow = pSC->WMIBI.cLine;
1747         const size_t cColumn = pSC->WMII.cWidth;
1748         const U8 * pSrc0 = (U8 *)pSC->WMIBI.pv;
1749         PixelI * pA = pSC->m_pNextSC->p1MBbuffer[0];
1750         size_t iRow, iColumn;
1751 
1752         for(iRow = 0; iRow < 16; iRow ++){
1753             if(bdExt == BD_8){
1754                 const size_t cStride = (pSC->WMII.cBitsPerUnit >> 3);
1755                 const U8 * pSrc = pSrc0;
1756 
1757                 for(iColumn = 0; iColumn < cColumn; iColumn ++, pSrc += cStride)
1758                     pA[((iColumn >> 4) << 8) + idxCC[iRow][iColumn & 0xf]] = ((PixelI)pSrc[iAlphaPos] - (1 << 7)) << cShift;
1759             }
1760             else if(bdExt == BD_16){
1761                 const size_t cStride = (pSC->WMII.cBitsPerUnit >> 3) / sizeof(U16);
1762                 const U8 nLenMantissaOrShift = pSC->m_pNextSC->WMISCP.nLenMantissaOrShift;
1763                 const U16 * pSrc = (U16 *)pSrc0;
1764 
1765                 for(iColumn = 0; iColumn < cColumn; iColumn ++, pSrc += cStride)
1766                     pA[((iColumn >> 4) << 8) + idxCC[iRow][iColumn & 0xf]] = ((((PixelI)pSrc[iAlphaPos] - (1 << 15)) >> nLenMantissaOrShift) << cShift);
1767             }
1768             else if(bdExt == BD_16S){
1769                 const size_t cStride = (pSC->WMII.cBitsPerUnit >> 3) / sizeof(I16);
1770                 const U8 nLenMantissaOrShift = pSC->m_pNextSC->WMISCP.nLenMantissaOrShift;
1771                 const I16 * pSrc = (I16 *)pSrc0;
1772 
1773                 for(iColumn = 0; iColumn < cColumn; iColumn ++, pSrc += cStride)
1774                     pA[((iColumn >> 4) << 8) + idxCC[iRow][iColumn & 0xf]] = (((PixelI)pSrc[iAlphaPos] >> nLenMantissaOrShift) << cShift);
1775             }
1776             else if(bdExt == BD_16F){
1777                 const size_t cStride = (pSC->WMII.cBitsPerUnit >> 3) / sizeof(U16);
1778                 const I16 * pSrc = (I16 *)pSrc0;
1779 
1780                 for(iColumn = 0; iColumn < cColumn; iColumn ++, pSrc += cStride)
1781                     pA[((iColumn >> 4) << 8) + idxCC[iRow][iColumn & 0xf]] = forwardHalf (pSrc[iAlphaPos]) << cShift;
1782             }
1783             else if(bdExt == BD_32S){
1784                 const size_t cStride = (pSC->WMII.cBitsPerUnit >> 3) / sizeof(I32);
1785                 const U8 nLenMantissaOrShift = pSC->m_pNextSC->WMISCP.nLenMantissaOrShift;
1786                 const I32 * pSrc = (I32 *)pSrc0;
1787 
1788                 for(iColumn = 0; iColumn < cColumn; iColumn ++, pSrc += cStride)
1789                     pA[((iColumn >> 4) << 8) + idxCC[iRow][iColumn & 0xf]] = (((PixelI)pSrc[iAlphaPos] >> nLenMantissaOrShift) << cShift);
1790             }
1791             else if(bdExt == BD_32F){
1792                 const size_t cStride = (pSC->WMII.cBitsPerUnit >> 3) / sizeof(float);
1793                 const U8 nLen = pSC->m_pNextSC->WMISCP.nLenMantissaOrShift;
1794                 const I8 nExpBias = pSC->m_pNextSC->WMISCP.nExpBias;
1795                 const float * pSrc = (float *)pSrc0;
1796 
1797                 for(iColumn = 0; iColumn < cColumn; iColumn ++, pSrc += cStride)
1798                     pA[((iColumn >> 4) << 8) + idxCC[iRow][iColumn & 0xf]] = float2pixel (pSrc[iAlphaPos], nExpBias, nLen) << cShift;
1799             }
1800             else // not supported
1801                 return ICERR_ERROR;
1802 
1803             if(iRow + 1 < cRow) // vertical padding!
1804                 pSrc0 += pSC->WMIBI.cbStride;
1805 
1806             for(iColumn = cColumn; iColumn < pSC->cmbWidth * 16; iColumn ++) // horizontal padding
1807                 pA[((iColumn >> 4) << 8) + idxCC[iRow][iColumn & 0xf]] =  pA[(((cColumn - 1) >> 4) << 8) + idxCC[iRow][(cColumn - 1) & 0xf]];
1808         }
1809     }
1810 
1811     return ICERR_OK;
1812 }
1813 
1814 // input one MB row of image data from input buffer
inputMBRow(CWMImageStrCodec * pSC)1815 Int inputMBRow(CWMImageStrCodec* pSC)
1816 {
1817     const size_t cShift = (pSC->m_param.bScaledArith ? (SHIFTZERO + QPFRACBITS) : 0);
1818     const BITDEPTH_BITS bdExt = pSC->WMII.bdBitDepth;
1819     COLORFORMAT cfExt = pSC->WMII.cfColorFormat;
1820     const COLORFORMAT cfInt = pSC->m_param.cfColorFormat;
1821     const size_t cPixelStride = (pSC->WMII.cBitsPerUnit >> 3);
1822     const size_t iRowStride =
1823 		(cfExt == YUV_420 || (pSC->WMISCP.bYUVData && pSC->m_param.cfColorFormat==YUV_420)) ? 2 : 1;
1824     const size_t cRow = pSC->WMIBI.cLine;
1825     const size_t cColumn = pSC->WMII.cWidth;
1826 	const size_t iB = (pSC->WMII.bRGB ? 2 : 0);
1827     const size_t iR = 2 - iB;
1828     const U8 * pSrc0 = (U8 *)pSC->WMIBI.pv;
1829     const U8 nLen = pSC->WMISCP.nLenMantissaOrShift;
1830     const I8 nExpBias = pSC->WMISCP.nExpBias;
1831 
1832     PixelI *pY = pSC->p1MBbuffer[0], *pU = pSC->p1MBbuffer[1], *pV = pSC->p1MBbuffer[2];
1833     size_t iRow, iColumn, iPos;
1834 
1835     // guard input buffer
1836     if(checkImageBuffer(pSC, cColumn, cRow) != ICERR_OK)
1837         return ICERR_ERROR;
1838 
1839     if(pSC->m_bUVResolutionChange)  // will do downsampling somewhere else!
1840         pU = pSC->pResU, pV = pSC->pResV;
1841     else if(cfInt == Y_ONLY) // xxx to Y_ONLY transcoding!
1842         pU = pV = pY; // write pY AFTER pU and pV so Y will overwrite U&V
1843 
1844     for(iRow = 0; iRow < 16; iRow += iRowStride){
1845         if (pSC->WMISCP.bYUVData){
1846             I32 * pSrc = (I32 *)pSrc0 + pSC->WMII.cLeadingPadding;
1847 
1848             switch(pSC->m_param.cfColorFormat){
1849             case Y_ONLY:
1850             case YUV_444:
1851             case NCOMPONENT:
1852                 {
1853                     const size_t cChannel = pSC->m_param.cNumChannels;
1854                     PixelI * pChannel[16];
1855                     size_t iChannel;
1856 
1857                     assert(cChannel <= 16);
1858                     for(iChannel = 0; iChannel < cChannel; iChannel ++)
1859                         pChannel[iChannel & 15] = pSC->p1MBbuffer[iChannel & 15];
1860                     if(pSC->m_bUVResolutionChange)
1861                         pChannel[1] = pSC->pResU, pChannel[2] = pSC->pResV;
1862 
1863                     for(iColumn = 0; iColumn < cColumn; iColumn ++, pSrc += cChannel){
1864                         iPos = ((iColumn >> 4) << 8) + idxCC[iRow][iColumn & 0xf];
1865                         for(iChannel = 0; iChannel < cChannel; iChannel ++)
1866                             pChannel[iChannel & 15][iPos] = (PixelI)pSrc[iChannel & 15];
1867                     }
1868                 }
1869                 break;
1870 
1871             case YUV_422:
1872                 for(iColumn = 0; iColumn < cColumn; iColumn += 2, pSrc += 4){
1873                     if(cfInt != Y_ONLY){
1874                         iPos = ((iColumn >> 4) << 7) + idxCC[iRow][(iColumn >> 1) & 7];
1875                         pU[iPos] = (PixelI)pSrc[0];
1876                         pV[iPos] = (PixelI)pSrc[2];
1877                     }
1878 
1879                     pY[((iColumn >> 4) << 8) + idxCC[iRow][iColumn & 15]] = (PixelI)pSrc[1];
1880                     pY[(((iColumn + 1) >> 4) << 8) + idxCC[iRow][(iColumn + 1) & 15]] = (PixelI)pSrc[3];
1881                 }
1882                 break;
1883 
1884             case YUV_420:
1885                 for(iColumn = 0; iColumn < cColumn; iColumn += 2, pSrc += 6){
1886                     if(cfInt != Y_ONLY){
1887                         iPos = ((iColumn >> 4) << 6) + idxCC_420[iRow >> 1][(iColumn >> 1) & 7];
1888                         pU[iPos] = (PixelI)pSrc[4];
1889                         pV[iPos] = (PixelI)pSrc[5];
1890                     }
1891 
1892                     pY[((iColumn >> 4) << 8) + idxCC[iRow][iColumn & 15]] = (PixelI)pSrc[0];
1893                     pY[(((iColumn + 1) >> 4) << 8) + idxCC[iRow][(iColumn + 1) & 15]] = (PixelI)pSrc[1];
1894                     pY[((iColumn >> 4) << 8) + idxCC[iRow + 1][iColumn & 15]] = (PixelI)pSrc[2];
1895                     pY[(((iColumn + 1) >> 4) << 8) + idxCC[iRow + 1][(iColumn + 1) & 15]] = (PixelI)pSrc[3];
1896                 }
1897                 break;
1898 
1899             default:
1900                 assert(0);
1901                 break;
1902             }
1903         }
1904         else if(bdExt == BD_8){
1905             const U8 * pSrc = pSrc0 + pSC->WMII.cLeadingPadding;
1906             const PixelI iOffset = (128 << cShift);
1907 
1908             switch(cfExt){
1909                 case CF_RGB:
1910                     assert (pSC->m_bSecondary == FALSE);
1911 					for(iColumn = 0; iColumn < cColumn; iColumn ++, pSrc += cPixelStride){
1912 						PixelI r = ((PixelI)pSrc[iR]) << cShift, g = ((PixelI)pSrc[1]) << cShift, b = ((PixelI)pSrc[iB]) << cShift;
1913 
1914 						_CC(r, g, b); // color conversion
1915 
1916 						iPos = ((iColumn >> 4) << 8) + idxCC[iRow][iColumn & 0xf];
1917 						pU[iPos] = -r, pV[iPos] = b, pY[iPos] = g - iOffset;
1918 					}
1919                     break;
1920 
1921                 case Y_ONLY:
1922                 case YUV_444:
1923                 case NCOMPONENT:
1924                 {
1925                     const size_t cChannel = pSC->m_param.cNumChannels;
1926                     PixelI * pChannel[16];
1927                     size_t iChannel;
1928 
1929                     assert(cChannel <= 16);
1930                     for(iChannel = 0; iChannel < cChannel; iChannel ++)
1931                         pChannel[iChannel & 15] = pSC->p1MBbuffer[iChannel & 15];
1932                     if(pSC->m_bUVResolutionChange)
1933                         pChannel[1] = pSC->pResU, pChannel[2] = pSC->pResV;
1934 
1935                     for(iColumn = 0; iColumn < cColumn; iColumn ++, pSrc += cPixelStride){
1936                         iPos = ((iColumn >> 4) << 8) + idxCC[iRow][iColumn & 0xf];
1937                         for(iChannel = 0; iChannel < cChannel; iChannel ++)
1938                             pChannel[iChannel & 15][iPos] = (((PixelI)pSrc[iChannel & 15]) << cShift) - iOffset;
1939                     }
1940                     break;
1941                 }
1942 
1943                 case CF_RGBE:
1944                     for(iColumn = 0; iColumn < cColumn; iColumn ++, pSrc += cPixelStride){
1945                         PixelI iExp = (PixelI)pSrc[3];
1946                         PixelI r = forwardRGBE (pSrc[0], iExp) << cShift;
1947                         PixelI g = forwardRGBE (pSrc[1], iExp) << cShift;
1948                         PixelI b = forwardRGBE (pSrc[2], iExp) << cShift;
1949 
1950                         _CC(r, g, b);
1951 
1952                         iPos = ((iColumn >> 4) << 8) + idxCC[iRow][iColumn & 0xf];
1953                         pU[iPos] = -r, pV[iPos] = b, pY[iPos] = g;
1954                     }
1955                     break;
1956 
1957                 case CMYK:
1958                 {
1959                     PixelI * pK = (cfInt == CMYK ? pSC->p1MBbuffer[3] : pY); // CMYK -> YUV_xxx transcoding!
1960 
1961                     for(iColumn = 0; iColumn < cColumn; iColumn ++, pSrc += cPixelStride){
1962                         PixelI c = ((PixelI)pSrc[0]) << cShift;
1963                         PixelI m = ((PixelI)pSrc[1]) << cShift;
1964                         PixelI y = ((PixelI)pSrc[2]) << cShift;
1965                         PixelI k = ((PixelI)pSrc[3]) << cShift;
1966 
1967                         _CC_CMYK(c, m, y, k);
1968 
1969                         iPos = ((iColumn >> 4) << 8) + idxCC[iRow][iColumn & 0xf];
1970                         pU[iPos] = c, pV[iPos] = -y, pK[iPos] = k, pY[iPos] = iOffset - m;
1971                     }
1972                     break;
1973                 }
1974 
1975                 case YUV_422:
1976                     for(iColumn = 0; iColumn < cColumn; iColumn += 2, pSrc += cPixelStride){
1977                         if(cfInt != Y_ONLY){
1978                             iPos = ((iColumn >> 4) << 7) + idxCC[iRow][(iColumn >> 1) & 7];
1979                             pU[iPos] = (((PixelI)pSrc[0]) << cShift) - iOffset;
1980                             pV[iPos] = (((PixelI)pSrc[2]) << cShift) - iOffset;
1981                         }
1982 
1983                         pY[((iColumn >> 4) << 8) + idxCC[iRow][iColumn & 15]] = (((PixelI)pSrc[1]) << cShift) - iOffset;
1984                         pY[(((iColumn + 1) >> 4) << 8) + idxCC[iRow][(iColumn + 1) & 15]] = (((PixelI)pSrc[3]) << cShift) - iOffset;
1985                     }
1986                     break;
1987 
1988                 case YUV_420:
1989                     for(iColumn = 0; iColumn < cColumn; iColumn += 2, pSrc += cPixelStride){
1990                         if(cfInt != Y_ONLY){
1991                             iPos = ((iColumn >> 4) << 6) + idxCC_420[iRow >> 1][(iColumn >> 1) & 7];
1992                             pU[iPos] = (((PixelI)pSrc[4]) << cShift) - iOffset;
1993                             pV[iPos] = (((PixelI)pSrc[5]) << cShift) - iOffset;
1994                         }
1995 
1996                         pY[((iColumn >> 4) << 8) + idxCC[iRow][iColumn & 15]] = (((PixelI)pSrc[0]) << cShift) - iOffset;
1997                         pY[(((iColumn + 1) >> 4) << 8) + idxCC[iRow][(iColumn + 1) & 15]] = (((PixelI)pSrc[1]) << cShift) - iOffset;
1998                         pY[((iColumn >> 4) << 8) + idxCC[iRow + 1][iColumn & 15]] = (((PixelI)pSrc[2]) << cShift) - iOffset;
1999                         pY[(((iColumn + 1) >> 4) << 8) + idxCC[iRow + 1][(iColumn + 1) & 15]] = (((PixelI)pSrc[3]) << cShift) - iOffset;
2000                     }
2001                     break;
2002 
2003                 default:
2004                     assert(0);
2005                     break;
2006             }
2007         }
2008         else if(bdExt == BD_16){
2009             const U16 * pSrc = (U16 *)pSrc0 + pSC->WMII.cLeadingPadding;
2010             const size_t cStride = cPixelStride / sizeof(U16);
2011             const PixelI iOffset = ((1 << 15) >> nLen) << cShift;
2012 
2013             switch(cfExt){
2014                 case CF_RGB:
2015                     for(iColumn = 0; iColumn < cColumn; iColumn ++, pSrc += cStride){
2016                         PixelI r = ((PixelI)pSrc[0] >> nLen) << cShift, g = ((PixelI)pSrc[1] >> nLen) << cShift, b = ((PixelI)pSrc[2] >> nLen) << cShift;
2017 
2018                         _CC(r, g, b); // color conversion
2019 
2020                         iPos = ((iColumn >> 4) << 8) + idxCC[iRow][iColumn & 0xf];
2021                         pU[iPos] = -r, pV[iPos] = b, pY[iPos] = g - iOffset;
2022                     }
2023                     break;
2024 
2025                 case Y_ONLY:
2026                 case YUV_444:
2027                 case NCOMPONENT:
2028                 {
2029                     const size_t cChannel = pSC->WMISCP.cChannel;
2030                     size_t iChannel;
2031 
2032                     for(iColumn = 0; iColumn < cColumn; iColumn ++, pSrc += cStride){
2033                         iPos = ((iColumn >> 4) << 8) + idxCC[iRow][iColumn & 0xf];
2034                         for(iChannel = 0; iChannel < cChannel; iChannel ++)
2035                             pSC->p1MBbuffer[iChannel][iPos] = (((PixelI)pSrc[iChannel] >> nLen) << cShift) - iOffset;
2036                     }
2037                     break;
2038                 }
2039 
2040                 case CMYK:
2041                 {
2042                     PixelI * pK = (cfInt == CMYK ? pSC->p1MBbuffer[3] : pY); // CMYK -> YUV_xxx transcoding!
2043 
2044                     for(iColumn = 0; iColumn < cColumn; iColumn ++, pSrc += cStride){
2045                         PixelI c = ((PixelI)pSrc[0] >> nLen) << cShift;
2046                         PixelI m = ((PixelI)pSrc[1] >> nLen) << cShift;
2047                         PixelI y = ((PixelI)pSrc[2] >> nLen) << cShift;
2048                         PixelI k = ((PixelI)pSrc[3] >> nLen) << cShift;
2049 
2050                         _CC_CMYK(c, m, y, k);
2051 
2052                         iPos = ((iColumn >> 4) << 8) + idxCC[iRow][iColumn & 0xf];
2053                         pU[iPos] = c, pV[iPos] = -y, pK[iPos] = k, pY[iPos] = iOffset - m;
2054                     }
2055                     break;
2056                 }
2057 
2058                 case YUV_422:
2059                     for(iColumn = 0; iColumn < cColumn; iColumn += 2, pSrc += cStride){
2060                         if(cfInt != Y_ONLY){
2061                             iPos = ((iColumn >> 4) << 7) + idxCC[iRow][(iColumn >> 1) & 7];
2062                             pU[iPos] = (((PixelI)pSrc[0]) << cShift) - iOffset;
2063                             pV[iPos] = (((PixelI)pSrc[2]) << cShift) - iOffset;
2064                         }
2065 
2066                         pY[((iColumn >> 4) << 8) + idxCC[iRow][iColumn & 15]] = (((PixelI)pSrc[1]) << cShift) - iOffset;
2067                         pY[(((iColumn + 1) >> 4) << 8) + idxCC[iRow][(iColumn + 1) & 15]] = (((PixelI)pSrc[3]) << cShift) - iOffset;
2068                     }
2069                     break;
2070 
2071                 case YUV_420:
2072                     for(iColumn = 0; iColumn < cColumn; iColumn += 2, pSrc += cStride){
2073                         if(cfInt != Y_ONLY){
2074                             iPos = ((iColumn >> 4) << 6) + idxCC_420[iRow >> 1][(iColumn >> 1) & 7];
2075                             pU[iPos] = (((PixelI)pSrc[4]) << cShift) - iOffset;
2076                             pV[iPos] = (((PixelI)pSrc[5]) << cShift) - iOffset;
2077                         }
2078 
2079                         pY[((iColumn >> 4) << 8) + idxCC[iRow][iColumn & 15]] = (((PixelI)pSrc[0]) << cShift) - iOffset;
2080                         pY[(((iColumn + 1) >> 4) << 8) + idxCC[iRow][(iColumn + 1) & 15]] = (((PixelI)pSrc[1]) << cShift) - iOffset;
2081                         pY[((iColumn >> 4) << 8) + idxCC[iRow + 1][iColumn & 15]] = (((PixelI)pSrc[2]) << cShift) - iOffset;
2082                         pY[(((iColumn + 1) >> 4) << 8) + idxCC[iRow + 1][(iColumn + 1) & 15]] = (((PixelI)pSrc[3]) << cShift) - iOffset;
2083                     }
2084                     break;
2085 
2086                 default:
2087                     assert(0);
2088                     break;
2089             }
2090         }
2091         else if(bdExt == BD_16S){
2092             const I16 * pSrc = (I16 *)pSrc0 + pSC->WMII.cLeadingPadding;
2093             const size_t cStride = cPixelStride / sizeof(I16);
2094 
2095             switch(cfExt){
2096                 case CF_RGB:
2097                     for(iColumn = 0; iColumn < cColumn; iColumn ++, pSrc += cStride){
2098                         PixelI r = ((PixelI)pSrc[0] >> nLen) << cShift, g = ((PixelI)pSrc[1] >> nLen) << cShift, b = ((PixelI)pSrc[2] >> nLen) << cShift;
2099 
2100                         _CC(r, g, b); // color conversion
2101 
2102                         iPos = ((iColumn >> 4) << 8) + idxCC[iRow][iColumn & 0xf];
2103                         pU[iPos] = -r, pV[iPos] = b, pY[iPos] = g;
2104                     }
2105                     break;
2106 
2107                 case Y_ONLY:
2108                 case YUV_444:
2109                 case NCOMPONENT:
2110 					{
2111 						const size_t cChannel = pSC->WMISCP.cChannel;
2112 						size_t iChannel;
2113 
2114 						for(iColumn = 0; iColumn < cColumn; iColumn ++, pSrc += cStride){
2115 							iPos = ((iColumn >> 4) << 8) + idxCC[iRow][iColumn & 0xf];
2116 							for(iChannel = 0; iChannel < cChannel; iChannel ++)
2117 								pSC->p1MBbuffer[iChannel][iPos] = (((PixelI)pSrc[iChannel] >> nLen) << cShift);
2118 						}
2119 					}
2120 				    break;
2121 
2122                 case CMYK:
2123 					{
2124 						PixelI * pK = (cfInt == CMYK ? pSC->p1MBbuffer[3] : pY); // CMYK -> YUV_xxx transcoding!
2125 
2126 						for(iColumn = 0; iColumn < cColumn; iColumn ++, pSrc += cStride){
2127 							PixelI c = ((PixelI)pSrc[0] >> nLen) << cShift;
2128 							PixelI m = ((PixelI)pSrc[1] >> nLen) << cShift;
2129 							PixelI y = ((PixelI)pSrc[2] >> nLen) << cShift;
2130 							PixelI k = ((PixelI)pSrc[3] >> nLen) << cShift;
2131 
2132 							_CC_CMYK(c, m, y, k);
2133 
2134 							iPos = ((iColumn >> 4) << 8) + idxCC[iRow][iColumn & 0xf];
2135 							pU[iPos] = c, pV[iPos] = -y, pK[iPos] = k, pY[iPos] = -m;
2136 						}
2137 					}
2138 					break;
2139 
2140                 default:
2141                     assert(0);
2142                     break;
2143             }
2144         }
2145         else if(bdExt == BD_16F){
2146             const I16 * pSrc = (I16 *)pSrc0 + pSC->WMII.cLeadingPadding;
2147             const size_t cStride = cPixelStride / sizeof(U16);
2148 
2149             switch(cfExt){
2150                 case CF_RGB:
2151                     for(iColumn = 0; iColumn < cColumn; iColumn ++, pSrc += cStride){
2152                         PixelI r = forwardHalf (pSrc[0]) << cShift;
2153                         PixelI g = forwardHalf (pSrc[1]) << cShift;
2154                         PixelI b = forwardHalf (pSrc[2]) << cShift;
2155 
2156                         _CC(r, g, b); // color conversion
2157 
2158                         iPos = ((iColumn >> 4) << 8) + idxCC[iRow][iColumn & 0xf];
2159                         pU[iPos] = -r, pV[iPos] = b, pY[iPos] = g;
2160                     }
2161                     break;
2162 
2163                 case Y_ONLY:
2164                 case YUV_444:
2165                 case NCOMPONENT:
2166 					{
2167 						const size_t cChannel = pSC->WMISCP.cChannel; // check xxx => Y_ONLY transcoding!
2168 						size_t iChannel;
2169 
2170 						for(iColumn = 0; iColumn < cColumn; iColumn ++, pSrc += cStride){
2171 							iPos = ((iColumn >> 4) << 8) + idxCC[iRow][iColumn & 0xf];
2172 							for(iChannel = 0; iChannel < cChannel; iChannel ++)
2173 								pSC->p1MBbuffer[iChannel][iPos] = forwardHalf (pSrc[iChannel]) << cShift;
2174 						}
2175 					}
2176 					break;
2177 
2178                 default:
2179                     assert(0);
2180                     break;
2181             }
2182         }
2183         else if(bdExt == BD_32){
2184             const U32 * pSrc = (U32 *)pSrc0 + pSC->WMII.cLeadingPadding;
2185             const size_t cStride = cPixelStride / sizeof(U32);
2186             const PixelI iOffset = ((1 << 31) >> nLen) << cShift;
2187 
2188             switch(cfExt){
2189                 case CF_RGB:
2190                     for(iColumn = 0; iColumn < cColumn; iColumn ++, pSrc += cStride){
2191                         PixelI r = (pSrc[0] >> nLen) << cShift, g = (pSrc[1] >> nLen) << cShift, b = (pSrc[2] >> nLen) << cShift;
2192 
2193                         _CC(r, g, b); // color conversion
2194 
2195                         iPos = ((iColumn >> 4) << 8) + idxCC[iRow][iColumn & 0xf];
2196                         pU[iPos] = -r, pV[iPos] = b, pY[iPos] = g - iOffset;
2197                     }
2198                     break;
2199 
2200                 case Y_ONLY:
2201                 case YUV_444:
2202                 case NCOMPONENT:
2203                 {
2204                     const size_t cChannel = pSC->WMISCP.cChannel;
2205                     size_t iChannel;
2206 
2207                     for(iColumn = 0; iColumn < cColumn; iColumn ++, pSrc += cStride){
2208                         iPos = ((iColumn >> 4) << 8) + idxCC[iRow][iColumn & 0xf];
2209                         for(iChannel = 0; iChannel < cChannel; iChannel ++)
2210                             pSC->p1MBbuffer[iChannel][iPos] = (pSrc[iChannel] >> nLen) << cShift;
2211                     }
2212                     break;
2213                 }
2214 
2215                 default:
2216                     assert(0);
2217                     break;
2218             }
2219         }
2220         else if(bdExt == BD_32S){
2221             const I32 * pSrc = (I32 *)pSrc0 + pSC->WMII.cLeadingPadding;
2222             const size_t cStride = cPixelStride / sizeof(I32);
2223 
2224             switch(cfExt){
2225                 case CF_RGB:
2226                     for(iColumn = 0; iColumn < cColumn; iColumn ++, pSrc += cStride){
2227                         PixelI r = (pSrc[0] >> nLen)<< cShift, g = (pSrc[1] >> nLen)<< cShift, b = (pSrc[2] >> nLen)<< cShift;
2228 
2229                         _CC(r, g, b); // color conversion
2230 
2231                         iPos = ((iColumn >> 4) << 8) + idxCC[iRow][iColumn & 0xf];
2232                         pU[iPos] = -r, pV[iPos] = b, pY[iPos] = g;
2233                     }
2234                     break;
2235 
2236                 case Y_ONLY:
2237                 case YUV_444:
2238                 case NCOMPONENT:
2239 					{
2240 						const size_t cChannel = pSC->WMISCP.cChannel; // check xxx => Y_ONLY transcoding!
2241 						size_t iChannel;
2242 
2243 						for(iColumn = 0; iColumn < cColumn; iColumn ++, pSrc += cStride){
2244 							iPos = ((iColumn >> 4) << 8) + idxCC[iRow][iColumn & 0xf];
2245 							for(iChannel = 0; iChannel < cChannel; iChannel ++)
2246 								pSC->p1MBbuffer[iChannel][iPos] = (pSrc[iChannel] >> nLen) << cShift;
2247 						}
2248 					}
2249 					break;
2250 
2251                 default:
2252                     assert(0);
2253                     break;
2254             }
2255         }
2256         else if(bdExt == BD_32F){
2257             const float * pSrc = (float *)pSrc0 + pSC->WMII.cLeadingPadding;
2258             const size_t cStride = cPixelStride / sizeof(float);
2259 
2260             switch(cfExt){
2261                 case CF_RGB:
2262                     for(iColumn = 0; iColumn < cColumn; iColumn ++, pSrc += cStride){
2263                         PixelI r = float2pixel (pSrc[0], nExpBias, nLen) << cShift;
2264                         PixelI g = float2pixel (pSrc[1], nExpBias, nLen) << cShift;
2265                         PixelI b = float2pixel (pSrc[2], nExpBias, nLen) << cShift;
2266 
2267                         _CC(r, g, b); // color conversion
2268 
2269                         iPos = ((iColumn >> 4) << 8) + idxCC[iRow][iColumn & 0xf];
2270                         pU[iPos] = -r, pV[iPos] = b, pY[iPos] = g;
2271                     }
2272                     break;
2273 
2274                 case Y_ONLY:
2275                 case YUV_444:
2276                 case NCOMPONENT:
2277 					{
2278 						const size_t cChannel = pSC->WMISCP.cChannel;
2279 						size_t iChannel;
2280 
2281 						for(iColumn = 0; iColumn < cColumn; iColumn ++, pSrc += cStride){
2282 							iPos = ((iColumn >> 4) << 8) + idxCC[iRow][iColumn & 0xf];
2283 							for(iChannel = 0; iChannel < cChannel; iChannel ++)
2284 								pSC->p1MBbuffer[iChannel][iPos] = float2pixel (pSrc[iChannel], nExpBias, nLen) << cShift;
2285 						}
2286 					}
2287 					break;
2288                 default:
2289                     assert(0);
2290                     break;
2291             }
2292         }
2293         else if(bdExt == BD_5){ // RGB 555, work for both big endian and small endian!
2294             const U8 * pSrc = pSrc0;
2295             const PixelI iOffset = (16 << cShift);
2296 
2297             assert(cfExt == CF_RGB);
2298 
2299             for(iColumn = 0; iColumn < cColumn; iColumn ++, pSrc += cPixelStride){
2300                 PixelI r = (PixelI)pSrc[0], g = (PixelI)pSrc[1], b = ((g >> 2) & 0x1F) << cShift;
2301 
2302                 g = ((r >> 5) + ((g & 3) << 3)) << cShift, r = (r & 0x1F) << cShift;
2303 
2304                 _CC(r, g, b); // color conversion
2305 
2306                 iPos = ((iColumn >> 4) << 8) + idxCC[iRow][iColumn & 0xf];
2307                 pU[iPos] = -r, pV[iPos] = b, pY[iPos] = g - iOffset;
2308             }
2309         }
2310         else if(bdExt == BD_565){ // RGB 565, works for both big endian and little endian!
2311             const U8 * pSrc = pSrc0;
2312             const PixelI iOffset = (32 << cShift);
2313 
2314             assert(cfExt == CF_RGB);
2315 
2316             for(iColumn = 0; iColumn < cColumn; iColumn ++, pSrc += cPixelStride){
2317                 PixelI r = (PixelI)pSrc[0], g = (PixelI)pSrc[1], b = (g >> 3) << (cShift + 1);
2318 
2319                 g = ((r >> 5) + ((g & 7) << 3)) << cShift, r = (r & 0x1F) << (cShift + 1);
2320 
2321                 _CC(r, g, b); // color conversion
2322 
2323                 iPos = ((iColumn >> 4) << 8) + idxCC[iRow][iColumn & 0xf];
2324                 pU[iPos] = -r, pV[iPos] = b, pY[iPos] = g - iOffset;
2325             }
2326         }
2327         else if(bdExt == BD_10){ //RGB 101010, work for both big endian and small endian!
2328             const U8 * pSrc = pSrc0;
2329             const PixelI iOffset = (512 << cShift);
2330 
2331             assert(cfExt == CF_RGB);
2332 
2333             for(iColumn = 0; iColumn < cColumn; iColumn ++, pSrc += cPixelStride){
2334                 PixelI r = (PixelI)pSrc[0], g = (PixelI)pSrc[1], b = (PixelI)pSrc[2];
2335 
2336                 r = (r + ((g & 3) << 8)) << cShift, g = ((g >> 2) + ((b & 0xF) << 6)) << cShift;
2337                 b = ((b >> 4) + (((PixelI)pSrc[3] & 0x3F) << 4)) << cShift;
2338 
2339                 _CC(r, g, b); // color conversion
2340 
2341                 iPos = ((iColumn >> 4) << 8) + idxCC[iRow][iColumn & 0xf];
2342                 pU[iPos] = -r, pV[iPos] = b, pY[iPos] = g - iOffset;
2343             }
2344         }
2345         else if(bdExt == BD_1){
2346             assert(cfExt == Y_ONLY);
2347             for(iColumn = 0; iColumn < cColumn; iColumn ++) {
2348                 pY[((iColumn >> 4) << 8) + idxCC[iRow][iColumn & 0xf]] = ((pSC->WMISCP.bBlackWhite + (pSrc0[iColumn >> 3] >> (7 - (iColumn & 7)))) & 1) << cShift;
2349             }
2350         }
2351 
2352         if(iRow + iRowStride < cRow) // centralized vertical padding!
2353             pSrc0 += pSC->WMIBI.cbStride;
2354     }
2355 
2356     padHorizontally(pSC); // centralized horizontal padding
2357 
2358     // centralized down-sampling
2359     if(pSC->m_bUVResolutionChange)
2360         downsampleUV(pSC);
2361 
2362     // centralized alpha channel handling
2363     if (pSC->WMISCP.uAlphaMode == 3)
2364         if(inputMBRowAlpha(pSC) != ICERR_OK)
2365             return ICERR_ERROR;
2366 
2367     return ICERR_OK;
2368 }
2369 
2370 
2371