//*@@@+++@@@@****************************************************************** // // Copyright © Microsoft Corp. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are met: // // • Redistributions of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // • Redistributions in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // POSSIBILITY OF SUCH DAMAGE. // //*@@@---@@@@****************************************************************** #include "strcodec.h" #include "perfTimer.h" #ifdef MEM_TRACE #define TRACE_MALLOC 1 #define TRACE_NEW 0 #define TRACE_HEAP 0 #include "memtrace.h" #endif //================================================================ // Quantization index tables //================================================================ const int blkOffset[16] = {0, 64, 16, 80, 128, 192, 144, 208, 32, 96, 48, 112, 160, 224, 176, 240}; const int blkOffsetUV[4] = {0, 32, 16, 48}; const int blkOffsetUV_422[8] = {0, 64, 16, 80, 32, 96, 48, 112}; const int dctIndex[3][16] = { /** permutation matrix tailored to the transform, nothing to do with ZZS **/ {0,5,1,6, 10,12,8,14, 2,4,3,7, 9,13,11,15}, //AC 444 {0,5,1,6, 10,12,8,14, 2,4,3,7, 9,13,11,15}, //AC 420 {0,128,64,208, 32,240,48,224, 16,192,80,144, 112,176,96,160 }, //DC 444 }; //================================================================ // Color conversion index table //================================================================ const U8 idxCC[16][16] = { {0x00, 0x01, 0x05, 0x04, 0x40, 0x41, 0x45, 0x44, 0x80, 0x81, 0x85, 0x84, 0xc0, 0xc1, 0xc5, 0xc4, }, {0x02, 0x03, 0x07, 0x06, 0x42, 0x43, 0x47, 0x46, 0x82, 0x83, 0x87, 0x86, 0xc2, 0xc3, 0xc7, 0xc6, }, {0x0a, 0x0b, 0x0f, 0x0e, 0x4a, 0x4b, 0x4f, 0x4e, 0x8a, 0x8b, 0x8f, 0x8e, 0xca, 0xcb, 0xcf, 0xce, }, {0x08, 0x09, 0x0d, 0x0c, 0x48, 0x49, 0x4d, 0x4c, 0x88, 0x89, 0x8d, 0x8c, 0xc8, 0xc9, 0xcd, 0xcc, }, {0x10, 0x11, 0x15, 0x14, 0x50, 0x51, 0x55, 0x54, 0x90, 0x91, 0x95, 0x94, 0xd0, 0xd1, 0xd5, 0xd4, }, {0x12, 0x13, 0x17, 0x16, 0x52, 0x53, 0x57, 0x56, 0x92, 0x93, 0x97, 0x96, 0xd2, 0xd3, 0xd7, 0xd6, }, {0x1a, 0x1b, 0x1f, 0x1e, 0x5a, 0x5b, 0x5f, 0x5e, 0x9a, 0x9b, 0x9f, 0x9e, 0xda, 0xdb, 0xdf, 0xde, }, {0x18, 0x19, 0x1d, 0x1c, 0x58, 0x59, 0x5d, 0x5c, 0x98, 0x99, 0x9d, 0x9c, 0xd8, 0xd9, 0xdd, 0xdc, }, {0x20, 0x21, 0x25, 0x24, 0x60, 0x61, 0x65, 0x64, 0xa0, 0xa1, 0xa5, 0xa4, 0xe0, 0xe1, 0xe5, 0xe4, }, {0x22, 0x23, 0x27, 0x26, 0x62, 0x63, 0x67, 0x66, 0xa2, 0xa3, 0xa7, 0xa6, 0xe2, 0xe3, 0xe7, 0xe6, }, {0x2a, 0x2b, 0x2f, 0x2e, 0x6a, 0x6b, 0x6f, 0x6e, 0xaa, 0xab, 0xaf, 0xae, 0xea, 0xeb, 0xef, 0xee, }, {0x28, 0x29, 0x2d, 0x2c, 0x68, 0x69, 0x6d, 0x6c, 0xa8, 0xa9, 0xad, 0xac, 0xe8, 0xe9, 0xed, 0xec, }, {0x30, 0x31, 0x35, 0x34, 0x70, 0x71, 0x75, 0x74, 0xb0, 0xb1, 0xb5, 0xb4, 0xf0, 0xf1, 0xf5, 0xf4, }, {0x32, 0x33, 0x37, 0x36, 0x72, 0x73, 0x77, 0x76, 0xb2, 0xb3, 0xb7, 0xb6, 0xf2, 0xf3, 0xf7, 0xf6, }, {0x3a, 0x3b, 0x3f, 0x3e, 0x7a, 0x7b, 0x7f, 0x7e, 0xba, 0xbb, 0xbf, 0xbe, 0xfa, 0xfb, 0xff, 0xfe, }, {0x38, 0x39, 0x3d, 0x3c, 0x78, 0x79, 0x7d, 0x7c, 0xb8, 0xb9, 0xbd, 0xbc, 0xf8, 0xf9, 0xfd, 0xfc, }, }; const U8 idxCC_420[8][8] = { {0x00, 0x01, 0x05, 0x04, 0x20, 0x21, 0x25, 0x24, }, {0x02, 0x03, 0x07, 0x06, 0x22, 0x23, 0x27, 0x26, }, {0x0a, 0x0b, 0x0f, 0x0e, 0x2a, 0x2b, 0x2f, 0x2e, }, {0x08, 0x09, 0x0d, 0x0c, 0x28, 0x29, 0x2d, 0x2c, }, {0x10, 0x11, 0x15, 0x14, 0x30, 0x31, 0x35, 0x34, }, {0x12, 0x13, 0x17, 0x16, 0x32, 0x33, 0x37, 0x36, }, {0x1a, 0x1b, 0x1f, 0x1e, 0x3a, 0x3b, 0x3f, 0x3e, }, {0x18, 0x19, 0x1d, 0x1c, 0x38, 0x39, 0x3d, 0x3c, }, }; /************************************************************************* gGDISignature *************************************************************************/ const Char gGDISignature[] = {'W', 'M', 'P', 'H', 'O', 'T', 'O', '\0'}; // check if enough memory allocated for the image buffer Int checkImageBuffer(CWMImageStrCodec * pSC, size_t cWidth, size_t cRows) { const BITDEPTH_BITS bd = pSC->WMISCP.bYUVData ? BD_32S : pSC->WMII.bdBitDepth; const COLORFORMAT cf = pSC->WMISCP.bYUVData ? pSC->m_param.cfColorFormat : pSC->WMII.cfColorFormat; size_t cBytes; Bool bLessThan64Bit = sizeof(void *) < 8; if(cf == YUV_420) cRows = (cRows + 1) / 2; if(cRows > pSC->WMIBI.cLine) return ICERR_ERROR; if(cf == YUV_422 || cf == YUV_420) cWidth = (cWidth + 1) / 2; if (bLessThan64Bit && (cWidth >> ((sizeof(size_t) * 8 - 5)))) { /** potential overflow - 32 bit pointers insufficient to address cache **/ /** this uses 2 macroblock row constraint, which is tighter than ensuring rollover doesn't occur below **/ return ICERR_ERROR; } cBytes = pSC->WMISCP.bYUVData ? cWidth * sizeof(PixelI) * (cf == YUV_420 ? 6 : (cf == YUV_422 ? 4 : (cf == YUV_444 ? 3 : 1))) : (bd == BD_1 ? (pSC->WMII.cBitsPerUnit * cWidth + 7) / 8 : (pSC->WMII.cBitsPerUnit + 7) / 8 * cWidth); return (cBytes > pSC->WMIBI.cbStride ? ICERR_ERROR : ICERR_OK); } Void writeQPIndex(BitIOInfo * pIO, U8 uiIndex, U32 cBits) { if(uiIndex == 0) putBit16(pIO, 1, 1); // default QP else{ putBit16(pIO, 0, 1); // non default QP putBit16(pIO, uiIndex - 1, cBits); } } U8 readQPIndex(BitIOInfo * pIO, U32 cBits) { if(getBit16(pIO, 1)) return 0; // default QP return (U8) getBit16(pIO, cBits) + 1; } Void getTilePos(CWMImageStrCodec* pSC, size_t mbX, size_t mbY) { if(mbX == 0){ // left image boundary pSC->cTileColumn = 0; } else if(pSC->cTileColumn < pSC->WMISCP.cNumOfSliceMinus1V && mbX == pSC->WMISCP.uiTileX[pSC->cTileColumn + 1]){ // left tile boundary pSC->cTileColumn ++; } if(mbY == 0){ // top image boundary pSC->cTileRow = 0; } else if(pSC->cTileRow < pSC->WMISCP.cNumOfSliceMinus1H && mbY == pSC->WMISCP.uiTileY[pSC->cTileRow + 1]){ // top tile boundary pSC->cTileRow ++; } pSC->m_bCtxLeft = (mbX == pSC->WMISCP.uiTileX[pSC->cTileColumn]); pSC->m_bCtxTop = (mbY == pSC->WMISCP.uiTileY[pSC->cTileRow]); pSC->m_bResetContext = pSC->m_bResetRGITotals = (((mbX - pSC->WMISCP.uiTileX[pSC->cTileColumn]) & 0xf) == 0); if(pSC->cTileColumn == pSC->WMISCP.cNumOfSliceMinus1V){ // last tile column if(mbX + 1 == pSC->cmbWidth) pSC->m_bResetContext = TRUE; } else if(mbX + 1 == pSC->WMISCP.uiTileX[pSC->cTileColumn + 1]) pSC->m_bResetContext = TRUE; } //================================================================ // utility functions for 2 macro block rows //================================================================ Void initMRPtr(CWMImageStrCodec* pSC) { size_t j, jend = (pSC->m_pNextSC != NULL); for (j = 0; j <= jend; j++) { memcpy (pSC->p0MBbuffer, pSC->a0MBbuffer, sizeof (pSC->p0MBbuffer)); memcpy (pSC->p1MBbuffer, pSC->a1MBbuffer, sizeof (pSC->p1MBbuffer)); pSC = pSC->m_pNextSC; } } Void advanceMRPtr(CWMImageStrCodec* pSC) { const COLORFORMAT cf = pSC->m_param.cfColorFormat; const int cpChroma = cblkChromas[cf] * 16; size_t i, j, jend = (pSC->m_pNextSC != NULL); assert(pSC->m_bSecondary == FALSE); for (j = 0; j <= jend; j++) { int cpStride = 16 * 16; for (i = 0; i < pSC->m_param.cNumChannels; i++) { pSC->pPlane[i] = pSC->p0MBbuffer[i]; pSC->p0MBbuffer[i] += cpStride; pSC->p1MBbuffer[i] += cpStride; cpStride = cpChroma; } pSC = pSC->m_pNextSC; } } /* advance to next MB row */ Void advanceOneMBRow(CWMImageStrCodec *pSC) { size_t i, j, jend = (pSC->m_pNextSC != NULL); CWMIPredInfo *pPredInfo; for (j = 0; j <= jend; j++) { for(i = 0; i < pSC->m_param.cNumChannels; i ++){ // swap current row and previous row pPredInfo = pSC->PredInfo[i]; pSC->PredInfo[i] = pSC->PredInfoPrevRow[i]; pSC->PredInfoPrevRow[i] = pPredInfo; } pSC = pSC->m_pNextSC; } } Void swapMRPtr(CWMImageStrCodec* pSC) { PixelI *pTemp[MAX_CHANNELS]; size_t j, jend = (pSC->m_pNextSC != NULL); for (j = 0; j <= jend; j++) { memcpy (pTemp, pSC->a0MBbuffer, sizeof (pSC->a0MBbuffer)); memcpy (pSC->a0MBbuffer, pSC->a1MBbuffer, sizeof (pSC->a0MBbuffer)); memcpy (pSC->a1MBbuffer, pTemp, sizeof (pSC->a0MBbuffer)); pSC = pSC->m_pNextSC; } } //================================================================ // Empty function to fill slot //================================================================ Int IDPEmpty(CWMImageStrCodec* pSC) { UNREFERENCED_PARAMETER( pSC ); return ICERR_OK; } ERR WMPAlloc(void** ppv, size_t cb) { *ppv = calloc(1, cb); return *ppv ? WMP_errSuccess : WMP_errOutOfMemory; } ERR WMPFree(void** ppv) { if (*ppv) { free(*ppv); *ppv = NULL; } return WMP_errSuccess; } //================================================================ // Streaming I/O functions //================================================================ ERR CreateWS_File(struct WMPStream** ppWS, const char* szFilename, const char* szMode) { ERR err = WMP_errSuccess; struct WMPStream* pWS = NULL; Call(WMPAlloc((void** )ppWS, sizeof(**ppWS))); pWS = *ppWS; pWS->Close = CloseWS_File; pWS->EOS = EOSWS_File; pWS->Read = ReadWS_File; pWS->Write = WriteWS_File; //pWS->GetLine = GetLineWS_File; pWS->SetPos = SetPosWS_File; pWS->GetPos = GetPosWS_File; #if defined(WIN32) && !defined(__MINGW32__) FailIf(0 != fopen_s(&pWS->state.file.pFile, szFilename, szMode), WMP_errFileIO); #else pWS->state.file.pFile = fopen(szFilename, szMode); FailIf(NULL == pWS->state.file.pFile, WMP_errFileIO); #endif Cleanup: return err; } ERR CloseWS_File(struct WMPStream** ppWS) { ERR err = WMP_errSuccess; struct WMPStream* pWS = *ppWS; fclose(pWS->state.file.pFile); Call(WMPFree((void**)ppWS)); Cleanup: return err; } Bool EOSWS_File(struct WMPStream* pWS) { return feof(pWS->state.file.pFile); } ERR ReadWS_File(struct WMPStream* pWS, void* pv, size_t cb) { // ERR err = WMP_errSuccess; return (fread(pv, cb, 1, pWS->state.file.pFile) == 1) ? WMP_errSuccess : WMP_errFileIO; } ERR WriteWS_File(struct WMPStream* pWS, const void* pv, size_t cb) { ERR err = WMP_errSuccess; if(0 != cb) { FailIf(1 != fwrite(pv, cb, 1, pWS->state.file.pFile), WMP_errFileIO); } Cleanup: return err; } ERR SetPosWS_File(struct WMPStream* pWS, size_t offPos) { ERR err = WMP_errSuccess; FailIf(0 != fseek(pWS->state.file.pFile, (long)offPos, SEEK_SET), WMP_errFileIO); Cleanup: return err; } ERR GetPosWS_File(struct WMPStream* pWS, size_t* poffPos) { ERR err = WMP_errSuccess; long lOff = 0; FailIf(-1 == (lOff = ftell(pWS->state.file.pFile)), WMP_errFileIO); *poffPos = (size_t)lOff; Cleanup: return err; } //---------------------------------------------------------------- ERR CreateWS_Memory(struct WMPStream** ppWS, void* pv, size_t cb) { ERR err = WMP_errSuccess; struct WMPStream* pWS = NULL; Call(WMPAlloc((void** )ppWS, sizeof(**ppWS))); pWS = *ppWS; pWS->state.buf.pbBuf = pv; pWS->state.buf.cbBuf = cb; pWS->state.buf.cbCur = 0; pWS->Close = CloseWS_Memory; pWS->EOS = EOSWS_Memory; pWS->Read = ReadWS_Memory; pWS->Write = WriteWS_Memory; pWS->SetPos = SetPosWS_Memory; pWS->GetPos = GetPosWS_Memory; Cleanup: return err; } ERR CloseWS_Memory(struct WMPStream** ppWS) { ERR err = WMP_errSuccess; Call(WMPFree((void**)ppWS)); Cleanup: return err; } Bool EOSWS_Memory(struct WMPStream* pWS) { return pWS->state.buf.cbBuf <= pWS->state.buf.cbCur; } ERR ReadWS_Memory(struct WMPStream* pWS, void* pv, size_t cb) { ERR err = WMP_errSuccess; // FailIf(pWS->state.buf.cbBuf < pWS->state.buf.cbCur, WMP_errBufferOverflow); if(pWS->state.buf.cbBuf < pWS->state.buf.cbCur) return err; FailIf(pWS->state.buf.cbCur + cb < pWS->state.buf.cbCur, WMP_errBufferOverflow); if (pWS->state.buf.cbBuf < pWS->state.buf.cbCur + cb) { cb = pWS->state.buf.cbBuf - pWS->state.buf.cbCur; } memcpy(pv, pWS->state.buf.pbBuf + pWS->state.buf.cbCur, cb); pWS->state.buf.cbCur += cb; Cleanup: return err; } ERR WriteWS_Memory(struct WMPStream* pWS, const void* pv, size_t cb) { ERR err = WMP_errSuccess; FailIf(pWS->state.buf.cbCur + cb < pWS->state.buf.cbCur, WMP_errBufferOverflow); FailIf(pWS->state.buf.cbBuf < pWS->state.buf.cbCur + cb, WMP_errBufferOverflow); memcpy(pWS->state.buf.pbBuf + pWS->state.buf.cbCur, pv, cb); pWS->state.buf.cbCur += cb; Cleanup: return err; } ERR SetPosWS_Memory(struct WMPStream* pWS, size_t offPos) { ERR err = WMP_errSuccess; //While the following condition is possibly useful, failure occurs //at the end of a file since packets beyond the end may be accessed //FailIf(pWS->state.buf.cbBuf < offPos, WMP_errBufferOverflow); pWS->state.buf.cbCur = offPos; //Cleanup: return err; } ERR GetPosWS_Memory(struct WMPStream* pWS, size_t* poffPos) { *poffPos = pWS->state.buf.cbCur; return WMP_errSuccess; } //================================================================= // Linked list based WMPStream // - for indefinite size, multiple stream out // - reads not supported in this mode //================================================================= ERR CreateWS_List(struct WMPStream** ppWS) { ERR err = WMP_errSuccess; struct WMPStream* pWS = NULL; Call(WMPAlloc((void** )ppWS, sizeof(**ppWS) + PACKETLENGTH + sizeof(void *))); pWS = *ppWS; pWS->state.buf.pbBuf = (U8 *)pWS + sizeof(**ppWS) + sizeof(void *); // first buffer points here memset(pWS->state.buf.pbBuf - sizeof(void *), 0, sizeof(void *)); pWS->state.buf.cbBuf = PACKETLENGTH; pWS->state.buf.cbCur = 0; pWS->state.buf.cbBufCount = 0; pWS->Close = CloseWS_List; pWS->EOS = NULL; // doesn't get called pWS->Read = ReadWS_List; pWS->Write = WriteWS_List; pWS->SetPos = SetPosWS_List; pWS->GetPos = GetPosWS_List; //printf ("create buffer %d: %x\n", pWS->state.buf.cbBufCount, pWS->state.buf.pbBuf); Cleanup: return err; } ERR CloseWS_List(struct WMPStream** ppWS) { ERR err = WMP_errSuccess; if (ppWS) { U8 *pBuf = (U8 *)(ppWS[0] + 1); // pointer to buffer U8 *pNext = (U8 *)(((void **)pBuf)[0]); while (pNext) { //struct WMPStream *pWS = ppWS[0]; pBuf = pNext; pNext = (U8 *)(((void **)(pBuf))[0]); //printf ("delete buffer %x\n", pBuf); Call(WMPFree((void**)&pBuf)); } } Call(WMPFree((void**)ppWS)); Cleanup: return err; } ERR ReadWS_List(struct WMPStream* pWS, void* pv, size_t cb) { ERR err = WMP_errSuccess; FailIf(pWS->state.buf.cbCur + cb < pWS->state.buf.cbCur, WMP_errBufferOverflow); if (pWS->state.buf.cbBuf < pWS->state.buf.cbCur + PACKETLENGTH * pWS->state.buf.cbBufCount + cb) { cb = pWS->state.buf.cbBuf - pWS->state.buf.cbCur - PACKETLENGTH * pWS->state.buf.cbBufCount; } while (cb) { size_t cl = PACKETLENGTH - pWS->state.buf.cbCur; if (cl > cb) cl = cb; memcpy(pv, pWS->state.buf.pbBuf + pWS->state.buf.cbCur, cl); pWS->state.buf.cbCur += cl; pv = (void *)((U8 *)pv + cl); cb -= cl; if (pWS->state.buf.cbCur == PACKETLENGTH) { pWS->state.buf.pbBuf = (U8 *)((void **)(pWS->state.buf.pbBuf - sizeof(void *)))[0] + sizeof(void *); pWS->state.buf.cbCur = 0; pWS->state.buf.cbBufCount++; //printf ("read buffer %d: %x\n", pWS->state.buf.cbBufCount, pWS->state.buf.pbBuf); } } Cleanup: return err; } ERR WriteWS_List(struct WMPStream* pWS, const void* pv, size_t cb) { ERR err = WMP_errSuccess; FailIf(pWS->state.buf.cbCur + cb < pWS->state.buf.cbCur, WMP_errBufferOverflow); FailIf(pWS->state.buf.cbBuf < pWS->state.buf.cbCur + cb, WMP_errBufferOverflow); while (cb) { size_t cl = PACKETLENGTH - pWS->state.buf.cbCur; if (cl > cb) cl = cb; memcpy(pWS->state.buf.pbBuf + pWS->state.buf.cbCur, pv, cl); pWS->state.buf.cbCur += cl; pv = (const void *)((U8 *)pv + cl); cb -= cl; if (pWS->state.buf.cbCur == PACKETLENGTH) { // allocate next packet in list U8 *pBuf = NULL; void **pPtrLoc = (void **)(pWS->state.buf.pbBuf - sizeof(void *)); Call(WMPAlloc((void **)&pBuf, PACKETLENGTH + sizeof(void *))); pPtrLoc[0] = (void *)pBuf; pWS->state.buf.pbBuf = pBuf + sizeof(void *); pWS->state.buf.cbBuf += PACKETLENGTH; memset(pBuf, 0, sizeof(void *)); pWS->state.buf.cbCur = 0; pWS->state.buf.cbBufCount++; //printf ("create buffer %d: %x\n", pWS->state.buf.cbBufCount, pWS->state.buf.pbBuf); } } Cleanup: return err; } ERR SetPosWS_List(struct WMPStream* pWS, size_t offPos) { ERR err = WMP_errSuccess; // get the first buffer U8 *pBuf = (U8 *)(pWS + 1); // pointer to buffer pWS->state.buf.cbCur = 0; pWS->state.buf.cbBufCount = 0; while (offPos >= PACKETLENGTH && pBuf != NULL) { pBuf = (U8 *)(((void **)pBuf)[0]); offPos -= PACKETLENGTH; pWS->state.buf.cbBufCount++; } if (pBuf == NULL) goto Cleanup; pWS->state.buf.cbCur = offPos; pWS->state.buf.pbBuf = pBuf + sizeof(void *); //printf ("seek buffer %d: %x\n", pWS->state.buf.cbBufCount, pWS->state.buf.pbBuf); Cleanup: return err; } ERR GetPosWS_List(struct WMPStream* pWS, size_t* poffPos) { *poffPos = pWS->state.buf.cbCur + PACKETLENGTH * pWS->state.buf.cbBufCount; return WMP_errSuccess; } //================================================================ // Simple BitIO access functions //================================================================ // init SimpleBitIO ERR attach_SB(SimpleBitIO* pSB, struct WMPStream* pWS) { pSB->pWS = pWS; pSB->cbRead = 0; pSB->bAccumulator = 0; pSB->cBitLeft = 0; return WMP_errSuccess; } // extract upto 32bit from input stream U32 getBit32_SB(SimpleBitIO* pSB, U32 cBits) { U32 rc = 0; while (pSB->cBitLeft < cBits) { rc <<= pSB->cBitLeft; rc |= pSB->bAccumulator >> (8 - pSB->cBitLeft); cBits -= pSB->cBitLeft; pSB->pWS->Read(pSB->pWS, &pSB->bAccumulator, 1); pSB->cbRead++; pSB->cBitLeft = 8; } rc <<= cBits; rc |= pSB->bAccumulator >> (8 - cBits); pSB->bAccumulator <<= cBits; pSB->cBitLeft -= cBits; return rc; } // ignore input to byte boundary Void flushToByte_SB(SimpleBitIO* pSB) { pSB->bAccumulator = 0; pSB->cBitLeft = 0; } // return read byte count U32 getByteRead_SB(SimpleBitIO* pSB) { return pSB->cbRead; } ERR detach_SB(SimpleBitIO* pSB) { assert(0 == pSB->cBitLeft); pSB->pWS = NULL; return WMP_errSuccess; } //================================================================ // Memory access functions //================================================================ #if (defined(WIN32) && !defined(UNDER_CE) && (!defined(__MINGW32__) || defined(__MINGW64_TOOLCHAIN__))) || (defined(UNDER_CE) && defined(_ARM_)) // WinCE ARM and Desktop x86 #else // other platform #ifdef _BIG__ENDIAN_ #define _byteswap_ulong(x) (x) #else // _BIG__ENDIAN_ U32 _byteswap_ulong(U32 bits) { U32 r = (bits & 0xffu) << 24; r |= (bits << 8) & 0xff0000u; r |= ((bits >> 8) & 0xff00u); r |= ((bits >> 24) & 0xffu); return r; } #endif // _BIG__ENDIAN_ #endif U32 load4BE(void* pv) { #ifdef _BIG__ENDIAN_ return (*(U32*)pv); #else // _BIG__ENDIAN_ #if defined(_M_IA64) || defined(_ARM_) U32 v; v = ((U16 *) pv)[0]; v |= ((U32)((U16 *) pv)[1]) << 16; return _byteswap_ulong(v); #else // _M_IA64 return _byteswap_ulong(*(U32*)pv); #endif // _M_IA64 #endif // _BIG__ENDIAN_ } #define LOAD16 load4BE #ifdef _BIG__ENDIAN_ #define WRITESWAP_ENDIAN(a) ((a)>>16) #else // _BIG__ENDIAN_ #define WRITESWAP_ENDIAN(a) _byteswap_ulong(a) #endif // _BIG__ENDIAN_ //================================================================ // Bit I/O functions //================================================================ Int allocateBitIOInfo(CWMImageStrCodec* pSC) { U32 cNumBitIO; SUBBAND sbSubband = pSC->WMISCP.sbSubband; pSC->cSB = (sbSubband == SB_DC_ONLY ? 1 : (sbSubband == SB_NO_HIGHPASS ? 2 : (sbSubband == SB_NO_FLEXBITS ? 3 : 4))); // # of additional BitIOs other than pSC->pIOHeader if (!pSC->m_param.bIndexTable) { // pure streaming mode, no index table, no additional BitIO! assert (pSC->WMISCP.bfBitstreamFormat == SPATIAL && pSC->WMISCP.cNumOfSliceMinus1H + pSC->WMISCP.cNumOfSliceMinus1V == 0); cNumBitIO = 0; } else if(pSC->WMISCP.bfBitstreamFormat == SPATIAL) cNumBitIO = pSC->WMISCP.cNumOfSliceMinus1V + 1; else cNumBitIO = (pSC->WMISCP.cNumOfSliceMinus1V + 1) * pSC->cSB; if(cNumBitIO > MAX_TILES * 4) return ICERR_ERROR; // allocate additional BitIos if(cNumBitIO > 0){ U32 i = 0; size_t cb = sizeof(BitIOInfo) * cNumBitIO + (PACKETLENGTH * 4 - 1) + PACKETLENGTH * 4 * cNumBitIO; U8* pb = (U8*)malloc(cb); if (NULL == pb) return ICERR_ERROR; memset(pb, 0, cb); pSC->m_ppBitIO = (BitIOInfo**)pb; pb += sizeof(BitIOInfo) * cNumBitIO; pb = (U8*)ALIGNUP(pb, PACKETLENGTH * 4) + PACKETLENGTH * 2; for (i = 0; i < cNumBitIO; ++i){ pSC->m_ppBitIO[i] = (BitIOInfo*)pb; pb += PACKETLENGTH * 4; } // allocate index table if(cNumBitIO > MAX_TILES * 4 || pSC->WMISCP.cNumOfSliceMinus1H >= MAX_TILES) return ICERR_ERROR; pSC->pIndexTable = malloc(cNumBitIO * (pSC->WMISCP.cNumOfSliceMinus1H + 1) * sizeof(size_t)); if(NULL == pSC->pIndexTable) return ICERR_ERROR; } pSC->cNumBitIO = cNumBitIO; return ICERR_OK; } Int setBitIOPointers(CWMImageStrCodec* pSC) { if(pSC->cNumBitIO > 0){ U32 i; for(i = 0; i <= pSC->WMISCP.cNumOfSliceMinus1V; i ++){ CCodingContext * pContext = &pSC->m_pCodingContext[i]; if(pSC->WMISCP.bfBitstreamFormat == SPATIAL){ pContext->m_pIODC = pContext->m_pIOLP = pContext->m_pIOAC = pContext->m_pIOFL = pSC->m_ppBitIO[i]; } else{ U32 j = pSC->cSB; pContext->m_pIODC = pSC->m_ppBitIO[i * j]; if(j > 1) pContext->m_pIOLP = pSC->m_ppBitIO[i * j + 1]; if(j > 2) pContext->m_pIOAC = pSC->m_ppBitIO[i * j + 2]; if(j > 3) pContext->m_pIOFL = pSC->m_ppBitIO[i * j + 3]; } } } else{ // streamimg mode CCodingContext * pContext = &pSC->m_pCodingContext[0]; pContext->m_pIODC = pContext->m_pIOLP = pContext->m_pIOAC = pContext->m_pIOFL = pSC->pIOHeader; } return ICERR_OK; } Int allocateTileInfo(CWMImageStrCodec * pSC) { size_t i; if(pSC->WMISCP.cNumOfSliceMinus1V >= MAX_TILES) return ICERR_ERROR; pSC->pTile = (CWMITile *)malloc((pSC->WMISCP.cNumOfSliceMinus1V + 1) * sizeof(CWMITile)); if(pSC->pTile == NULL) return ICERR_ERROR; memset(pSC->pTile, 0, (pSC->WMISCP.cNumOfSliceMinus1V + 1) * sizeof(CWMITile)); for(i = 0; i <= pSC->WMISCP.cNumOfSliceMinus1V; i ++) pSC->pTile[i].cNumQPHP = pSC->pTile[i].cNumQPLP = 1, pSC->pTile[i].cBitsHP = pSC->pTile[i].cBitsLP = 0; return ICERR_OK; } Void freeTileInfo(CWMImageStrCodec * pSC) { size_t iTile; if((pSC->m_param.uQPMode & 1) != 0) // not DC uniform for(iTile = 0; iTile <= pSC->WMISCP.cNumOfSliceMinus1V; iTile ++) freeQuantizer(pSC->pTile[iTile].pQuantizerDC); else freeQuantizer(pSC->pTile[0].pQuantizerDC); if(pSC->WMISCP.sbSubband != SB_DC_ONLY) if((pSC->m_param.uQPMode & 2) != 0) // not LP uniform for(iTile = 0; iTile <= pSC->WMISCP.cNumOfSliceMinus1V; iTile ++) freeQuantizer(pSC->pTile[iTile].pQuantizerLP); else freeQuantizer(pSC->pTile[0].pQuantizerLP); if(pSC->WMISCP.sbSubband != SB_DC_ONLY && pSC->WMISCP.sbSubband != SB_NO_HIGHPASS) if((pSC->m_param.uQPMode & 4) != 0) // not HP uniform for(iTile = 0; iTile <= pSC->WMISCP.cNumOfSliceMinus1V; iTile ++) freeQuantizer(pSC->pTile[iTile].pQuantizerHP); else freeQuantizer(pSC->pTile[0].pQuantizerHP); if(pSC->pTile != NULL) free(pSC->pTile); } Int allocateQuantizer(CWMIQuantizer * pQuantizer[MAX_CHANNELS], size_t cChannel, size_t cQP) { size_t iCh; if(cQP > 16 || cChannel > MAX_CHANNELS) return ICERR_ERROR; pQuantizer[0] = (CWMIQuantizer *)malloc(cQP * sizeof(CWMIQuantizer) * cChannel); if(pQuantizer[0] == NULL) return ICERR_ERROR; for(iCh = 1; iCh < cChannel; iCh ++) pQuantizer[iCh] = pQuantizer[iCh - 1] + cQP; return ICERR_OK; } Void freeQuantizer(CWMIQuantizer * pQuantizer[MAX_CHANNELS]) { if(pQuantizer[0] != NULL) free(pQuantizer[0]); } Void formatQuantizer(CWMIQuantizer * pQuantizer[MAX_CHANNELS], U8 cChMode, size_t cCh, size_t iPos, Bool bShiftedUV, Bool bScaledArith) { size_t iCh; for(iCh = 0; iCh < cCh; iCh ++){ if(iCh > 0) if(cChMode == 0) // uniform pQuantizer[iCh][iPos] = pQuantizer[0][iPos]; else if(cChMode == 1) // mixed pQuantizer[iCh][iPos] = pQuantizer[1][iPos]; remapQP(pQuantizer[iCh] + iPos, (iCh > 0 && bShiftedUV == TRUE) ? SHIFTZERO - 1 : SHIFTZERO, bScaledArith); } } Void setUniformQuantizer(CWMImageStrCodec * pSC, size_t sb) { size_t iCh, iTile; for(iCh = 0; iCh < pSC->m_param.cNumChannels; iCh ++) for(iTile = 1; iTile <= pSC->WMISCP.cNumOfSliceMinus1V; iTile ++) if(sb == 0) // DC pSC->pTile[iTile].pQuantizerDC[iCh] = pSC->pTile[0].pQuantizerDC[iCh]; else if(sb == 1) // LP pSC->pTile[iTile].pQuantizerLP[iCh] = pSC->pTile[0].pQuantizerLP[iCh]; else // HP pSC->pTile[iTile].pQuantizerHP[iCh] = pSC->pTile[0].pQuantizerHP[iCh]; } Void useDCQuantizer(CWMImageStrCodec * pSC, size_t iTile) { size_t iCh; for(iCh = 0; iCh < pSC->m_param.cNumChannels; iCh ++) pSC->pTile[iTile].pQuantizerLP[iCh][0] = *pSC->pTile[iTile].pQuantizerDC[iCh]; } Void useLPQuantizer(CWMImageStrCodec * pSC, size_t cQP, size_t iTile) { size_t iCh, iQP; for(iCh = 0; iCh < pSC->m_param.cNumChannels; iCh ++) for(iQP = 0; iQP < cQP; iQP ++) pSC->pTile[iTile].pQuantizerHP[iCh][iQP] = pSC->pTile[iTile].pQuantizerLP[iCh][iQP]; } U8 dquantBits(U8 cQP) { return (cQP < 2 ? 0 : (cQP < 4 ? 1 : (cQP < 6 ? 2 : (cQP < 10 ? 3 : 4)))); } #ifndef ARMOPT_BITIO U32 peekBit16(BitIOInfo* pIO, U32 cBits) { PEEKBIT16(pIO, cBits); } U32 flushBit16(BitIOInfo* pIO, U32 cBits) { FLUSHBIT16(pIO, cBits); } U32 getBit16(BitIOInfo* pIO, U32 cBits) { U32 uiRet = peekBit16(pIO, cBits); flushBit16(pIO, cBits); return uiRet; } U32 getBool16(BitIOInfo* pIO) { U32 uiRet = peekBit16(pIO, 1); flushBit16(pIO, 1); return uiRet; } /** this function returns cBits if zero is read, or a signed value if first cBits are not all zero **/ I32 getBit16s(BitIOInfo* pIO, U32 cBits) { U32 uiRet = peekBit16(pIO, cBits + 1); if (uiRet < 2) { flushBit16(pIO, cBits); return 0; } else { flushBit16(pIO, cBits + 1); if (uiRet & 1) return (-(I32)(uiRet >> 1)); else return (I32)(uiRet >> 1); } } U32 getBit32(BitIOInfo* pIO, U32 cBits) { U32 uiRet = 0; assert(0 <= (I32)cBits && cBits <= 32); if (16 < cBits) { uiRet = getBit16(pIO, 16); cBits -= 16; uiRet <<= cBits; } uiRet |= getBit16(pIO, cBits); return uiRet; } U32 flushToByte(BitIOInfo* pIO) { return flushBit16(pIO, (16 - pIO->cBitsUsed) & 7); } #endif // ARMOPT_BITIO //---------------------------------------------------------------- Void putBit16z(BitIOInfo* pIO, U32 uiBits, U32 cBits) { assert(cBits <= 16); assert(0 == uiBits >> cBits); pIO->uiAccumulator = (pIO->uiAccumulator << cBits) | uiBits; pIO->cBitsUsed += cBits; *(U16*)pIO->pbCurrent = (U16)WRITESWAP_ENDIAN(pIO->uiAccumulator << (32 - pIO->cBitsUsed)); pIO->pbCurrent = MASKPTR(pIO->pbCurrent + ((pIO->cBitsUsed >> 3) & 2), pIO->iMask); pIO->cBitsUsed &= 16 - 1; } Void putBit16(BitIOInfo* pIO, U32 uiBits, U32 cBits) { assert(cBits <= 16); uiBits &= ~(-1 << cBits); putBit16z(pIO, uiBits, cBits); } Void putBit32(BitIOInfo* pIO, U32 uiBits, U32 cBits) { assert(0 <= (I32)cBits && cBits <= 32); if (16 < cBits) { putBit16(pIO, uiBits >> (cBits - 16), 16); cBits -= 16; } putBit16(pIO, uiBits, cBits); } Void fillToByte(BitIOInfo* pIO) { putBit16z(pIO, 0, (16 - pIO->cBitsUsed) & 7); } //---------------------------------------------------------------- U32 getBit16_S(CWMImageStrCodec* pSC, BitIOInfo* pIO, U32 cBits) { U32 rc = getBit16(pIO, cBits); readIS_L1(pSC, pIO); return rc; } U32 putBit16_S(CWMImageStrCodec* pSC, BitIOInfo* pIO, U32 uiBits, U32 cBits) { putBit16(pIO, uiBits, cBits); writeIS_L1(pSC, pIO); return 0; } //---------------------------------------------------------------- // Query buffered data size held in BitIOInfo // Write() for Enc, Read() for Dec //---------------------------------------------------------------- U32 getSizeRead(BitIOInfo* pIO) { return (U32)(UINTPTR_T)(pIO->pbStart + PACKETLENGTH * 2 - pIO->pbCurrent) - pIO->cBitsUsed / 8; } U32 getSizeWrite(BitIOInfo* pIO) { return (U32)(UINTPTR_T)(pIO->pbCurrent + (pIO->pbStart <= pIO->pbCurrent ? 0 : PACKETLENGTH * 2) - pIO->pbStart) + pIO->cBitsUsed / 8; } //---------------------------------------------------------------- // Query stream offset from attached BitIO object for dec //---------------------------------------------------------------- U32 getPosRead(BitIOInfo* pIO) { size_t cbCached = (pIO->pbStart + PACKETLENGTH * 2 - pIO->pbCurrent) - pIO->cBitsUsed / 8; return (U32)(pIO->offRef - cbCached); } //================================================================ // Block I/O functions //================================================================ #ifndef ARMOPT_BITIO ERR attachISRead(BitIOInfo* pIO, struct WMPStream* pWS, CWMImageStrCodec* pSC) { UNREFERENCED_PARAMETER( pSC ); pWS->GetPos(pWS, &pIO->offRef); pIO->pbStart = (U8*)pIO - PACKETLENGTH * 2; pIO->pbCurrent = pIO->pbStart; PERFTIMER_STOP(pSC->m_fMeasurePerf, pSC->m_ptEncDecPerf); pWS->SetPos(pWS, pIO->offRef); pWS->Read(pWS, pIO->pbStart, PACKETLENGTH * 2); PERFTIMER_START(pSC->m_fMeasurePerf, pSC->m_ptEncDecPerf); pIO->offRef += PACKETLENGTH * 2; pIO->uiAccumulator = load4BE(pIO->pbStart); pIO->cBitsUsed = 0; pIO->iMask = ~(PACKETLENGTH * 2); pIO->iMask &= ~1; pIO->pWS = pWS; return WMP_errSuccess; } ERR readIS(CWMImageStrCodec* pSC, BitIOInfo* pIO) { ERR err = WMP_errSuccess; UNREFERENCED_PARAMETER( pSC ); if (PACKET1(pIO->pbStart, pIO->pbCurrent, PACKETLENGTH)) { struct WMPStream *pWS = pIO->pWS; PERFTIMER_STOP(pSC->m_fMeasurePerf, pSC->m_ptEncDecPerf); //Call(0 != pIO->pWS->Read(pIO->pWS, pIO->pbStart, PACKETLENGTH)); // TODO: add error checking code pWS->SetPos(pWS, pIO->offRef); pWS->Read(pWS, pIO->pbStart, PACKETLENGTH); pIO->offRef += PACKETLENGTH; PERFTIMER_START(pSC->m_fMeasurePerf, pSC->m_ptEncDecPerf); // make shadow copy for first 4B pIO->uiShadow = *(U32*)pIO->pbStart; // reposition pbPacket pointer pIO->pbStart = MASKPTR(pIO->pbStart + PACKETLENGTH, pIO->iMask); } //Cleanup: return err; } ERR detachISRead(CWMImageStrCodec* pSC, BitIOInfo* pIO) { ERR err = WMP_errSuccess; struct WMPStream* pWS = pIO->pWS; size_t cbRemain = 0; // we can ONLY detach IStream at byte boundary flushToByte(pIO); assert(0 == (pIO->cBitsUsed % 8)); Call(readIS_L1(pSC, pIO)); // set stream to right offset, undo buffering cbRemain = (pIO->pbStart + PACKETLENGTH * 2) - (pIO->pbCurrent + pIO->cBitsUsed / 8); pWS->SetPos(pWS, pIO->offRef - cbRemain); pIO->pWS = NULL; Cleanup: return err; } #endif // ARMOPT_BITIO //---------------------------------------------------------------- ERR attachISWrite(BitIOInfo* pIO, struct WMPStream* pWS) { pWS->GetPos(pWS, &pIO->offRef); pIO->pbStart = (U8*)pIO - PACKETLENGTH * 2; pIO->pbCurrent = pIO->pbStart; pIO->uiAccumulator = 0; pIO->cBitsUsed = 0; pIO->iMask = ~(PACKETLENGTH * 2); pIO->pWS = pWS; return WMP_errSuccess; } // write out packet if we have >=1 packet data filled ERR writeIS(CWMImageStrCodec* pSC, BitIOInfo* pIO) { ERR err = WMP_errSuccess; UNREFERENCED_PARAMETER( pSC ); if (PACKET1(pIO->pbStart, pIO->pbCurrent, PACKETLENGTH)) { PERFTIMER_STOP(pSC->m_fMeasurePerf, pSC->m_ptEncDecPerf); err = pIO->pWS->Write(pIO->pWS, pIO->pbStart, PACKETLENGTH); PERFTIMER_START(pSC->m_fMeasurePerf, pSC->m_ptEncDecPerf); Call(err); // reposition pbStart pointer pIO->pbStart = MASKPTR(pIO->pbStart + PACKETLENGTH, pIO->iMask); } Cleanup: return err; } // write out partially filled buffer and detach bitIO from IStream ERR detachISWrite(CWMImageStrCodec* pSC, BitIOInfo* pIO) { ERR err = WMP_errSuccess; // we can ONLY detach IStream at byte boundary assert(0 == (pIO->cBitsUsed % 8)); Call(writeIS_L1(pSC, pIO)); PERFTIMER_STOP(pSC->m_fMeasurePerf, pSC->m_ptEncDecPerf); err = pIO->pWS->Write(pIO->pWS, pIO->pbStart, pIO->pbCurrent + pIO->cBitsUsed / 8 - pIO->pbStart); PERFTIMER_START(pSC->m_fMeasurePerf, pSC->m_ptEncDecPerf); Call(err); pIO->pWS = NULL; Cleanup: return err; } //========================= // Performance Measurement //========================= #ifndef DISABLE_PERF_MEASUREMENT void OutputIndivPerfTimer(struct PERFTIMERSTATE *pPerfTimer, char *pszTimerName, char *pszDescription, float fltMegaPixels) { PERFTIMERRESULTS rResults; Bool fResult; fResult = FALSE; printf("%s (%s): ", pszTimerName, pszDescription); if (pPerfTimer) { fResult = PerfTimerGetResults(pPerfTimer, &rResults); if (fResult) { printf("%.3f milliseconds, %.6f MP/sec\n", (float)rResults.iElapsedTime / 1000000, 1000000000 * fltMegaPixels / rResults.iElapsedTime); if (rResults.iZeroTimeIntervals > 0) { printf(" *** WARNING: %d time intervals were measured as zero. " "This perf timer has insufficient precision!\n\n", (int) rResults.iZeroTimeIntervals); } } } if (FALSE == fResult) printf("Results not available!\n"); } void OutputPerfTimerReport(CWMImageStrCodec *pState) { float fltMegaPixels; assert(pState->m_fMeasurePerf); printf("***************************************************************************\n"); printf("* Perf Report\n"); printf("***************************************************************************\n\n"); fltMegaPixels = (float)pState->WMII.cWidth * pState->WMII.cHeight / 1000000; printf("Image Width = %d, Height = %d, total MegaPixels = %.1f MP\n", (int) pState->WMII.cWidth, (int) pState->WMII.cHeight, fltMegaPixels); OutputIndivPerfTimer(pState->m_ptEncDecPerf, "m_ptEncDecPerf", "excl I/O", fltMegaPixels); OutputIndivPerfTimer(pState->m_ptEndToEndPerf, "m_ptEndToEndPerf", "incl I/O", fltMegaPixels); } #endif // DISABLE_PERF_MEASUREMENT