24#include <winpr/wtypes.h> 
   25#include <winpr/assert.h> 
   26#include <winpr/cast.h> 
   28#include <freerdp/config.h> 
   30#include <freerdp/types.h> 
   31#include <freerdp/primitives.h> 
   32#include <freerdp/codec/color.h> 
   33#include "prim_internal.h" 
   /*
    * Copies a three-plane input frame (pSrcRaw) into the full-resolution
    * destination planes (pDstRaw) for the rectangle roi.
    *  - The Y plane is copied row by row with memcpy (nWidth bytes per row).
    *  - For each chroma source row y the destination rows 2*y and 2*y+1 and
    *    the destination columns 2*x and 2*x+1 are addressed.
    * Returns PRIMITIVES_SUCCESS.
    * NOTE(review): the roi parameter declaration, the stores inside the x
    * loop and all braces are not visible in this listing - confirm against
    * the complete source.
    */
   36static inline pstatus_t general_LumaToYUV444(
const BYTE* WINPR_RESTRICT pSrcRaw[3],
 
   37                                             const UINT32 srcStep[3],
 
   38                                             BYTE* WINPR_RESTRICT pDstRaw[3],
 
   39                                             const UINT32 dstStep[3],
 
   42  const UINT32 nWidth = roi->right - roi->left;
 
   43  const UINT32 nHeight = roi->bottom - roi->top;
 
   /* Chroma dimensions are rounded up so odd widths/heights are covered. */
   44  const UINT32 halfWidth = (nWidth + 1) / 2;
 
   45  const UINT32 halfHeight = (nHeight + 1) / 2;
 
   46  const UINT32 oddY = 1;
 
   47  const UINT32 evenY = 0;
 
   48  const UINT32 oddX = 1;
 
   49  const UINT32 evenX = 0;
 
   /* Source planes moved to the roi origin; planes 1 and 2 use halved top/left. */
   50  const BYTE* pSrc[3] = { pSrcRaw[0] + 1ULL * roi->top * srcStep[0] + roi->left,
 
   51                        pSrcRaw[1] + 1ULL * roi->top / 2 * srcStep[1] + roi->left / 2,
 
   52                        pSrcRaw[2] + 1ULL * roi->top / 2 * srcStep[2] + roi->left / 2 };
 
   /* Destination planes moved to the roi origin at full resolution. */
   53  BYTE* pDst[3] = { pDstRaw[0] + 1ULL * roi->top * dstStep[0] + roi->left,
 
   54                  pDstRaw[1] + 1ULL * roi->top * dstStep[1] + roi->left,
 
   55                  pDstRaw[2] + 1ULL * roi->top * dstStep[2] + roi->left };
 
   /* Plane 0: straight row copy. */
   59  for (
size_t y = 0; y < nHeight; y++)
 
   61    const BYTE* Ym = pSrc[0] + y * srcStep[0];
 
   62    BYTE* pY = pDst[0] + dstStep[0] * y;
 
   63    memcpy(pY, Ym, nWidth);
 
   /* Planes 1 and 2: one source row maps to destination rows val2y and val2y1. */
   68  for (UINT32 y = 0; y < halfHeight; y++)
 
   70    const UINT32 val2y = (2UL * y + evenY);
 
   71    const UINT32 val2y1 = val2y + oddY;
 
   72    const BYTE* Um = pSrc[1] + 1ULL * y * srcStep[1];
 
   73    const BYTE* Vm = pSrc[2] + 1ULL * y * srcStep[2];
 
   74    BYTE* pU = pDst[1] + 1ULL * dstStep[1] * val2y;
 
   75    BYTE* pV = pDst[2] + 1ULL * dstStep[2] * val2y;
 
   76    BYTE* pU1 = pDst[1] + 1ULL * dstStep[1] * val2y1;
 
   77    BYTE* pV1 = pDst[2] + 1ULL * dstStep[2] * val2y1;
 
   79    for (UINT32 x = 0; x < halfWidth; x++)
 
   81      const UINT32 val2x = 2UL * x + evenX;
 
   82      const UINT32 val2x1 = val2x + oddX;
 
   94  return PRIMITIVES_SUCCESS;
 
   /*
    * Merges an auxiliary ("chroma v1") three-plane frame into the YUV444
    * destination planes for the rectangle roi.
    *  - Rows of source plane 0 are copied (nWidth bytes each) into odd rows
    *    (2*k + 1) of destination plane 1 or destination plane 2.
    *  - Rows of source planes 1 and 2 are addressed at even destination rows
    *    (2*y) and odd destination columns (2*x + 1).
    * Returns PRIMITIVES_SUCCESS.
    * NOTE(review): the roi parameter, the uY/vY counters, the else branch,
    * any bounds checks and the stores in the last x loop are not visible in
    * this listing - confirm against the complete source.
    */
   97static inline pstatus_t general_ChromaV1ToYUV444(
const BYTE* WINPR_RESTRICT pSrcRaw[3],
 
   98                                                 const UINT32 srcStep[3],
 
   99                                                 BYTE* WINPR_RESTRICT pDstRaw[3],
 
  100                                                 const UINT32 dstStep[3],
 
  103  const UINT32 mod = 16;
 
  106  const UINT32 nWidth = roi->right - roi->left;
 
  107  const UINT32 nHeight = roi->bottom - roi->top;
 
  /* Unlike the luma path these halves are rounded down. */
  108  const UINT32 halfWidth = (nWidth) / 2;
 
  109  const UINT32 halfHeight = (nHeight) / 2;
 
  110  const UINT32 oddY = 1;
 
  111  const UINT32 evenY = 0;
 
  112  const UINT32 oddX = 1;
 
  /* Adds 16 - (nHeight % 16); a height that is already a multiple of 16 grows by 16. */
  115  const UINT32 padHeigth = nHeight + 16 - nHeight % 16;
 
  116  const BYTE* pSrc[3] = { pSrcRaw[0] + 1ULL * roi->top * srcStep[0] + roi->left,
 
  117                        pSrcRaw[1] + 1ULL * roi->top / 2 * srcStep[1] + roi->left / 2,
 
  118                        pSrcRaw[2] + 1ULL * roi->top / 2 * srcStep[2] + roi->left / 2 };
 
  119  BYTE* pDst[3] = { pDstRaw[0] + 1ULL * roi->top * dstStep[0] + roi->left,
 
  120                  pDstRaw[1] + 1ULL * roi->top * dstStep[1] + roi->left,
 
  121                  pDstRaw[2] + 1ULL * roi->top * dstStep[2] + roi->left };
 
  125  for (
size_t y = 0; y < padHeigth; y++)
 
  127    const BYTE* Ya = pSrc[0] + y * srcStep[0];
 
  /* With mod == 16 this is true for y % 16 in 0..7. */
  130    if ((y) % mod < (mod + 1) / 2)
 
  132      const size_t pos = (2 * uY++ + oddY);
 
  137      pX = pDst[1] + dstStep[1] * pos;
 
  /* Presumably the else branch (keyword not visible): target plane 2 instead. */
  141      const size_t pos = (2 * vY++ + oddY);
 
  146      pX = pDst[2] + dstStep[2] * pos;
 
  149    memcpy(pX, Ya, nWidth);
 
  153  for (UINT32 y = 0; y < halfHeight; y++)
 
  155    const UINT32 val2y = (y * 2UL + evenY);
 
  156    const BYTE* Ua = pSrc[1] + 1ULL * y * srcStep[1];
 
  157    const BYTE* Va = pSrc[2] + 1ULL * y * srcStep[2];
 
  158    BYTE* pU = pDst[1] + 1ULL * dstStep[1] * val2y;
 
  159    BYTE* pV = pDst[2] + 1ULL * dstStep[2] * val2y;
 
  161    for (UINT32 x = 0; x < halfWidth; x++)
 
  163      const UINT32 val2x1 = (x * 2 + oddX);
 
  169  return PRIMITIVES_SUCCESS;
 
  /*
   * Merges an auxiliary ("chroma v2") three-plane frame into destination
   * planes 1 and 2 for the rectangle roi.
   *  - Source plane 0 holds two side-by-side halves per row: the second half
   *    starts nTotalWidth / 2 bytes after the first.
   *  - Source planes 1 and 2 hold two side-by-side halves per row, the second
   *    one nTotalWidth / 4 bytes after the first. Their samples are written
   *    to odd destination rows (2*y + 1 + roi->top) at columns 4*x and
   *    4*x + 2.
   * nTotalHeight is marked unused. Returns PRIMITIVES_SUCCESS.
   * NOTE(review): the roi parameter, the stores in the first x loop and all
   * braces are not visible in this listing - confirm against the complete
   * source.
   */
  172static inline pstatus_t general_ChromaV2ToYUV444(
const BYTE* WINPR_RESTRICT pSrc[3],
 
  173                                                 const UINT32 srcStep[3], UINT32 nTotalWidth,
 
  174                                                 WINPR_ATTR_UNUSED UINT32 nTotalHeight,
 
  175                                                 BYTE* WINPR_RESTRICT pDst[3],
 
  176                                                 const UINT32 dstStep[3],
 
  179  const UINT32 nWidth = roi->right - roi->left;
 
  180  const UINT32 nHeight = roi->bottom - roi->top;
 
  181  const UINT32 halfWidth = (nWidth + 1) / 2;
 
  182  const UINT32 halfHeight = (nHeight + 1) / 2;
 
  183  const UINT32 quaterWidth = (nWidth + 3) / 4;
 
  /* Pass 1: every roi row, source plane 0. */
  186  for (UINT32 y = 0; y < nHeight; y++)
 
  188    const UINT32 yTop = y + roi->top;
 
  189    const BYTE* pYaU = pSrc[0] + 1ULL * srcStep[0] * yTop + roi->left / 2;
 
  190    const BYTE* pYaV = pYaU + nTotalWidth / 2;
 
  191    BYTE* pU = pDst[1] + 1ULL * dstStep[1] * yTop + roi->left;
 
  192    BYTE* pV = pDst[2] + 1ULL * dstStep[2] * yTop + roi->left;
 
  194    for (UINT32 x = 0; x < halfWidth; x++)
 
  196      const UINT32 odd = 2UL * x + 1UL;
 
  /* Pass 2: source planes 1 and 2 fill odd destination rows. */
  203  for (
size_t y = 0; y < halfHeight; y++)
 
  205    const BYTE* pUaU = pSrc[1] + srcStep[1] * (y + roi->top / 2) + roi->left / 4;
 
  206    const BYTE* pUaV = pUaU + nTotalWidth / 4;
 
  207    const BYTE* pVaU = pSrc[2] + srcStep[2] * (y + roi->top / 2) + roi->left / 4;
 
  208    const BYTE* pVaV = pVaU + nTotalWidth / 4;
 
  209    BYTE* pU = pDst[1] + 1ULL * dstStep[1] * (2ULL * y + 1 + roi->top) + roi->left;
 
  210    BYTE* pV = pDst[2] + 1ULL * dstStep[2] * (2ULL * y + 1 + roi->top) + roi->left;
 
  212    for (
size_t x = 0; x < quaterWidth; x++)
 
  /* Plane 1 feeds column 4x, plane 2 feeds column 4x + 2. */
  214      pU[4 * x + 0] = *pUaU++;
 
  215      pV[4 * x + 0] = *pUaV++;
 
  216      pU[4 * x + 2] = *pVaU++;
 
  217      pV[4 * x + 2] = *pVaV++;
 
  221  return PRIMITIVES_SUCCESS;
 
  /*
   * Dispatches a YUV420 frame to the matching YUV444 combine helper based on
   * the avc444_frame_type:
   *  - general_LumaToYUV444 (its case label is not visible in this listing),
   *  - AVC444_CHROMAv1 -> general_ChromaV1ToYUV444,
   *  - AVC444_CHROMAv2 -> general_ChromaV2ToYUV444 (also receives nWidth and
   *    nHeight).
   * pSrc, pDst and each of their three plane pointers are checked for NULL
   * first.
   * NOTE(review): the values returned on a failed check or an unknown type,
   * and the roi parameter declaration, are not visible here.
   */
  224static pstatus_t general_YUV420CombineToYUV444(avc444_frame_type type,
 
  225                                               const BYTE* WINPR_RESTRICT pSrc[3],
 
  226                                               const UINT32 srcStep[3], UINT32 nWidth,
 
  227                                               UINT32 nHeight, BYTE* WINPR_RESTRICT pDst[3],
 
  228                                               const UINT32 dstStep[3],
 
  231  if (!pSrc || !pSrc[0] || !pSrc[1] || !pSrc[2])
 
  234  if (!pDst || !pDst[0] || !pDst[1] || !pDst[2])
 
  243      return general_LumaToYUV444(pSrc, srcStep, pDst, dstStep, roi);
 
  245    case AVC444_CHROMAv1:
 
  246      return general_ChromaV1ToYUV444(pSrc, srcStep, pDst, dstStep, roi);
 
  248    case AVC444_CHROMAv2:
 
  249      return general_ChromaV2ToYUV444(pSrc, srcStep, nWidth, nHeight, pDst, dstStep, roi);
 
  /*
   * Splits a three-plane YUV444 frame (pSrc) into two three-plane outputs,
   * pMainDst and pAuxDst, sized by roi->width / roi->height.
   *  - Main plane 0: row-by-row copy of source plane 0.
   *  - Main planes 1/2: every second row and every second column of the
   *    source chroma planes.
   *  - Aux plane 0: odd source chroma rows (2*k + 1), copied whole.
   *  - Aux planes 1/2: odd columns (2*x + 1) of every second source row.
   * Returns PRIMITIVES_SUCCESS.
   * NOTE(review): the return type line, the roi parameter, the uY/vY
   * counters, the branch that selects U or V rows for aux plane 0 and the
   * action taken when pos >= roi->height are not visible in this listing.
   */
  257general_YUV444SplitToYUV420(
const BYTE* WINPR_RESTRICT pSrc[3], 
const UINT32 srcStep[3],
 
  258                            BYTE* WINPR_RESTRICT pMainDst[3], 
const UINT32 dstMainStep[3],
 
  259                            BYTE* WINPR_RESTRICT pAuxDst[3], 
const UINT32 dstAuxStep[3],
 
  267  const UINT32 padHeigth = roi->height + 16 - roi->height % 16;
 
  268  const UINT32 halfWidth = (roi->width + 1) / 2;
 
  269  const UINT32 halfHeight = (roi->height + 1) / 2;
 
  /* Main plane 0. */
  272  for (
size_t y = 0; y < roi->height; y++)
 
  274    const BYTE* pSrcY = pSrc[0] + y * srcStep[0];
 
  275    BYTE* pY = pMainDst[0] + y * dstMainStep[0];
 
  276    memcpy(pY, pSrcY, roi->width);
 
  /* Main planes 1 and 2: even source rows, even source columns. */
  280  for (
size_t y = 0; y < halfHeight; y++)
 
  282    const BYTE* pSrcU = pSrc[1] + 2ULL * y * srcStep[1];
 
  283    const BYTE* pSrcV = pSrc[2] + 2ULL * y * srcStep[2];
 
  284    BYTE* pU = pMainDst[1] + y * dstMainStep[1];
 
  285    BYTE* pV = pMainDst[2] + y * dstMainStep[2];
 
  287    for (
size_t x = 0; x < halfWidth; x++)
 
  /* NOTE(review): as written, main plane 1 receives samples from source
   * plane 2 and main plane 2 from source plane 1 - confirm this swap is
   * intended. */
  289      pU[x] = pSrcV[2 * x];
 
  290      pV[x] = pSrcU[2 * x];
 
  /* Aux plane 0: odd chroma rows of the source. */
  295  for (
size_t y = 0; y < padHeigth; y++)
 
  297    BYTE* pY = pAuxDst[0] + y * dstAuxStep[0];
 
  301      const size_t pos = (2 * uY++ + 1);
 
  302      const BYTE* pSrcU = pSrc[1] + pos * srcStep[1];
 
  304      if (pos >= roi->height)
 
  307      memcpy(pY, pSrcU, roi->width);
 
  311      const size_t pos = (2 * vY++ + 1);
 
  312      const BYTE* pSrcV = pSrc[2] + pos * srcStep[2];
 
  314      if (pos >= roi->height)
 
  317      memcpy(pY, pSrcV, roi->width);
 
  /* Aux planes 1 and 2: even source rows, odd source columns. */
  322  for (
size_t y = 0; y < halfHeight; y++)
 
  324    const BYTE* pSrcU = pSrc[1] + 2 * y * srcStep[1];
 
  325    const BYTE* pSrcV = pSrc[2] + 2 * y * srcStep[2];
 
  326    BYTE* pU = pAuxDst[1] + y * dstAuxStep[1];
 
  327    BYTE* pV = pAuxDst[2] + y * dstAuxStep[2];
 
  329    for (
size_t x = 0; x < halfWidth; x++)
 
  331      pU[x] = pSrcU[2 * x + 1];
 
  332      pV[x] = pSrcV[2 * x + 1];
 
  336  return PRIMITIVES_SUCCESS;
 
  /*
   * Converts two adjacent YUV444 rows to DstFormat pixels, working on 2x2
   * blocks. pRGB[i] is advanced by writeYUVPixel for every pixel written.
   * For the top-left pixel of each block (i == 0, j == 0) the chroma values
   * are first recomputed as 4 * value - (sum of the other three samples of
   * the block) and passed through CONDITIONAL_CLIP together with the
   * original value; the other three pixels use their samples unchanged.
   * nWidth must be even (asserted).
   */
  339static inline void general_YUV444ToRGB_DOUBLE_ROW(BYTE* WINPR_RESTRICT pRGB[2], UINT32 DstFormat,
 
  340                                                  const BYTE* WINPR_RESTRICT pY[2],
 
  341                                                  const BYTE* WINPR_RESTRICT pU[2],
 
  342                                                  const BYTE* WINPR_RESTRICT pV[2], 
size_t nWidth)
 
  344  fkt_writePixel writePixel = getPixelWriteFunction(DstFormat, FALSE);
 
  346  WINPR_ASSERT(nWidth % 2 == 0);
 
  347  for (
size_t x = 0; x < nWidth; x += 2)
 
  349    for (
size_t i = 0; i < 2; i++)
 
  351      for (
size_t j = 0; j < 2; j++)
 
  353        const BYTE y = pY[i][x + j];
 
  354        INT32 u = pU[i][x + j];
 
  355        INT32 v = pV[i][x + j];
 
  /* Only the top-left sample of the 2x2 block is adjusted. */
  356        if ((i == 0) && (j == 0))
 
  358          const INT32 subU = (INT32)pU[0][x + 1] + pU[1][x] + pU[1][x + 1];
 
  359          const INT32 avgU = ((4 * u) - subU);
 
  360          u = CONDITIONAL_CLIP(avgU, WINPR_ASSERTING_INT_CAST(BYTE, u));
 
  362          const INT32 subV = (INT32)pV[0][x + 1] + pV[1][x] + pV[1][x + 1];
 
  363          const INT32 avgV = ((4 * v) - subV);
 
  364          v = CONDITIONAL_CLIP(avgV, WINPR_ASSERTING_INT_CAST(BYTE, v));
 
  366        pRGB[i] = writeYUVPixel(pRGB[i], DstFormat, y, u, v, writePixel);
 
  /*
   * Converts one YUV444 row to DstFormat pixels without any chroma
   * adjustment: each pixel uses its own Y, U and V sample. Pixels are
   * visited in pairs, so nWidth must be even (asserted). pRGB is advanced by
   * the return value of writeYUVPixel.
   */
  372static inline void general_YUV444ToRGB_SINGLE_ROW(BYTE* WINPR_RESTRICT pRGB, UINT32 DstFormat,
 
  373                                                  const BYTE* WINPR_RESTRICT pY,
 
  374                                                  const BYTE* WINPR_RESTRICT pU,
 
  375                                                  const BYTE* WINPR_RESTRICT pV, 
size_t nWidth)
 
  377  fkt_writePixel writePixel = getPixelWriteFunction(DstFormat, FALSE);
 
  379  WINPR_ASSERT(nWidth % 2 == 0);
 
  380  for (
size_t x = 0; x < nWidth; x += 2)
 
  382    for (
size_t j = 0; j < 2; j++)
 
  384      const BYTE y = pY[x + j];
 
  385      const BYTE u = pU[x + j];
 
  386      const BYTE v = pV[x + j];
 
  387      pRGB = writeYUVPixel(pRGB, DstFormat, y, u, v, writePixel);
 
  /*
   * Converts a three-plane YUV444 image to packed pixels of DstFormat.
   * Rows are processed in pairs through general_YUV444ToRGB_DOUBLE_ROW; when
   * roi->height is odd the last row goes through
   * general_YUV444ToRGB_SINGLE_ROW. Returns PRIMITIVES_SUCCESS.
   * NOTE(review): the roi parameter declaration and the declaration of y
   * are not visible in this listing.
   */
  392static inline pstatus_t general_YUV444ToRGB_8u_P3AC4R_general(
const BYTE* WINPR_RESTRICT pSrc[3],
 
  393                                                              const UINT32 srcStep[3],
 
  394                                                              BYTE* WINPR_RESTRICT pDst,
 
  395                                                              UINT32 dstStep, UINT32 DstFormat,
 
  402  const UINT32 nWidth = roi->width;
 
  403  const UINT32 nHeight = roi->height;
 
  /* Row pairs: stops before the last row when nHeight is odd. */
  406  for (; y < nHeight - nHeight % 2; y += 2)
 
  408    const BYTE* WINPR_RESTRICT pY[2] = { pSrc[0] + y * srcStep[0],
 
  409                                       pSrc[0] + (y + 1) * srcStep[0] };
 
  410    const BYTE* WINPR_RESTRICT pU[2] = { pSrc[1] + y * srcStep[1],
 
  411                                       pSrc[1] + (y + 1) * srcStep[1] };
 
  412    const BYTE* WINPR_RESTRICT pV[2] = { pSrc[2] + y * srcStep[2],
 
  413                                       pSrc[2] + (y + 1) * srcStep[2] };
 
  414    BYTE* WINPR_RESTRICT pRGB[] = { pDst + y * dstStep, pDst + (y + 1) * dstStep };
 
  416    general_YUV444ToRGB_DOUBLE_ROW(pRGB, DstFormat, pY, pU, pV, nWidth);
 
  /* Remaining row, if any. */
  418  for (; y < nHeight; y++)
 
  420    const BYTE* WINPR_RESTRICT pY = pSrc[0] + y * srcStep[0];
 
  421    const BYTE* WINPR_RESTRICT pU = pSrc[1] + y * srcStep[1];
 
  422    const BYTE* WINPR_RESTRICT pV = pSrc[2] + y * srcStep[2];
 
  423    BYTE* WINPR_RESTRICT pRGB = pDst + y * dstStep;
 
  425    general_YUV444ToRGB_SINGLE_ROW(pRGB, DstFormat, pY, pU, pV, nWidth);
 
  428  return PRIMITIVES_SUCCESS;
 
  /*
   * Variant of the double-row YUV444 conversion that always writes through
   * writePixelBGRX. For each 2x2 block the top-left U and V samples are
   * replaced by CONDITIONAL_CLIP(4 * sample - sum of the other three,
   * sample); the adjusted values are gathered in the local U/V tables and
   * then all four pixels are written. nWidth must be even (asserted).
   */
  431static inline void general_YUV444ToBGRX_DOUBLE_ROW(BYTE* WINPR_RESTRICT pRGB[2], UINT32 DstFormat,
 
  432                                                   const BYTE* WINPR_RESTRICT pY[2],
 
  433                                                   const BYTE* WINPR_RESTRICT pU[2],
 
  434                                                   const BYTE* WINPR_RESTRICT pV[2], 
size_t nWidth)
 
  436  WINPR_ASSERT(nWidth % 2 == 0);
 
  437  for (
size_t x = 0; x < nWidth; x += 2)
 
  439    const INT32 subU = pU[0][x + 1] + pU[1][x] + pU[1][x + 1];
 
  440    const INT32 avgU = ((4 * pU[0][x]) - subU);
 
  441    const BYTE useU = CONDITIONAL_CLIP(avgU, pU[0][x]);
 
  442    const INT32 subV = pV[0][x + 1] + pV[1][x] + pV[1][x + 1];
 
  443    const INT32 avgV = ((4 * pV[0][x]) - subV);
 
  444    const BYTE useV = CONDITIONAL_CLIP(avgV, pV[0][x]);
 
  /* Per-block chroma tables indexed [row][column]; only [0][0] is adjusted. */
  446    const BYTE U[2][2] = { { useU, pU[0][x + 1] }, { pU[1][x], pU[1][x + 1] } };
 
  447    const BYTE V[2][2] = { { useV, pV[0][x + 1] }, { pV[1][x], pV[1][x + 1] } };
 
  449    for (
size_t i = 0; i < 2; i++)
 
  451      for (
size_t j = 0; j < 2; j++)
 
  453        const BYTE y = pY[i][x + j];
 
  454        const BYTE u = U[i][j];
 
  455        const BYTE v = V[i][j];
 
  456        pRGB[i] = writeYUVPixel(pRGB[i], DstFormat, y, u, v, writePixelBGRX);
 
  /*
   * Converts one YUV444 row through writePixelBGRX, using each pixel's own
   * Y, U and V sample unchanged. Pixels are visited in pairs, so nWidth must
   * be even (asserted). pRGB is advanced by the return value of
   * writeYUVPixel.
   */
  462static inline void general_YUV444ToBGRX_SINGLE_ROW(BYTE* WINPR_RESTRICT pRGB, UINT32 DstFormat,
 
  463                                                   const BYTE* WINPR_RESTRICT pY,
 
  464                                                   const BYTE* WINPR_RESTRICT pU,
 
  465                                                   const BYTE* WINPR_RESTRICT pV, 
size_t nWidth)
 
  467  WINPR_ASSERT(nWidth % 2 == 0);
 
  468  for (
size_t x = 0; x < nWidth; x += 2)
 
  470    for (
size_t j = 0; j < 2; j++)
 
  472      const BYTE Y = pY[x + j];
 
  473      const BYTE U = pU[x + j];
 
  474      const BYTE V = pV[x + j];
 
  475      pRGB = writeYUVPixel(pRGB, DstFormat, Y, U, V, writePixelBGRX);
 
  /*
   * Converts a three-plane YUV444 image using the BGRX row helpers.
   * Rows are processed in pairs through general_YUV444ToBGRX_DOUBLE_ROW; a
   * trailing row (odd roi->height) goes through
   * general_YUV444ToBGRX_SINGLE_ROW. Returns PRIMITIVES_SUCCESS.
   * NOTE(review): the roi parameter declaration and the declaration of y
   * are not visible in this listing.
   */
  480static inline pstatus_t general_YUV444ToRGB_8u_P3AC4R_BGRX(
const BYTE* WINPR_RESTRICT pSrc[3],
 
  481                                                           const UINT32 srcStep[3],
 
  482                                                           BYTE* WINPR_RESTRICT pDst,
 
  483                                                           UINT32 dstStep, UINT32 DstFormat,
 
  490  const UINT32 nWidth = roi->width;
 
  491  const UINT32 nHeight = roi->height;
 
  /* Row pairs. */
  494  for (; y < nHeight - nHeight % 2; y += 2)
 
  496    const BYTE* pY[2] = { pSrc[0] + y * srcStep[0], pSrc[0] + (y + 1) * srcStep[0] };
 
  497    const BYTE* pU[2] = { pSrc[1] + y * srcStep[1], pSrc[1] + (y + 1) * srcStep[1] };
 
  498    const BYTE* pV[2] = { pSrc[2] + y * srcStep[2], pSrc[2] + (y + 1) * srcStep[2] };
 
  499    BYTE* pRGB[] = { pDst + y * dstStep, pDst + (y + 1) * dstStep };
 
  501    general_YUV444ToBGRX_DOUBLE_ROW(pRGB, DstFormat, pY, pU, pV, nWidth);
 
  /* Remaining row, if any. */
  504  for (; y < nHeight; y++)
 
  506    const BYTE* WINPR_RESTRICT pY = pSrc[0] + y * srcStep[0];
 
  507    const BYTE* WINPR_RESTRICT pU = pSrc[1] + y * srcStep[1];
 
  508    const BYTE* WINPR_RESTRICT pV = pSrc[2] + y * srcStep[2];
 
  509    BYTE* WINPR_RESTRICT pRGB = pDst + y * dstStep;
 
  511    general_YUV444ToBGRX_SINGLE_ROW(pRGB, DstFormat, pY, pU, pV, nWidth);
 
  513  return PRIMITIVES_SUCCESS;
 
  /*
   * Entry point for YUV444 -> packed pixel conversion. Selects the BGRX
   * specialisation for PIXEL_FORMAT_BGRA32 and PIXEL_FORMAT_BGRX32 and the
   * generic implementation otherwise.
   * NOTE(review): the roi parameter, the switch statement and the default
   * label are not visible in this listing.
   */
  516static pstatus_t general_YUV444ToRGB_8u_P3AC4R(
const BYTE* WINPR_RESTRICT pSrc[3],
 
  517                                               const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst,
 
  518                                               UINT32 dstStep, UINT32 DstFormat,
 
  523    case PIXEL_FORMAT_BGRA32:
 
  524    case PIXEL_FORMAT_BGRX32:
 
  525      return general_YUV444ToRGB_8u_P3AC4R_BGRX(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
 
  528      return general_YUV444ToRGB_8u_P3AC4R_general(pSrc, srcStep, pDst, dstStep, DstFormat,
 
  /*
   * Converts two luma rows (pYeven, pYodd) that share one chroma row (pU,
   * pV) into two output rows (pEven, pOdd). Each chroma sample x is applied
   * to luma columns 2x and 2x + 1 of both rows; output pixels are
   * formatSize bytes apart. The second loop handles the last column when
   * width is odd and writes only column 2x of each row.
   * NOTE(review): the declaration of x is not visible in this listing.
   */
  537static void general_YUV420ToRGB_8u_P3AC4R_double_line(BYTE* WINPR_RESTRICT pEven,
 
  538                                                      BYTE* WINPR_RESTRICT pOdd, UINT32 DstFormat,
 
  539                                                      const BYTE* WINPR_RESTRICT pYeven,
 
  540                                                      const BYTE* WINPR_RESTRICT pYodd,
 
  541                                                      const BYTE* WINPR_RESTRICT pU,
 
  542                                                      const BYTE* WINPR_RESTRICT pV, UINT32 width,
 
  543                                                      fkt_writePixel writePixel, UINT32 formatSize)
 
  /* Full pixel pairs. */
  547  for (; x < width / 2; x++)
 
  549    const BYTE U = pU[x];
 
  550    const BYTE V = pV[x];
 
  551    const BYTE eY0 = pYeven[2ULL * x + 0];
 
  552    const BYTE eY1 = pYeven[2ULL * x + 1];
 
  553    writeYUVPixel(&pEven[2ULL * x * formatSize], DstFormat, eY0, U, V, writePixel);
 
  554    writeYUVPixel(&pEven[(2ULL * x + 1) * formatSize], DstFormat, eY1, U, V, writePixel);
 
  556    const BYTE oY0 = pYodd[2ULL * x + 0];
 
  557    const BYTE oY1 = pYodd[2ULL * x + 1];
 
  558    writeYUVPixel(&pOdd[2ULL * x * formatSize], DstFormat, oY0, U, V, writePixel);
 
  559    writeYUVPixel(&pOdd[(2ULL * x + 1) * formatSize], DstFormat, oY1, U, V, writePixel);
 
  /* Trailing column for odd widths. */
  562  for (; x < (width + 1) / 2; x++)
 
  564    const BYTE U = pU[x];
 
  565    const BYTE V = pV[x];
 
  566    const BYTE eY0 = pYeven[2ULL * x + 0];
 
  567    writeYUVPixel(&pEven[2ULL * x * formatSize], DstFormat, eY0, U, V, writePixel);
 
  569    const BYTE oY0 = pYodd[2ULL * x + 0];
 
  570    writeYUVPixel(&pOdd[2ULL * x * formatSize], DstFormat, oY0, U, V, writePixel);
 
  /*
   * Converts a single luma row (pYeven) with its chroma row (pU, pV) into
   * one output row (pEven). Each chroma sample x is applied to luma columns
   * 2x and 2x + 1; the second loop handles the last column when width is
   * odd.
   * NOTE(review): the declaration of x is not visible in this listing.
   */
  574static void general_YUV420ToRGB_8u_P3AC4R_single_line(BYTE* WINPR_RESTRICT pEven, UINT32 DstFormat,
 
  575                                                      const BYTE* WINPR_RESTRICT pYeven,
 
  576                                                      const BYTE* WINPR_RESTRICT pU,
 
  577                                                      const BYTE* WINPR_RESTRICT pV, UINT32 width,
 
  578                                                      fkt_writePixel writePixel, UINT32 formatSize)
 
  /* Full pixel pairs. */
  582  for (; x < width / 2; x++)
 
  584    const BYTE U = pU[x];
 
  585    const BYTE V = pV[x];
 
  586    const BYTE eY0 = pYeven[2ULL * x + 0];
 
  587    const BYTE eY1 = pYeven[2ULL * x + 1];
 
  588    writeYUVPixel(&pEven[2ULL * x * formatSize], DstFormat, eY0, U, V, writePixel);
 
  589    writeYUVPixel(&pEven[(2ULL * x + 1) * formatSize], DstFormat, eY1, U, V, writePixel);
 
  /* Trailing column for odd widths. */
  592  for (; x < (width + 1) / 2; x++)
 
  594    const BYTE U = pU[x];
 
  595    const BYTE V = pV[x];
 
  596    const BYTE eY0 = pYeven[2ULL * x + 0];
 
  597    writeYUVPixel(&pEven[2ULL * x * formatSize], DstFormat, eY0, U, V, writePixel);
 
  /*
   * Converts a three-plane YUV420 image to packed pixels of DstFormat.
   * The first loop handles nHeight / 2 row pairs (two luma rows per chroma
   * row) via general_YUV420ToRGB_8u_P3AC4R_double_line; the second loop
   * handles the final row when nHeight is odd via the single_line helper.
   * Returns PRIMITIVES_SUCCESS.
   * NOTE(review): the roi parameter declaration, the declaration of y and
   * the tail of the single_line call are not visible in this listing.
   */
  601static pstatus_t general_YUV420ToRGB_8u_P3AC4R(
const BYTE* WINPR_RESTRICT pSrc[3],
 
  602                                               const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst,
 
  603                                               UINT32 dstStep, UINT32 DstFormat,
 
  607  const DWORD formatSize = FreeRDPGetBytesPerPixel(DstFormat);
 
  608  fkt_writePixel writePixel = getPixelWriteFunction(DstFormat, FALSE);
 
  609  const UINT32 nWidth = roi->width;
 
  610  const UINT32 nHeight = roi->height;
 
  /* y counts chroma rows here; luma/output rows are 2*y and 2*y + 1. */
  613  for (; y < nHeight / 2; y++)
 
  615    const BYTE* pYe = &pSrc[0][(2ULL * y + 0) * srcStep[0]];
 
  616    const BYTE* pYo = &pSrc[0][(2ULL * y + 1) * srcStep[0]];
 
  617    const BYTE* pU = &pSrc[1][1ULL * srcStep[1] * y];
 
  618    const BYTE* pV = &pSrc[2][1ULL * srcStep[2] * y];
 
  619    BYTE* pRGBeven = &pDst[2ULL * y * dstStep];
 
  620    BYTE* pRGBodd = &pDst[(2ULL * y + 1) * dstStep];
 
  621    general_YUV420ToRGB_8u_P3AC4R_double_line(pRGBeven, pRGBodd, DstFormat, pYe, pYo, pU, pV,
 
  622                                              nWidth, writePixel, formatSize);
 
  /* Last row for odd heights. */
  626  for (; y < (nHeight + 1) / 2; y++)
 
  628    const BYTE* pY = &pSrc[0][2ULL * srcStep[0] * y];
 
  629    const BYTE* pU = &pSrc[1][1ULL * srcStep[1] * y];
 
  630    const BYTE* pV = &pSrc[2][1ULL * srcStep[2] * y];
 
  631    BYTE* pEven = &pDst[2ULL * y * dstStep];
 
  633    general_YUV420ToRGB_8u_P3AC4R_single_line(pEven, DstFormat, pY, pU, pV, nWidth, writePixel,
 
  637  return PRIMITIVES_SUCCESS;
 
  /*
   * Converts the 2x2 pixel block starting at column offset of two BGRX32
   * rows (pRGB[0], pRGB[1]) into Y, U and V samples stored at the same
   * positions of pY, pU and pV. Afterwards the top-left U and V samples are
   * replaced by CONDITIONAL_CLIP(average of the four block samples,
   * original sample).
   * NOTE(review): the declarations of R, G and B are not visible in this
   * listing.
   */
  640static inline void BGRX_fillYUV(
size_t offset, 
const BYTE* WINPR_RESTRICT pRGB[2],
 
  641                                BYTE* WINPR_RESTRICT pY[2], BYTE* WINPR_RESTRICT pU[2],
 
  642                                BYTE* WINPR_RESTRICT pV[2])
 
  649  const UINT32 SrcFormat = PIXEL_FORMAT_BGRX32;
 
  650  const UINT32 bpp = 4;
 
  652  for (
size_t i = 0; i < 2; i++)
 
  654    for (
size_t j = 0; j < 2; j++)
 
  659      const UINT32 color = FreeRDPReadColor(&pRGB[i][(offset + j) * bpp], SrcFormat);
 
  660      FreeRDPSplitColor(color, SrcFormat, &R, &G, &B, NULL, NULL);
 
  661      pY[i][offset + j] = RGB2Y(R, G, B);
 
  662      pU[i][offset + j] = RGB2U(R, G, B);
 
  663      pV[i][offset + j] = RGB2V(R, G, B);
 
  /* Chroma adjustment of the block's top-left sample. */
  668  const INT32 avgU = (pU[0][offset] + pU[0][offset + 1] + pU[1][offset] + pU[1][offset + 1]) / 4;
 
  669  pU[0][offset] = CONDITIONAL_CLIP(avgU, pU[0][offset]);
 
  670  const INT32 avgV = (pV[0][offset] + pV[0][offset + 1] + pV[1][offset] + pV[1][offset + 1]) / 4;
 
  671  pV[0][offset] = CONDITIONAL_CLIP(avgV, pV[0][offset]);
 
  /*
   * Converts the two BGRX32 pixels at columns offset and offset + 1 of a
   * single row into Y, U and V samples stored at the same columns of pY, pU
   * and pV. No chroma adjustment is visible in this variant.
   * NOTE(review): the declarations of R, G and B are not visible in this
   * listing.
   */
  674static inline void BGRX_fillYUV_single(
size_t offset, 
const BYTE* WINPR_RESTRICT pRGB,
 
  675                                       BYTE* WINPR_RESTRICT pY, BYTE* WINPR_RESTRICT pU,
 
  676                                       BYTE* WINPR_RESTRICT pV)
 
  683  const UINT32 SrcFormat = PIXEL_FORMAT_BGRX32;
 
  684  const UINT32 bpp = 4;
 
  686  for (
size_t j = 0; j < 2; j++)
 
  691    const UINT32 color = FreeRDPReadColor(&pRGB[(offset + j) * bpp], SrcFormat);
 
  692    FreeRDPSplitColor(color, SrcFormat, &R, &G, &B, NULL, NULL);
 
  693    pY[offset + j] = RGB2Y(R, G, B);
 
  694    pU[offset + j] = RGB2U(R, G, B);
 
  695    pV[offset + j] = RGB2V(R, G, B);
 
  /*
   * Converts two BGRX rows to YUV444 by calling BGRX_fillYUV for every 2x2
   * block (x advances by 2). nWidth must be even (asserted).
   */
  699static inline void general_BGRXToYUV444_DOUBLE_ROW(
const BYTE* WINPR_RESTRICT pRGB[2],
 
  700                                                   BYTE* WINPR_RESTRICT pY[2],
 
  701                                                   BYTE* WINPR_RESTRICT pU[2],
 
  702                                                   BYTE* WINPR_RESTRICT pV[2], UINT32 nWidth)
 
  705  WINPR_ASSERT((nWidth % 2) == 0);
 
  706  for (
size_t x = 0; x < nWidth; x += 2)
 
  708    BGRX_fillYUV(x, pRGB, pY, pU, pV);
 
  /*
   * Converts one BGRX row to YUV444 by calling BGRX_fillYUV_single for every
   * pixel pair (x advances by 2). nWidth must be even (asserted).
   */
  712static inline void general_BGRXToYUV444_SINGLE_ROW(
const BYTE* WINPR_RESTRICT pRGB,
 
  713                                                   BYTE* WINPR_RESTRICT pY, BYTE* WINPR_RESTRICT pU,
 
  714                                                   BYTE* WINPR_RESTRICT pV, UINT32 nWidth)
 
  717  WINPR_ASSERT((nWidth % 2) == 0);
 
  718  for (
size_t x = 0; x < nWidth; x += 2)
 
  720    BGRX_fillYUV_single(x, pRGB, pY, pU, pV);
 
  /*
   * Converts a packed BGRX image (pSrc, srcStep bytes per row) to three
   * YUV444 planes (pDst). Rows are processed in pairs through
   * general_BGRXToYUV444_DOUBLE_ROW; a trailing row (odd roi->height) goes
   * through general_BGRXToYUV444_SINGLE_ROW. Returns PRIMITIVES_SUCCESS.
   * NOTE(review): the roi parameter declaration and the declaration of y
   * are not visible in this listing.
   */
  724static inline pstatus_t general_RGBToYUV444_8u_P3AC4R_BGRX(
const BYTE* WINPR_RESTRICT pSrc,
 
  725                                                           const UINT32 srcStep,
 
  726                                                           BYTE* WINPR_RESTRICT pDst[3],
 
  727                                                           const UINT32 dstStep[3],
 
  730  const UINT32 nWidth = roi->width;
 
  731  const UINT32 nHeight = roi->height;
 
  /* Row pairs. */
  734  for (; y < nHeight - nHeight % 2; y += 2)
 
  736    const BYTE* pRGB[] = { pSrc + y * srcStep, pSrc + (y + 1) * srcStep };
 
  737    BYTE* pY[] = { pDst[0] + y * dstStep[0], pDst[0] + (y + 1) * dstStep[0] };
 
  738    BYTE* pU[] = { pDst[1] + y * dstStep[1], pDst[1] + (y + 1) * dstStep[1] };
 
  739    BYTE* pV[] = { pDst[2] + y * dstStep[2], pDst[2] + (y + 1) * dstStep[2] };
 
  741    general_BGRXToYUV444_DOUBLE_ROW(pRGB, pY, pU, pV, nWidth);
 
  /* Remaining row, if any. */
  744  for (; y < nHeight; y++)
 
  746    const BYTE* pRGB = pSrc + y * srcStep;
 
  747    BYTE* pY = pDst[0] + y * dstStep[0];
 
  748    BYTE* pU = pDst[1] + y * dstStep[1];
 
  749    BYTE* pV = pDst[2] + y * dstStep[2];
 
  751    general_BGRXToYUV444_SINGLE_ROW(pRGB, pY, pU, pV, nWidth);
 
  754  return PRIMITIVES_SUCCESS;
 
  /*
   * Format-generic counterpart of BGRX_fillYUV: converts the 2x2 pixel block
   * starting at column offset of two rows in SrcFormat into Y, U and V
   * samples stored at the same positions of pY, pU and pV. The pixel size
   * comes from FreeRDPGetBytesPerPixel(SrcFormat). Finally the top-left U
   * and V samples are overwritten with CLIP(avgU) and CLIP(avgV).
   * NOTE(review): the declarations of R, G, B and the computation of
   * avgU / avgV are not visible in this listing - confirm how the averages
   * are formed.
   */
  757static inline void fillYUV(
size_t offset, 
const BYTE* WINPR_RESTRICT pRGB[2], UINT32 SrcFormat,
 
  758                           BYTE* WINPR_RESTRICT pY[2], BYTE* WINPR_RESTRICT pU[2],
 
  759                           BYTE* WINPR_RESTRICT pV[2])
 
  765  const UINT32 bpp = FreeRDPGetBytesPerPixel(SrcFormat);
 
  769  for (
size_t i = 0; i < 2; i++)
 
  771    for (
size_t j = 0; j < 2; j++)
 
  776      const UINT32 color = FreeRDPReadColor(&pRGB[i][(offset + j) * bpp], SrcFormat);
 
  777      FreeRDPSplitColor(color, SrcFormat, &R, &G, &B, NULL, NULL);
 
  778      const BYTE y = RGB2Y(R, G, B);
 
  779      const BYTE u = RGB2U(R, G, B);
 
  780      const BYTE v = RGB2V(R, G, B);
 
  783      pY[i][offset + j] = y;
 
  784      pU[i][offset + j] = u;
 
  785      pV[i][offset + j] = v;
 
  /* Top-left chroma samples are replaced by the clipped block averages. */
  791  pU[0][offset] = CLIP(avgU);
 
  794  pV[0][offset] = CLIP(avgV);
 
  /*
   * Format-generic single-row variant: reads the two pixels at columns
   * offset and offset + 1 in SrcFormat and computes their Y, U and V values.
   * NOTE(review): the declarations of R, G, B and the stores of y / u / v
   * into pY / pU / pV are not visible in this listing - presumably they
   * mirror BGRX_fillYUV_single; confirm against the complete source.
   */
  797static inline void fillYUV_single(
size_t offset, 
const BYTE* WINPR_RESTRICT pRGB, UINT32 SrcFormat,
 
  798                                  BYTE* WINPR_RESTRICT pY, BYTE* WINPR_RESTRICT pU,
 
  799                                  BYTE* WINPR_RESTRICT pV)
 
  805  const UINT32 bpp = FreeRDPGetBytesPerPixel(SrcFormat);
 
  807  for (
size_t j = 0; j < 2; j++)
 
  812    const UINT32 color = FreeRDPReadColor(&pRGB[(offset + j) * bpp], SrcFormat);
 
  813    FreeRDPSplitColor(color, SrcFormat, &R, &G, &B, NULL, NULL);
 
  814    const BYTE y = RGB2Y(R, G, B);
 
  815    const BYTE u = RGB2U(R, G, B);
 
  816    const BYTE v = RGB2V(R, G, B);
 
  /*
   * Converts two rows in SrcFormat to YUV444 by calling fillYUV for every
   * 2x2 block (x advances by 2). nWidth must be even (asserted).
   */
  823static inline void general_RGBToYUV444_DOUBLE_ROW(
const BYTE* WINPR_RESTRICT pRGB[2],
 
  824                                                  UINT32 SrcFormat, BYTE* WINPR_RESTRICT pY[2],
 
  825                                                  BYTE* WINPR_RESTRICT pU[2],
 
  826                                                  BYTE* WINPR_RESTRICT pV[2], UINT32 nWidth)
 
  829  WINPR_ASSERT((nWidth % 2) == 0);
 
  830  for (
size_t x = 0; x < nWidth; x += 2)
 
  832    fillYUV(x, pRGB, SrcFormat, pY, pU, pV);
 
  /*
   * Converts one row in SrcFormat to YUV444 by calling fillYUV_single for
   * every pixel pair (x advances by 2). nWidth must be even (asserted).
   */
  836static inline void general_RGBToYUV444_SINGLE_ROW(
const BYTE* WINPR_RESTRICT pRGB, UINT32 SrcFormat,
 
  837                                                  BYTE* WINPR_RESTRICT pY, BYTE* WINPR_RESTRICT pU,
 
  838                                                  BYTE* WINPR_RESTRICT pV, UINT32 nWidth)
 
  841  WINPR_ASSERT((nWidth % 2) == 0);
 
  842  for (
size_t x = 0; x < nWidth; x += 2)
 
  844    fillYUV_single(x, pRGB, SrcFormat, pY, pU, pV);
 
  /*
   * Converts a packed image in SrcFormat (pSrc, srcStep bytes per row) to
   * three YUV444 planes (pDst). Rows are processed in pairs through
   * general_RGBToYUV444_DOUBLE_ROW; a trailing row (odd roi->height) goes
   * through general_RGBToYUV444_SINGLE_ROW. Returns PRIMITIVES_SUCCESS.
   * NOTE(review): the roi parameter declaration and the declaration of y
   * are not visible in this listing.
   */
  848static inline pstatus_t general_RGBToYUV444_8u_P3AC4R_RGB(
const BYTE* WINPR_RESTRICT pSrc,
 
  849                                                          UINT32 SrcFormat, 
const UINT32 srcStep,
 
  850                                                          BYTE* WINPR_RESTRICT pDst[3],
 
  851                                                          const UINT32 dstStep[3],
 
  854  const UINT32 nWidth = roi->width;
 
  855  const UINT32 nHeight = roi->height;
 
  /* Row pairs. */
  858  for (; y < nHeight - nHeight % 2; y += 2)
 
  860    const BYTE* pRGB[] = { pSrc + y * srcStep, pSrc + (y + 1) * srcStep };
 
  861    BYTE* pY[] = { &pDst[0][y * dstStep[0]], &pDst[0][(y + 1) * dstStep[0]] };
 
  862    BYTE* pU[] = { &pDst[1][y * dstStep[1]], &pDst[1][(y + 1) * dstStep[1]] };
 
  863    BYTE* pV[] = { &pDst[2][y * dstStep[2]], &pDst[2][(y + 1) * dstStep[2]] };
 
  865    general_RGBToYUV444_DOUBLE_ROW(pRGB, SrcFormat, pY, pU, pV, nWidth);
 
  /* Remaining row, if any. */
  867  for (; y < nHeight; y++)
 
  869    const BYTE* pRGB = pSrc + y * srcStep;
 
  870    BYTE* pY = &pDst[0][y * dstStep[0]];
 
  871    BYTE* pU = &pDst[1][y * dstStep[1]];
 
  872    BYTE* pV = &pDst[2][y * dstStep[2]];
 
  874    general_RGBToYUV444_SINGLE_ROW(pRGB, SrcFormat, pY, pU, pV, nWidth);
 
  877  return PRIMITIVES_SUCCESS;
 
  /*
   * Entry point for packed pixel -> YUV444 conversion. Selects the BGRX
   * specialisation for PIXEL_FORMAT_BGRA32 and PIXEL_FORMAT_BGRX32 and the
   * format-generic implementation otherwise.
   * NOTE(review): the roi parameter, the switch statement and the default
   * label are not visible in this listing.
   */
  880static pstatus_t general_RGBToYUV444_8u_P3AC4R(
const BYTE* WINPR_RESTRICT pSrc, UINT32 SrcFormat,
 
  881                                               const UINT32 srcStep, BYTE* WINPR_RESTRICT pDst[3],
 
  882                                               const UINT32 dstStep[3],
 
  887    case PIXEL_FORMAT_BGRA32:
 
  888    case PIXEL_FORMAT_BGRX32:
 
  889      return general_RGBToYUV444_8u_P3AC4R_BGRX(pSrc, srcStep, pDst, dstStep, roi);
 
  891      return general_RGBToYUV444_8u_P3AC4R_RGB(pSrc, SrcFormat, srcStep, pDst, dstStep, roi);
 
  /*
   * Converts a packed image whose bytes are read in B, G, R order (offsets
   * 0, 1, 2 of each pixel) into three YUV420 planes (pDst).
   *  - First loop: rows are taken in pairs. For every 2x2 block each pixel
   *    gets its own Y sample, while B/G/R are accumulated in Ba/Ga/Ra and
   *    one U and one V sample are emitted per block.
   *  - Second loop: the last row when roi->height is odd, handled with 1x2
   *    blocks; chroma goes to row y / 2.
   * x3 / x4 address the pixel one source row below, y3 / y4 the luma sample
   * one destination row below. Returns PRIMITIVES_SUCCESS.
   * NOTE(review): the roi parameter, the declarations of x1, x2, x3, y1, y2,
   * y and the R/G/B accumulators, the max_x guards around the second column,
   * the scaling of Ra/Ga/Ba before RGB2U/RGB2V and the pointer advances at
   * the end of each x iteration are not visible in this listing - confirm
   * against the complete source.
   */
  895static inline pstatus_t general_RGBToYUV420_BGRX(
const BYTE* WINPR_RESTRICT pSrc, UINT32 srcStep,
 
  896                                                 BYTE* WINPR_RESTRICT pDst[3],
 
  897                                                 const UINT32 dstStep[3],
 
  903  size_t x4 = srcStep + 4;
 
  906  size_t y3 = dstStep[0];
 
  907  size_t y4 = dstStep[0] + 1;
 
  908  UINT32 max_x = roi->width - 1;
 
  /* i is the chroma row index, y the luma row index. */
  911  for (
size_t i = 0; y < roi->height - roi->height % 2; y += 2, i++)
 
  913    const BYTE* src = pSrc + y * srcStep;
 
  914    BYTE* ydst = pDst[0] + y * dstStep[0];
 
  915    BYTE* udst = pDst[1] + i * dstStep[1];
 
  916    BYTE* vdst = pDst[2] + i * dstStep[2];
 
  918    for (
size_t x = 0; x < roi->width; x += 2)
 
  /* Top-left pixel: initialises the accumulators. */
  927      Ba = B = *(src + x1 + 0);
 
  928      Ga = G = *(src + x1 + 1);
 
  929      Ra = R = *(src + x1 + 2);
 
  930      ydst[y1] = RGB2Y(R, G, B);
 
  /* Top-right pixel. */
  935        Ba += B = *(src + x2 + 0);
 
  936        Ga += G = *(src + x2 + 1);
 
  937        Ra += R = *(src + x2 + 2);
 
  938        ydst[y2] = RGB2Y(R, G, B);
 
  /* Bottom-left pixel. */
  942      Ba += B = *(src + x3 + 0);
 
  943      Ga += G = *(src + x3 + 1);
 
  944      Ra += R = *(src + x3 + 2);
 
  945      ydst[y3] = RGB2Y(R, G, B);
 
  /* Bottom-right pixel. */
  950        Ba += B = *(src + x4 + 0);
 
  951        Ga += G = *(src + x4 + 1);
 
  952        Ra += R = *(src + x4 + 2);
 
  953        ydst[y4] = RGB2Y(R, G, B);
 
  /* One chroma sample pair per block. */
  959      *udst++ = RGB2U(Ra, Ga, Ba);
 
  960      *vdst++ = RGB2V(Ra, Ga, Ba);
 
  /* Trailing row for odd heights. */
  966  for (; y < roi->height; y++)
 
  968    const BYTE* src = pSrc + y * srcStep;
 
  969    BYTE* ydst = pDst[0] + y * dstStep[0];
 
  970    BYTE* udst = pDst[1] + (y / 2) * dstStep[1];
 
  971    BYTE* vdst = pDst[2] + (y / 2) * dstStep[2];
 
  973    for (
size_t x = 0; x < roi->width; x += 2)
 
  982      Ba = B = *(src + x1 + 0);
 
  983      Ga = G = *(src + x1 + 1);
 
  984      Ra = R = *(src + x1 + 2);
 
  985      ydst[y1] = RGB2Y(R, G, B);
 
  990        Ba += B = *(src + x2 + 0);
 
  991        Ga += G = *(src + x2 + 1);
 
  992        Ra += R = *(src + x2 + 2);
 
  993        ydst[y2] = RGB2Y(R, G, B);
 
  999      *udst++ = RGB2U(Ra, Ga, Ba);
 
 1000      *vdst++ = RGB2V(Ra, Ga, Ba);
 
 1006  return PRIMITIVES_SUCCESS;
 
 /* RGB -> YUV 4:2:0 conversion for sources whose pixel bytes are read as
  * R, G, B at byte offsets 0, 1, 2 (x4 = srcStep + 4 implies 4 bytes/pixel).
  *
  * pSrc, srcStep: packed source image and its row stride in bytes.
  * pDst, dstStep: destination planes and their strides; pDst[0] receives
  *                RGB2Y() per pixel, pDst[1]/pDst[2] receive one
  *                RGB2U()/RGB2V() sample per 2-pixel step in x.
  * The processed size is taken from roi->width / roi->height.
  * Returns PRIMITIVES_SUCCESS.
  *
  * NOTE(review): the declarations of x1/x2/y1/y2/y, the Ra/Ga/Ba values and
  * the edge guards are not visible in this excerpt; Ra/Ga/Ba are presumably
  * the combined channels of the current pixel group - confirm. */
 1009static inline pstatus_t general_RGBToYUV420_RGBX(
const BYTE* WINPR_RESTRICT pSrc, UINT32 srcStep,
 
 1010                                                 BYTE* WINPR_RESTRICT pDst[3],
 
 1011                                                 const UINT32 dstStep[3],
 
       /* x3/x4: byte offsets of the pixel pair one source row (srcStep) further. */
 1016  size_t x3 = srcStep;
 
 1017  size_t x4 = srcStep + 4;
 
       /* y3/y4: luma indices of that pair one destination row (dstStep[0]) further. */
 1020  size_t y3 = dstStep[0];
 
 1021  size_t y4 = dstStep[0] + 1;
 
       /* Index of the last column. NOTE(review): presumably used by guards not shown here. */
 1022  UINT32 max_x = roi->width - 1;
 
       /* Row pairs: the bound excludes a trailing odd row; i counts chroma rows. */
 1025  for (
size_t i = 0; y < roi->height - roi->height % 2; y += 2, i++)
 
 1027    const BYTE* src = pSrc + y * srcStep;
 
 1028    BYTE* ydst = pDst[0] + y * dstStep[0];
 
 1029    BYTE* udst = pDst[1] + i * dstStep[1];
 
 1030    BYTE* vdst = pDst[2] + i * dstStep[2];
 
 1032    for (UINT32 x = 0; x < roi->width; x += 2)
 
           /* First pixel of the upper row. */
 1034      BYTE R = *(src + x1 + 0);
 
 1035      BYTE G = *(src + x1 + 1);
 
 1036      BYTE B = *(src + x1 + 2);
 
 1041      ydst[y1] = RGB2Y(R, G, B);
 
             /* Second pixel of the upper row. */
 1046        R = *(src + x2 + 0);
 
 1047        G = *(src + x2 + 1);
 
 1048        B = *(src + x2 + 2);
 
 1052        ydst[y2] = RGB2Y(R, G, B);
 
           /* First pixel of the lower row. */
 1056      R = *(src + x3 + 0);
 
 1057      G = *(src + x3 + 1);
 
 1058      B = *(src + x3 + 2);
 
 1063      ydst[y3] = RGB2Y(R, G, B);
 
             /* Second pixel of the lower row. */
 1068        R = *(src + x4 + 0);
 
 1069        G = *(src + x4 + 1);
 
 1070        B = *(src + x4 + 2);
 
 1075        ydst[y4] = RGB2Y(R, G, B);
 
           /* One chroma sample pair per pixel group. */
 1081      *udst++ = RGB2U(Ra, Ga, Ba);
 
 1082      *vdst++ = RGB2V(Ra, Ga, Ba);
 
       /* Rows left over by the pair loop (an odd roi->height); chroma row is y / 2. */
 1088  for (; y < roi->height; y++)
 
 1090    const BYTE* src = pSrc + y * srcStep;
 
 1091    BYTE* ydst = pDst[0] + y * dstStep[0];
 
 1092    BYTE* udst = pDst[1] + (y / 2) * dstStep[1];
 
 1093    BYTE* vdst = pDst[2] + (y / 2) * dstStep[2];
 
 1095    for (UINT32 x = 0; x < roi->width; x += 2)
 
 1097      BYTE R = *(src + x1 + 0);
 
 1098      BYTE G = *(src + x1 + 1);
 
 1099      BYTE B = *(src + x1 + 2);
 
 1104      ydst[y1] = RGB2Y(R, G, B);
 
 1109        R = *(src + x2 + 0);
 
 1110        G = *(src + x2 + 1);
 
 1111        B = *(src + x2 + 2);
 
 1115        ydst[y2] = RGB2Y(R, G, B);
 
 1121      *udst++ = RGB2U(Ra, Ga, Ba);
 
 1122      *vdst++ = RGB2V(Ra, Ga, Ba);
 
 1128  return PRIMITIVES_SUCCESS;
 
 /* Generic RGB -> YUV 4:2:0 conversion for an arbitrary source pixel format.
  *
  * Each pixel is fetched with FreeRDPReadColor() and split into R, G, B with
  * FreeRDPSplitColor(); the pixel size comes from
  * FreeRDPGetBytesPerPixel(srcFormat).
  *
  * pSrc, srcFormat, srcStep: packed source image, its format and row stride.
  * pDst, dstStep: destination planes and strides; pDst[0] receives RGB2Y()
  *                per pixel, pDst[1]/pDst[2] one RGB2U()/RGB2V() sample per
  *                2-pixel step in x.
  * The processed size is taken from roi->width / roi->height.
  * Returns PRIMITIVES_SUCCESS.
  *
  * NOTE(review): declarations of x1/x2/y1/y2/y, Ra/Ga/Ba and the edge guards
  * are not visible in this excerpt. */
 1131static inline pstatus_t general_RGBToYUV420_ANY(
const BYTE* WINPR_RESTRICT pSrc, UINT32 srcFormat,
 
 1132                                                UINT32 srcStep, BYTE* WINPR_RESTRICT pDst[3],
 
 1133                                                const UINT32 dstStep[3],
 
 1136  const UINT32 bpp = FreeRDPGetBytesPerPixel(srcFormat);
 
       /* x3/x4: byte offsets of the pixel pair one source row (srcStep) further. */
 1139  size_t x3 = srcStep;
 
 1140  size_t x4 = srcStep + bpp;
 
       /* y3/y4: luma indices of that pair one destination row (dstStep[0]) further. */
 1143  size_t y3 = dstStep[0];
 
 1144  size_t y4 = dstStep[0] + 1;
 
 1145  UINT32 max_x = roi->width - 1;
 
       /* Row pairs: the bound excludes a trailing odd row; i counts chroma rows. */
 1148  for (
size_t i = 0; y < roi->height - roi->height % 2; y += 2, i++)
 
 1150    const BYTE* src = pSrc + y * srcStep;
 
 1151    BYTE* ydst = pDst[0] + y * dstStep[0];
 
 1152    BYTE* udst = pDst[1] + i * dstStep[1];
 
 1153    BYTE* vdst = pDst[2] + i * dstStep[2];
 
 1155    for (
size_t x = 0; x < roi->width; x += 2)
 
           /* Upper row, first pixel. */
 1165      color = FreeRDPReadColor(src + x1, srcFormat);
 
 1166      FreeRDPSplitColor(color, srcFormat, &R, &G, &B, NULL, NULL);
 
 1170      ydst[y1] = RGB2Y(R, G, B);
 
             /* Upper row, second pixel. */
 1175        color = FreeRDPReadColor(src + x2, srcFormat);
 
 1176        FreeRDPSplitColor(color, srcFormat, &R, &G, &B, NULL, NULL);
 
 1180        ydst[y2] = RGB2Y(R, G, B);
 
           /* Lower row, first pixel. */
 1184      color = FreeRDPReadColor(src + x3, srcFormat);
 
 1185      FreeRDPSplitColor(color, srcFormat, &R, &G, &B, NULL, NULL);
 
 1189      ydst[y3] = RGB2Y(R, G, B);
 
             /* Lower row, second pixel. */
 1194        color = FreeRDPReadColor(src + x4, srcFormat);
 
 1195        FreeRDPSplitColor(color, srcFormat, &R, &G, &B, NULL, NULL);
 
 1199        ydst[y4] = RGB2Y(R, G, B);
 
           /* One chroma sample pair per pixel group. */
 1205      *udst++ = RGB2U(Ra, Ga, Ba);
 
 1206      *vdst++ = RGB2V(Ra, Ga, Ba);
 
       /* Rows left over by the pair loop (an odd roi->height); chroma row is y / 2. */
 1212  for (; y < roi->height; y++)
 
 1214    const BYTE* src = pSrc + y * srcStep;
 
 1215    BYTE* ydst = pDst[0] + y * dstStep[0];
 
 1216    BYTE* udst = pDst[1] + (y / 2) * dstStep[1];
 
 1217    BYTE* vdst = pDst[2] + (y / 2) * dstStep[2];
 
 1219    for (
size_t x = 0; x < roi->width; x += 2)
 
 1225      UINT32 color = FreeRDPReadColor(src + x1, srcFormat);
 
 1226      FreeRDPSplitColor(color, srcFormat, &R, &G, &B, NULL, NULL);
 
 1230      ydst[y1] = RGB2Y(R, G, B);
 
 1235        color = FreeRDPReadColor(src + x2, srcFormat);
 
 1236        FreeRDPSplitColor(color, srcFormat, &R, &G, &B, NULL, NULL);
 
 1240        ydst[y2] = RGB2Y(R, G, B);
 
 1246      *udst++ = RGB2U(Ra, Ga, Ba);
 
 1247      *vdst++ = RGB2V(Ra, Ga, Ba);
 
 1253  return PRIMITIVES_SUCCESS;
 
 /* Entry point for RGB -> YUV 4:2:0 conversion: picks a converter by pixel
  * format. BGRA32/BGRX32 go to general_RGBToYUV420_BGRX(), RGBA32/RGBX32 go to
  * general_RGBToYUV420_RGBX(), and general_RGBToYUV420_ANY() handles the
  * remaining path. Returns the status of the selected converter.
  *
  * NOTE(review): the switch header and the label guarding the _ANY call are
  * not visible in this excerpt; the selector is presumably srcFormat and the
  * _ANY call the default case - confirm. */
 1256static pstatus_t general_RGBToYUV420_8u_P3AC4R(
const BYTE* WINPR_RESTRICT pSrc, UINT32 srcFormat,
 
 1257                                               UINT32 srcStep, BYTE* WINPR_RESTRICT pDst[3],
 
 1258                                               const UINT32 dstStep[3],
 
 1263    case PIXEL_FORMAT_BGRA32:
 
 1264    case PIXEL_FORMAT_BGRX32:
 
 1265      return general_RGBToYUV420_BGRX(pSrc, srcStep, pDst, dstStep, roi);
 
 1267    case PIXEL_FORMAT_RGBA32:
 
 1268    case PIXEL_FORMAT_RGBX32:
 
 1269      return general_RGBToYUV420_RGBX(pSrc, srcStep, pDst, dstStep, roi);
 
 1272      return general_RGBToYUV420_ANY(pSrc, srcFormat, srcStep, pDst, dstStep, roi);
 
 /* Converts two source rows (even and odd) of 4-byte pixels, read in B, G, R
  * byte order, into Y/U/V samples for the AVC444 encoder, starting at pixel
  * column `offset` and advancing two pixels per iteration.
  *
  * Per iteration up to four pixels are converted: 1e/2e are the first/second
  * pixel of the even row, 1o/2o those of the odd row. The first even pixel
  * seeds all four sets of values, so pixels that are skipped fall back to it.
  * The U and V values of the four are averaged into Uavg/Vavg.
  *
  * width must be even (asserted).
  *
  * NOTE(review): the stores into b1Even/b1Odd/b2..b7 and most of the guarding
  * conditions are not visible in this excerpt. */
 1276static inline void int_general_RGBToAVC444YUV_BGRX_DOUBLE_ROW(
 
 1277    size_t offset, 
const BYTE* WINPR_RESTRICT pSrcEven, 
const BYTE* WINPR_RESTRICT pSrcOdd,
 
 1278    BYTE* WINPR_RESTRICT b1Even, BYTE* WINPR_RESTRICT b1Odd, BYTE* WINPR_RESTRICT b2,
 
 1279    BYTE* WINPR_RESTRICT b3, BYTE* WINPR_RESTRICT b4, BYTE* WINPR_RESTRICT b5,
 
 1280    BYTE* WINPR_RESTRICT b6, BYTE* WINPR_RESTRICT b7, UINT32 width)
 
 1282  WINPR_ASSERT((width % 2) == 0);
 
 1283  for (
size_t x = offset; x < width; x += 2)
 
         /* 4 bytes per pixel: byte offset of column x is 4 * x. */
 1285    const BYTE* srcEven = &pSrcEven[4ULL * x];
 
 1286    const BYTE* srcOdd = &pSrcOdd[4ULL * x];
 
         /* TRUE when x is the final column, i.e. the pair has no second pixel. */
 1287    const BOOL lastX = (x + 1) >= width;
 
           /* Even row, first pixel: also the default for the other three. */
 1302      const BYTE b = *srcEven++;
 
 1303      const BYTE g = *srcEven++;
 
 1304      const BYTE r = *srcEven++;
 
 1306      Y1e = Y2e = Y1o = Y2o = RGB2Y(r, g, b);
 
 1307      U1e = U2e = U1o = U2o = RGB2U(r, g, b);
 
 1308      V1e = V2e = V1o = V2o = RGB2V(r, g, b);
 
           /* Even row, second pixel. */
 1313      const BYTE b = *srcEven++;
 
 1314      const BYTE g = *srcEven++;
 
 1315      const BYTE r = *srcEven++;
 
 1317      Y2e = RGB2Y(r, g, b);
 
 1318      U2e = RGB2U(r, g, b);
 
 1319      V2e = RGB2V(r, g, b);
 
           /* Odd row, first pixel: also the default for the odd row's second pixel. */
 1324      const BYTE b = *srcOdd++;
 
 1325      const BYTE g = *srcOdd++;
 
 1326      const BYTE r = *srcOdd++;
 
 1328      Y1o = Y2o = RGB2Y(r, g, b);
 
 1329      U1o = U2o = RGB2U(r, g, b);
 
 1330      V1o = V2o = RGB2V(r, g, b);
 
         /* Odd row, second pixel: only with an odd-row destination and a second column. */
 1333    if (b1Odd && !lastX)
 
 1335      const BYTE b = *srcOdd++;
 
 1336      const BYTE g = *srcOdd++;
 
 1337      const BYTE r = *srcOdd++;
 
 1339      Y2o = RGB2Y(r, g, b);
 
 1340      U2o = RGB2U(r, g, b);
 
 1341      V2o = RGB2V(r, g, b);
 
           /* Sum in 16 bits so four BYTE values cannot overflow before the divide. */
 1357      const BYTE Uavg = WINPR_ASSERTING_INT_CAST(BYTE, ((UINT16)U1e + U2e + U1o + U2o) / 4);
 
 1358      const BYTE Vavg = WINPR_ASSERTING_INT_CAST(BYTE, ((UINT16)V1e + V2e + V1o + V2o) / 4);
 
 /* Externally visible wrapper (no `static`) that forwards its arguments to
  * int_general_RGBToAVC444YUV_BGRX_DOUBLE_ROW().
  * NOTE(review): the end of the parameter list and of the forwarded call are
  * not visible in this excerpt. */
 1385void general_RGBToAVC444YUV_BGRX_DOUBLE_ROW(
size_t offset, 
const BYTE* WINPR_RESTRICT pSrcEven,
 
 1386                                            const BYTE* WINPR_RESTRICT pSrcOdd,
 
 1387                                            BYTE* WINPR_RESTRICT b1Even, BYTE* WINPR_RESTRICT b1Odd,
 
 1388                                            BYTE* WINPR_RESTRICT b2, BYTE* WINPR_RESTRICT b3,
 
 1389                                            BYTE* WINPR_RESTRICT b4, BYTE* WINPR_RESTRICT b5,
 
 1390                                            BYTE* WINPR_RESTRICT b6, BYTE* WINPR_RESTRICT b7,
 
 1393  int_general_RGBToAVC444YUV_BGRX_DOUBLE_ROW(offset, pSrcEven, pSrcOdd, b1Even, b1Odd, b2, b3, b4,
 
 /* AVC444 (v1) conversion of a BGRX source: walks the image in row pairs and
  * hands each pair to int_general_RGBToAVC444YUV_BGRX_DOUBLE_ROW() together
  * with the destination row pointers in the two output frames pDst1/pDst2.
  *
  * pSrc, srcStep:   source image and row stride in bytes.
  * pDst1, dst1Step: first output frame (three planes) and strides.
  * pDst2, dst2Step: second output frame (three planes) and strides.
  * The processed size is taken from roi->width / roi->height.
  * Returns PRIMITIVES_SUCCESS.
  *
  * NOTE(review): the declaration of y and the roi parameter are not visible
  * in this excerpt. */
 1397static inline pstatus_t general_RGBToAVC444YUV_BGRX(
const BYTE* WINPR_RESTRICT pSrc, UINT32 srcStep,
 
 1398                                                    BYTE* WINPR_RESTRICT pDst1[3],
 
 1399                                                    const UINT32 dst1Step[3],
 
 1400                                                    BYTE* WINPR_RESTRICT pDst2[3],
 
 1401                                                    const UINT32 dst2Step[3],
 
       /* Row pairs; the bound excludes a trailing odd row. */
 1409  for (; y < roi->height - roi->height % 2; y += 2)
 
 1411    const BYTE* srcEven = pSrc + 1ULL * y * srcStep;
 
 1412    const BYTE* srcOdd = pSrc + 1ULL * (y + 1) * srcStep;
 
 1413    const size_t i = y >> 1;
 
         /* n = i plus i rounded down to a multiple of 8 (for i fitting in 32 bits):
          * row pairs are laid out in pDst2[0] in groups of 8 rows, with b5 placed
          * 8 rows after b4. */
 1414    const size_t n = (i & (uint32_t)~7) + i;
 
 1415    BYTE* b1Even = pDst1[0] + 1ULL * y * dst1Step[0];
 
 1416    BYTE* b1Odd = (b1Even + dst1Step[0]);
 
 1417    BYTE* b2 = pDst1[1] + 1ULL * (y / 2) * dst1Step[1];
 
 1418    BYTE* b3 = pDst1[2] + 1ULL * (y / 2) * dst1Step[2];
 
 1419    BYTE* b4 = pDst2[0] + 1ULL * dst2Step[0] * n;
 
 1420    BYTE* b5 = b4 + 8ULL * dst2Step[0];
 
 1421    BYTE* b6 = pDst2[1] + 1ULL * (y / 2) * dst2Step[1];
 
 1422    BYTE* b7 = pDst2[2] + 1ULL * (y / 2) * dst2Step[2];
 
 1423    int_general_RGBToAVC444YUV_BGRX_DOUBLE_ROW(0, srcEven, srcOdd, b1Even, b1Odd, b2, b3, b4,
 
 1424                                               b5, b6, b7, roi->width);
 
       /* Row left over by the pair loop (an odd roi->height): no odd source row,
        * so srcOdd, b1Odd, b4 and b5 are passed as NULL. */
 1426  for (; y < roi->height; y++)
 
 1428    const BYTE* srcEven = pSrc + 1ULL * y * srcStep;
 
 1429    BYTE* b1Even = pDst1[0] + 1ULL * y * dst1Step[0];
 
 1430    BYTE* b2 = pDst1[1] + 1ULL * (y / 2) * dst1Step[1];
 
 1431    BYTE* b3 = pDst1[2] + 1ULL * (y / 2) * dst1Step[2];
 
 1432    BYTE* b6 = pDst2[1] + 1ULL * (y / 2) * dst2Step[1];
 
 1433    BYTE* b7 = pDst2[2] + 1ULL * (y / 2) * dst2Step[2];
 
 1434    int_general_RGBToAVC444YUV_BGRX_DOUBLE_ROW(0, srcEven, NULL, b1Even, NULL, b2, b3, NULL,
 
 1435                                               NULL, b6, b7, roi->width);
 
 1438  return PRIMITIVES_SUCCESS;
 
 /* Converts two source rows (even and odd) of pixels read in R, G, B byte
  * order into Y/U/V samples for the AVC444 encoder, two pixels per iteration.
  *
  * Per iteration up to four pixels are converted: 1e/2e are the first/second
  * pixel of the even row, 1o/2o those of the odd row. The first even pixel
  * seeds all four sets of values, so pixels that are skipped fall back to it.
  * The U and V values of the four are averaged into Uavg/Vavg.
  *
  * width must be even (asserted).
  *
  * NOTE(review): the stores into b1Even/b1Odd/b2..b7, the skipping of the
  * fourth pixel byte and most guarding conditions are not visible in this
  * excerpt. */
 1441static inline void general_RGBToAVC444YUV_RGBX_DOUBLE_ROW(
 
 1442    const BYTE* WINPR_RESTRICT srcEven, 
const BYTE* WINPR_RESTRICT srcOdd,
 
 1443    BYTE* WINPR_RESTRICT b1Even, BYTE* WINPR_RESTRICT b1Odd, BYTE* WINPR_RESTRICT b2,
 
 1444    BYTE* WINPR_RESTRICT b3, BYTE* WINPR_RESTRICT b4, BYTE* WINPR_RESTRICT b5,
 
 1445    BYTE* WINPR_RESTRICT b6, BYTE* WINPR_RESTRICT b7, UINT32 width)
 
 1447  WINPR_ASSERT((width % 2) == 0);
 
 1448  for (UINT32 x = 0; x < width; x += 2)
 
         /* TRUE when x is the final column, i.e. the pair has no second pixel. */
 1450    const BOOL lastX = (x + 1) >= width;
 
           /* Even row, first pixel: also the default for the other three. */
 1465      const BYTE r = *srcEven++;
 
 1466      const BYTE g = *srcEven++;
 
 1467      const BYTE b = *srcEven++;
 
 1469      Y1e = Y2e = Y1o = Y2o = RGB2Y(r, g, b);
 
 1470      U1e = U2e = U1o = U2o = RGB2U(r, g, b);
 
 1471      V1e = V2e = V1o = V2o = RGB2V(r, g, b);
 
           /* Even row, second pixel. */
 1476      const BYTE r = *srcEven++;
 
 1477      const BYTE g = *srcEven++;
 
 1478      const BYTE b = *srcEven++;
 
 1480      Y2e = RGB2Y(r, g, b);
 
 1481      U2e = RGB2U(r, g, b);
 
 1482      V2e = RGB2V(r, g, b);
 
           /* Odd row, first pixel: also the default for the odd row's second pixel. */
 1487      const BYTE r = *srcOdd++;
 
 1488      const BYTE g = *srcOdd++;
 
 1489      const BYTE b = *srcOdd++;
 
 1491      Y1o = Y2o = RGB2Y(r, g, b);
 
 1492      U1o = U2o = RGB2U(r, g, b);
 
 1493      V1o = V2o = RGB2V(r, g, b);
 
         /* Odd row, second pixel: only with an odd-row destination and a second column. */
 1496    if (b1Odd && !lastX)
 
 1498      const BYTE r = *srcOdd++;
 
 1499      const BYTE g = *srcOdd++;
 
 1500      const BYTE b = *srcOdd++;
 
 1502      Y2o = RGB2Y(r, g, b);
 
 1503      U2o = RGB2U(r, g, b);
 
 1504      V2o = RGB2V(r, g, b);
 
           /* Sum in 16 bits so four BYTE values cannot overflow before the divide. */
 1520      const BYTE Uavg = WINPR_ASSERTING_INT_CAST(BYTE, ((UINT16)U1e + U2e + U1o + U2o) / 4);
 
 1521      const BYTE Vavg = WINPR_ASSERTING_INT_CAST(BYTE, ((UINT16)V1e + V2e + V1o + V2o) / 4);
 
 /* AVC444 (v1) conversion of an RGBX source: walks the image in row pairs and
  * hands each pair to general_RGBToAVC444YUV_RGBX_DOUBLE_ROW() together with
  * the destination row pointers in the two output frames pDst1/pDst2.
  *
  * pSrc, srcStep:   source image and row stride in bytes.
  * pDst1, dst1Step: first output frame (three planes) and strides.
  * pDst2, dst2Step: second output frame (three planes) and strides.
  * The processed size is taken from roi->width / roi->height.
  * Returns PRIMITIVES_SUCCESS.
  *
  * NOTE(review): the declaration of y, the roi parameter and the tails of the
  * two helper calls are not visible in this excerpt. */
 1548static inline pstatus_t general_RGBToAVC444YUV_RGBX(
const BYTE* WINPR_RESTRICT pSrc, UINT32 srcStep,
 
 1549                                                    BYTE* WINPR_RESTRICT pDst1[3],
 
 1550                                                    const UINT32 dst1Step[3],
 
 1551                                                    BYTE* WINPR_RESTRICT pDst2[3],
 
 1552                                                    const UINT32 dst2Step[3],
 
       /* Row pairs; the bound excludes a trailing odd row. */
 1561  for (; y < roi->height - roi->height % 2; y += 2)
 
         /* NOTE(review): if y starts at an even value this can never be TRUE inside
          * this loop, because the bound already stops before the final odd row. */
 1563    const BOOL last = (y >= (roi->height - 1));
 
 1564    const BYTE* srcEven = pSrc + 1ULL * y * srcStep;
 
 1565    const BYTE* srcOdd = pSrc + 1ULL * (y + 1) * srcStep;
 
 1566    const size_t i = y >> 1;
 
         /* n = i plus i rounded down to a multiple of 8: row pairs are laid out in
          * pDst2[0] in groups of 8 rows, with b5 placed 8 rows after b4. */
 1567    const size_t n = (i & (size_t)~7) + i;
 
 1568    BYTE* b1Even = pDst1[0] + 1ULL * y * dst1Step[0];
 
 1569    BYTE* b1Odd = !last ? (b1Even + dst1Step[0]) : NULL;
 
 1570    BYTE* b2 = pDst1[1] + 1ULL * (y / 2) * dst1Step[1];
 
 1571    BYTE* b3 = pDst1[2] + 1ULL * (y / 2) * dst1Step[2];
 
 1572    BYTE* b4 = pDst2[0] + 1ULL * dst2Step[0] * n;
 
 1573    BYTE* b5 = b4 + 8ULL * dst2Step[0];
 
 1574    BYTE* b6 = pDst2[1] + 1ULL * (y / 2) * dst2Step[1];
 
 1575    BYTE* b7 = pDst2[2] + 1ULL * (y / 2) * dst2Step[2];
 
 1576    general_RGBToAVC444YUV_RGBX_DOUBLE_ROW(srcEven, srcOdd, b1Even, b1Odd, b2, b3, b4, b5, b6,
 
       /* Row left over by the pair loop (an odd roi->height): no odd source row,
        * so srcOdd, b1Odd, b4 and b5 are passed as NULL. */
 1579  for (; y < roi->height; y++)
 
 1581    const BYTE* srcEven = pSrc + 1ULL * y * srcStep;
 
 1582    BYTE* b1Even = pDst1[0] + 1ULL * y * dst1Step[0];
 
 1583    BYTE* b2 = pDst1[1] + 1ULL * (y / 2) * dst1Step[1];
 
 1584    BYTE* b3 = pDst1[2] + 1ULL * (y / 2) * dst1Step[2];
 
 1585    BYTE* b6 = pDst2[1] + 1ULL * (y / 2) * dst2Step[1];
 
 1586    BYTE* b7 = pDst2[2] + 1ULL * (y / 2) * dst2Step[2];
 
 1587    general_RGBToAVC444YUV_RGBX_DOUBLE_ROW(srcEven, NULL, b1Even, NULL, b2, b3, NULL, NULL, b6,
 
 1590  return PRIMITIVES_SUCCESS;
 
 /* Format-agnostic variant of the AVC444 (v1) double-row conversion: pixels
  * of the even and odd source rows are fetched with FreeRDPReadColor() and
  * split with FreeRDPSplitColor(), two pixels per iteration.
  *
  * Per iteration up to four pixels are converted: 1e/2e are the first/second
  * pixel of the even row, 1o/2o those of the odd row. The first even pixel
  * seeds all four sets of values, so pixels that are skipped fall back to it.
  * The U and V values of the four are averaged into Uavg/Vavg.
  *
  * NOTE(review): the stores into b1Even/b1Odd/b2..b7, the advancing of
  * srcEven/srcOdd (presumably by bpp) and most guarding conditions are not
  * visible in this excerpt. */
 1593static inline void general_RGBToAVC444YUV_ANY_DOUBLE_ROW(
 
 1594    const BYTE* WINPR_RESTRICT srcEven, 
const BYTE* WINPR_RESTRICT srcOdd, UINT32 srcFormat,
 
 1595    BYTE* WINPR_RESTRICT b1Even, BYTE* WINPR_RESTRICT b1Odd, BYTE* WINPR_RESTRICT b2,
 
 1596    BYTE* WINPR_RESTRICT b3, BYTE* WINPR_RESTRICT b4, BYTE* WINPR_RESTRICT b5,
 
 1597    BYTE* WINPR_RESTRICT b6, BYTE* WINPR_RESTRICT b7, UINT32 width)
 
 1599  const UINT32 bpp = FreeRDPGetBytesPerPixel(srcFormat);
 
 1600  for (UINT32 x = 0; x < width; x += 2)
 
         /* TRUE when x is the final column, i.e. the pair has no second pixel. */
 1602    const BOOL lastX = (x + 1) >= width;
 
           /* Even row, first pixel: also the default for the other three. */
 1620      const UINT32 color = FreeRDPReadColor(srcEven, srcFormat);
 
 1622      FreeRDPSplitColor(color, srcFormat, &r, &g, &b, NULL, NULL);
 
 1623      Y1e = Y2e = Y1o = Y2o = RGB2Y(r, g, b);
 
 1624      U1e = U2e = U1o = U2o = RGB2U(r, g, b);
 
 1625      V1e = V2e = V1o = V2o = RGB2V(r, g, b);
 
           /* Even row, second pixel. */
 1633      const UINT32 color = FreeRDPReadColor(srcEven, srcFormat);
 
 1635      FreeRDPSplitColor(color, srcFormat, &r, &g, &b, NULL, NULL);
 
 1636      Y2e = RGB2Y(r, g, b);
 
 1637      U2e = RGB2U(r, g, b);
 
 1638      V2e = RGB2V(r, g, b);
 
           /* Odd row, first pixel: also the default for the odd row's second pixel. */
 1646      const UINT32 color = FreeRDPReadColor(srcOdd, srcFormat);
 
 1648      FreeRDPSplitColor(color, srcFormat, &r, &g, &b, NULL, NULL);
 
 1649      Y1o = Y2o = RGB2Y(r, g, b);
 
 1650      U1o = U2o = RGB2U(r, g, b);
 
 1651      V1o = V2o = RGB2V(r, g, b);
 
         /* Odd row, second pixel: only with an odd-row destination and a second column. */
 1654    if (b1Odd && !lastX)
 
 1659      const UINT32 color = FreeRDPReadColor(srcOdd, srcFormat);
 
 1661      FreeRDPSplitColor(color, srcFormat, &r, &g, &b, NULL, NULL);
 
 1662      Y2o = RGB2Y(r, g, b);
 
 1663      U2o = RGB2U(r, g, b);
 
 1664      V2o = RGB2V(r, g, b);
 
           /* Sum in 16 bits so four values cannot overflow a BYTE before the divide. */
 1680      const BYTE Uavg = WINPR_ASSERTING_INT_CAST(
 
 1681          BYTE, ((UINT16)U1e + (UINT16)U2e + (UINT16)U1o + (UINT16)U2o) / 4);
 
 1682      const BYTE Vavg = WINPR_ASSERTING_INT_CAST(
 
 1683          BYTE, ((UINT16)V1e + (UINT16)V2e + (UINT16)V1o + (UINT16)V2o) / 4);
 
 /* AVC444 (v1) conversion for an arbitrary source pixel format: walks the
  * image two rows at a time and hands each pair to
  * general_RGBToAVC444YUV_ANY_DOUBLE_ROW() with the destination row pointers
  * in the two output frames pDst1/pDst2.
  *
  * Unlike the BGRX/RGBX variants, a single loop covers an odd final row: for
  * that row srcOdd falls back to the last source row and b1Odd is NULL.
  *
  * The processed size is taken from roi->width / roi->height.
  * Returns PRIMITIVES_SUCCESS.
  *
  * NOTE(review): the roi parameter and a long span between the signature and
  * the first visible statement are not shown in this excerpt. */
 1710static inline pstatus_t
 
 1711general_RGBToAVC444YUV_ANY(
const BYTE* WINPR_RESTRICT pSrc, UINT32 srcFormat, UINT32 srcStep,
 
 1712                           BYTE* WINPR_RESTRICT pDst1[3], 
const UINT32 dst1Step[3],
 
 1713                           BYTE* WINPR_RESTRICT pDst2[3], 
const UINT32 dst2Step[3],
 
       /* Start of the last source row, used as a fallback row pointer below. */
 1773  const BYTE* pMaxSrc = pSrc + 1ULL * (roi->height - 1) * srcStep;
 
 1775  for (
size_t y = 0; y < roi->height; y += 2)
 
         /* y is narrowed to UINT32 below. */
 1777    WINPR_ASSERT(y < UINT32_MAX);
 
         /* TRUE when y is the final row, i.e. there is no odd partner row. */
 1779    const BOOL last = (y >= (roi->height - 1));
 
         /* The loop condition already ensures y < roi->height here. */
 1780    const BYTE* srcEven = y < roi->height ? pSrc + y * srcStep : pMaxSrc;
 
 1781    const BYTE* srcOdd = !last ? pSrc + (y + 1) * srcStep : pMaxSrc;
 
 1782    const UINT32 i = (UINT32)y >> 1;
 
         /* n = i plus i rounded down to a multiple of 8: row pairs are laid out in
          * pDst2[0] in groups of 8 rows, with b5 placed 8 rows after b4. */
 1783    const UINT32 n = (i & (uint32_t)~7) + i;
 
 1784    BYTE* b1Even = pDst1[0] + y * dst1Step[0];
 
 1785    BYTE* b1Odd = !last ? (b1Even + dst1Step[0]) : NULL;
 
 1786    BYTE* b2 = pDst1[1] + (y / 2) * dst1Step[1];
 
 1787    BYTE* b3 = pDst1[2] + (y / 2) * dst1Step[2];
 
 1788    BYTE* b4 = pDst2[0] + 1ULL * dst2Step[0] * n;
 
 1789    BYTE* b5 = b4 + 8ULL * dst2Step[0];
 
 1790    BYTE* b6 = pDst2[1] + (y / 2) * dst2Step[1];
 
 1791    BYTE* b7 = pDst2[2] + (y / 2) * dst2Step[2];
 
 1792    general_RGBToAVC444YUV_ANY_DOUBLE_ROW(srcEven, srcOdd, srcFormat, b1Even, b1Odd, b2, b3, b4,
 
 1793                                          b5, b6, b7, roi->width);
 
 1796  return PRIMITIVES_SUCCESS;
 
 /* Entry point for the AVC444 (v1) RGB -> YUV conversion.
  *
  * Checks that pSrc, both plane arrays, both stride arrays, every plane
  * pointer and every stride are non-NULL / non-zero, then picks a converter:
  * BGRA32/BGRX32 go to general_RGBToAVC444YUV_BGRX(), RGBA32/RGBX32 to
  * general_RGBToAVC444YUV_RGBX(), and general_RGBToAVC444YUV_ANY() handles
  * the remaining path. The final return yields !PRIMITIVES_SUCCESS.
  *
  * NOTE(review): the statements executed when a check fails, the switch
  * header and the label guarding the _ANY call are not visible in this
  * excerpt; the checks presumably return an error status - confirm. */
 1799static inline pstatus_t general_RGBToAVC444YUV(
const BYTE* WINPR_RESTRICT pSrc, UINT32 srcFormat,
 
 1800                                               UINT32 srcStep, BYTE* WINPR_RESTRICT pDst1[3],
 
 1801                                               const UINT32 dst1Step[3],
 
 1802                                               BYTE* WINPR_RESTRICT pDst2[3],
 
 1803                                               const UINT32 dst2Step[3],
 
       /* Argument validation: pointers first, then the per-plane entries. */
 1806  if (!pSrc || !pDst1 || !dst1Step || !pDst2 || !dst2Step)
 
 1809  if (!pDst1[0] || !pDst1[1] || !pDst1[2])
 
 1812  if (!dst1Step[0] || !dst1Step[1] || !dst1Step[2])
 
 1815  if (!pDst2[0] || !pDst2[1] || !pDst2[2])
 
 1818  if (!dst2Step[0] || !dst2Step[1] || !dst2Step[2])
 
 1824    case PIXEL_FORMAT_BGRA32:
 
 1825    case PIXEL_FORMAT_BGRX32:
 
 1826      return general_RGBToAVC444YUV_BGRX(pSrc, srcStep, pDst1, dst1Step, pDst2, dst2Step,
 
 1829    case PIXEL_FORMAT_RGBA32:
 
 1830    case PIXEL_FORMAT_RGBX32:
 
 1831      return general_RGBToAVC444YUV_RGBX(pSrc, srcStep, pDst1, dst1Step, pDst2, dst2Step,
 
 1835      return general_RGBToAVC444YUV_ANY(pSrc, srcFormat, srcStep, pDst1, dst1Step, pDst2,
 
 1839  return !PRIMITIVES_SUCCESS;
 
 /* Format-agnostic AVC444v2 conversion of two source rows (even and odd),
  * two pixels per iteration. Pixels are fetched with FreeRDPReadColor() and
  * split with FreeRDPSplitColor().
  *
  * Naming of the per-iteration samples: a/b are the first/second pixel of the
  * even row, c/d the first/second pixel of the odd row.
  *
  * Outputs visible in this excerpt:
  *   - yLumaDstEven / yLumaDstOdd receive the Y samples of the even/odd row;
  *   - uLumaDst / vLumaDst receive the average of the four U / V samples;
  *   - yEvenChromaDst1/2 receive Ub / Vb, yOddChromaDst1/2 receive Ud / Vd;
  *   - uChromaDst1/2 or vChromaDst1/2 receive Uc / Vc.
  *
  * width must be even (asserted).
  *
  * NOTE(review): the conditions selecting between these stores, the defaults
  * for b/c/d when a pixel is skipped and the advancing of srcEven/srcOdd
  * (presumably by bpp) are not visible in this excerpt. */
 1842static inline void general_RGBToAVC444YUVv2_ANY_DOUBLE_ROW(
 
 1843    const BYTE* WINPR_RESTRICT srcEven, 
const BYTE* WINPR_RESTRICT srcOdd, UINT32 srcFormat,
 
 1844    BYTE* WINPR_RESTRICT yLumaDstEven, BYTE* WINPR_RESTRICT yLumaDstOdd,
 
 1845    BYTE* WINPR_RESTRICT uLumaDst, BYTE* WINPR_RESTRICT vLumaDst,
 
 1846    BYTE* WINPR_RESTRICT yEvenChromaDst1, BYTE* WINPR_RESTRICT yEvenChromaDst2,
 
 1847    BYTE* WINPR_RESTRICT yOddChromaDst1, BYTE* WINPR_RESTRICT yOddChromaDst2,
 
 1848    BYTE* WINPR_RESTRICT uChromaDst1, BYTE* WINPR_RESTRICT uChromaDst2,
 
 1849    BYTE* WINPR_RESTRICT vChromaDst1, BYTE* WINPR_RESTRICT vChromaDst2, UINT32 width)
 
 1851  const UINT32 bpp = FreeRDPGetBytesPerPixel(srcFormat);
 
 1853  WINPR_ASSERT((width % 2) == 0);
 
 1854  for (UINT32 x = 0; x < width; x += 2)
 
           /* Pixel a: even row, first pixel. */
 1872      const UINT32 color = FreeRDPReadColor(srcEven, srcFormat);
 
 1874      FreeRDPSplitColor(color, srcFormat, &r, &g, &b, NULL, NULL);
 
 1875      Ya = RGB2Y(r, g, b);
 
 1876      Ua = RGB2U(r, g, b);
 
 1877      Va = RGB2V(r, g, b);
 
           /* Pixel b: even row, second pixel. */
 1885      const UINT32 color = FreeRDPReadColor(srcEven, srcFormat);
 
 1887      FreeRDPSplitColor(color, srcFormat, &r, &g, &b, NULL, NULL);
 
 1888      Yb = RGB2Y(r, g, b);
 
 1889      Ub = RGB2U(r, g, b);
 
 1890      Vb = RGB2V(r, g, b);
 
           /* Pixel c: odd row, first pixel. */
 1904      const UINT32 color = FreeRDPReadColor(srcOdd, srcFormat);
 
 1906      FreeRDPSplitColor(color, srcFormat, &r, &g, &b, NULL, NULL);
 
 1907      Yc = RGB2Y(r, g, b);
 
 1908      Uc = RGB2U(r, g, b);
 
 1909      Vc = RGB2V(r, g, b);
 
         /* Pixel d: odd row, second pixel; needs an odd row and a second column. */
 1918    if (srcOdd && (x < width - 1))
 
 1923      const UINT32 color = FreeRDPReadColor(srcOdd, srcFormat);
 
 1925      FreeRDPSplitColor(color, srcFormat, &r, &g, &b, NULL, NULL);
 
 1926      Yd = RGB2Y(r, g, b);
 
 1927      Ud = RGB2U(r, g, b);
 
 1928      Vd = RGB2V(r, g, b);
 
         /* Luma of the four pixels. */
 1938    *yLumaDstEven++ = Ya;
 
 1941      *yLumaDstEven++ = Yb;
 
 1944      *yLumaDstOdd++ = Yc;
 
 1946    if (srcOdd && (x < width - 1))
 
 1947      *yLumaDstOdd++ = Yd;
 
         /* Averaged chroma of the 2x2 group. */
 1950    *uLumaDst++ = (Ua + Ub + Uc + Ud) / 4;
 
 1951    *vLumaDst++ = (Va + Vb + Vc + Vd) / 4;
 
           /* Chroma of the individual pixels b, d and c. */
 1956      *yEvenChromaDst1++ = Ub;
 
 1957      *yEvenChromaDst2++ = Vb;
 
 1965        *yOddChromaDst1++ = Ud;
 
 1966        *yOddChromaDst2++ = Vd;
 
 1972        *uChromaDst1++ = Uc;
 
 1973        *uChromaDst2++ = Vc;
 
 1978        *vChromaDst1++ = Uc;
 
 1979        *vChromaDst2++ = Vc;
 
 /* AVC444v2 conversion for an arbitrary source pixel format: walks the image
  * in row pairs and hands each pair to
  * general_RGBToAVC444YUVv2_ANY_DOUBLE_ROW() with the destination row
  * pointers in the two output frames pDst1/pDst2.
  *
  * Yields !PRIMITIVES_SUCCESS when roi->height or roi->width is below 1,
  * PRIMITIVES_SUCCESS otherwise.
  *
  * NOTE(review): the roi parameter, the declaration of y and a long span
  * between the signature and the first visible statement are not shown in
  * this excerpt. */
 1985static inline pstatus_t
 
 1986general_RGBToAVC444YUVv2_ANY(
const BYTE* WINPR_RESTRICT pSrc, UINT32 srcFormat, UINT32 srcStep,
 
 1987                             BYTE* WINPR_RESTRICT pDst1[3], 
const UINT32 dst1Step[3],
 
 1988                             BYTE* WINPR_RESTRICT pDst2[3], 
const UINT32 dst2Step[3],
 
 2041  if (roi->height < 1 || roi->width < 1)
 
 2042    return !PRIMITIVES_SUCCESS;
 
       /* Row pairs; the bound excludes a trailing odd row. */
 2045  for (; y < roi->height - roi->height % 2; y += 2)
 
 2047    const BYTE* srcEven = (pSrc + y * srcStep);
 
 2048    const BYTE* srcOdd = (y < roi->height - 1) ? (srcEven + srcStep) : NULL;
 
 2049    BYTE* dstLumaYEven = (pDst1[0] + y * dst1Step[0]);
 
 2050    BYTE* dstLumaYOdd = (dstLumaYEven + dst1Step[0]);
 
 2051    BYTE* dstLumaU = (pDst1[1] + (y / 2) * dst1Step[1]);
 
 2052    BYTE* dstLumaV = (pDst1[2] + (y / 2) * dst1Step[2]);
 
         /* Each pDst2[0] row is split in two halves of roi->width / 2 bytes; the
          * odd-row pointers are the same positions one dst2Step[0] further. */
 2053    BYTE* dstEvenChromaY1 = (pDst2[0] + y * dst2Step[0]);
 
 2054    BYTE* dstEvenChromaY2 = dstEvenChromaY1 + roi->width / 2;
 
 2055    BYTE* dstOddChromaY1 = dstEvenChromaY1 + dst2Step[0];
 
 2056    BYTE* dstOddChromaY2 = dstEvenChromaY2 + dst2Step[0];
 
         /* Each pDst2[1] / pDst2[2] row is split in two parts roi->width / 4 apart. */
 2057    BYTE* dstChromaU1 = (pDst2[1] + (y / 2) * dst2Step[1]);
 
 2058    BYTE* dstChromaV1 = (pDst2[2] + (y / 2) * dst2Step[2]);
 
 2059    BYTE* dstChromaU2 = dstChromaU1 + roi->width / 4;
 
 2060    BYTE* dstChromaV2 = dstChromaV1 + roi->width / 4;
 
 2061    general_RGBToAVC444YUVv2_ANY_DOUBLE_ROW(
 
 2062        srcEven, srcOdd, srcFormat, dstLumaYEven, dstLumaYOdd, dstLumaU, dstLumaV,
 
 2063        dstEvenChromaY1, dstEvenChromaY2, dstOddChromaY1, dstOddChromaY2, dstChromaU1,
 
 2064        dstChromaU2, dstChromaV1, dstChromaV2, roi->width);
 
       /* Row left over by the pair loop (an odd roi->height): all odd-row and
        * U/V chroma destinations are passed as NULL. */
 2066  for (; y < roi->height; y++)
 
 2068    const BYTE* srcEven = (pSrc + y * srcStep);
 
 2069    BYTE* dstLumaYEven = (pDst1[0] + y * dst1Step[0]);
 
 2070    BYTE* dstLumaU = (pDst1[1] + (y / 2) * dst1Step[1]);
 
 2071    BYTE* dstLumaV = (pDst1[2] + (y / 2) * dst1Step[2]);
 
 2072    BYTE* dstEvenChromaY1 = (pDst2[0] + y * dst2Step[0]);
 
 2073    BYTE* dstEvenChromaY2 = dstEvenChromaY1 + roi->width / 2;
 
 2074    general_RGBToAVC444YUVv2_ANY_DOUBLE_ROW(
 
 2075        srcEven, NULL, srcFormat, dstLumaYEven, NULL, dstLumaU, dstLumaV, dstEvenChromaY1,
 
 2076        dstEvenChromaY2, NULL, NULL, NULL, NULL, NULL, NULL, roi->width);
 
 2079  return PRIMITIVES_SUCCESS;
 
 /* AVC444v2 conversion of two source rows (even and odd) of 4-byte pixels
  * read in B, G, R byte order, starting at pixel column `offset` and advancing
  * two pixels per iteration.
  *
  * Naming of the per-iteration samples: a/b are the first/second pixel of the
  * even row, c/d the first/second pixel of the odd row.
  *
  * Outputs visible in this excerpt:
  *   - yLumaDstEven / yLumaDstOdd receive the Y samples of the even/odd row;
  *   - uLumaDst / vLumaDst receive the average of the four U / V samples;
  *   - yEvenChromaDst1/2 receive Ub / Vb, yOddChromaDst1/2 receive Ud / Vd;
  *   - uChromaDst1/2 or vChromaDst1/2 receive Uc / Vc.
  *
  * width must be even; pSrcEven, yLumaDstEven, uLumaDst and vLumaDst must be
  * non-NULL (all asserted). pSrcOdd may be NULL, in which case no odd-row
  * pointer is formed.
  *
  * NOTE(review): the conditions selecting between the chroma stores and the
  * defaults for b/c/d when a pixel is skipped are not visible in this
  * excerpt. */
 2082static inline void int_general_RGBToAVC444YUVv2_BGRX_DOUBLE_ROW(
 
 2083    size_t offset, 
const BYTE* WINPR_RESTRICT pSrcEven, 
const BYTE* WINPR_RESTRICT pSrcOdd,
 
 2084    BYTE* WINPR_RESTRICT yLumaDstEven, BYTE* WINPR_RESTRICT yLumaDstOdd,
 
 2085    BYTE* WINPR_RESTRICT uLumaDst, BYTE* WINPR_RESTRICT vLumaDst,
 
 2086    BYTE* WINPR_RESTRICT yEvenChromaDst1, BYTE* WINPR_RESTRICT yEvenChromaDst2,
 
 2087    BYTE* WINPR_RESTRICT yOddChromaDst1, BYTE* WINPR_RESTRICT yOddChromaDst2,
 
 2088    BYTE* WINPR_RESTRICT uChromaDst1, BYTE* WINPR_RESTRICT uChromaDst2,
 
 2089    BYTE* WINPR_RESTRICT vChromaDst1, BYTE* WINPR_RESTRICT vChromaDst2, UINT32 width)
 
 2091  WINPR_ASSERT((width % 2) == 0);
 
 2092  WINPR_ASSERT(pSrcEven);
 
 2093  WINPR_ASSERT(yLumaDstEven);
 
 2094  WINPR_ASSERT(uLumaDst);
 
 2095  WINPR_ASSERT(vLumaDst);
 
 2097  for (
size_t x = offset; x < width; x += 2)
 
         /* 4 bytes per pixel: byte offset of column x is 4 * x. */
 2099    const BYTE* srcEven = &pSrcEven[4ULL * x];
 
 2100    const BYTE* srcOdd = pSrcOdd ? &pSrcOdd[4ULL * x] : NULL;
 
           /* Pixel a: even row, first pixel. */
 2114      const BYTE b = *srcEven++;
 
 2115      const BYTE g = *srcEven++;
 
 2116      const BYTE r = *srcEven++;
 
 2118      Ya = RGB2Y(r, g, b);
 
 2119      Ua = RGB2U(r, g, b);
 
 2120      Va = RGB2V(r, g, b);
 
           /* Pixel b: even row, second pixel. */
 2125      const BYTE b = *srcEven++;
 
 2126      const BYTE g = *srcEven++;
 
 2127      const BYTE r = *srcEven++;
 
 2129      Yb = RGB2Y(r, g, b);
 
 2130      Ub = RGB2U(r, g, b);
 
 2131      Vb = RGB2V(r, g, b);
 
           /* Pixel c: odd row, first pixel. */
 2142      const BYTE b = *srcOdd++;
 
 2143      const BYTE g = *srcOdd++;
 
 2144      const BYTE r = *srcOdd++;
 
 2146      Yc = RGB2Y(r, g, b);
 
 2147      Uc = RGB2U(r, g, b);
 
 2148      Vc = RGB2V(r, g, b);
 
         /* Pixel d: odd row, second pixel; needs an odd row and a second column. */
 2157    if (srcOdd && (x < width - 1))
 
 2159      const BYTE b = *srcOdd++;
 
 2160      const BYTE g = *srcOdd++;
 
 2161      const BYTE r = *srcOdd++;
 
 2163      Yd = RGB2Y(r, g, b);
 
 2164      Ud = RGB2U(r, g, b);
 
 2165      Vd = RGB2V(r, g, b);
 
         /* Luma of the four pixels; odd-row stores also require a destination. */
 2175    *yLumaDstEven++ = Ya;
 
 2178      *yLumaDstEven++ = Yb;
 
 2180    if (srcOdd && yLumaDstOdd)
 
 2181      *yLumaDstOdd++ = Yc;
 
 2183    if (srcOdd && (x < width - 1) && yLumaDstOdd)
 
 2184      *yLumaDstOdd++ = Yd;
 
         /* Averaged chroma of the 2x2 group. */
 2187    *uLumaDst++ = (Ua + Ub + Uc + Ud) / 4;
 
 2188    *vLumaDst++ = (Va + Vb + Vc + Vd) / 4;
 
           /* Chroma of the individual pixels b, d and c. */
 2193      *yEvenChromaDst1++ = Ub;
 
 2194      *yEvenChromaDst2++ = Vb;
 
 2202        *yOddChromaDst1++ = Ud;
 
 2203        *yOddChromaDst2++ = Vd;
 
 2209        *uChromaDst1++ = Uc;
 
 2210        *uChromaDst2++ = Vc;
 
 2215        *vChromaDst1++ = Uc;
 
 2216        *vChromaDst2++ = Vc;
 
 /* Externally visible wrapper (no `static`) that forwards all of its
  * arguments, unchanged and in the same order, to
  * int_general_RGBToAVC444YUVv2_BGRX_DOUBLE_ROW(). */
 2222void general_RGBToAVC444YUVv2_BGRX_DOUBLE_ROW(
 
 2223    size_t offset, 
const BYTE* WINPR_RESTRICT pSrcEven, 
const BYTE* WINPR_RESTRICT pSrcOdd,
 
 2224    BYTE* WINPR_RESTRICT yLumaDstEven, BYTE* WINPR_RESTRICT yLumaDstOdd,
 
 2225    BYTE* WINPR_RESTRICT uLumaDst, BYTE* WINPR_RESTRICT vLumaDst,
 
 2226    BYTE* WINPR_RESTRICT yEvenChromaDst1, BYTE* WINPR_RESTRICT yEvenChromaDst2,
 
 2227    BYTE* WINPR_RESTRICT yOddChromaDst1, BYTE* WINPR_RESTRICT yOddChromaDst2,
 
 2228    BYTE* WINPR_RESTRICT uChromaDst1, BYTE* WINPR_RESTRICT uChromaDst2,
 
 2229    BYTE* WINPR_RESTRICT vChromaDst1, BYTE* WINPR_RESTRICT vChromaDst2, UINT32 width)
 
 2231  int_general_RGBToAVC444YUVv2_BGRX_DOUBLE_ROW(
 
 2232      offset, pSrcEven, pSrcOdd, yLumaDstEven, yLumaDstOdd, uLumaDst, vLumaDst, yEvenChromaDst1,
 
 2233      yEvenChromaDst2, yOddChromaDst1, yOddChromaDst2, uChromaDst1, uChromaDst2, vChromaDst1,
 
 2234      vChromaDst2, width);
 
 /* AVC444v2 conversion of a BGRX source: walks the image in row pairs and
  * hands each pair to int_general_RGBToAVC444YUVv2_BGRX_DOUBLE_ROW() (with
  * offset 0) together with the destination row pointers in the two output
  * frames pDst1/pDst2.
  *
  * Yields !PRIMITIVES_SUCCESS when roi->height or roi->width is below 1,
  * PRIMITIVES_SUCCESS otherwise.
  *
  * NOTE(review): the roi parameter and the declaration of y are not visible
  * in this excerpt. */
 2237static inline pstatus_t general_RGBToAVC444YUVv2_BGRX(
const BYTE* WINPR_RESTRICT pSrc,
 
 2238                                                      UINT32 srcStep, BYTE* WINPR_RESTRICT pDst1[3],
 
 2239                                                      const UINT32 dst1Step[3],
 
 2240                                                      BYTE* WINPR_RESTRICT pDst2[3],
 
 2241                                                      const UINT32 dst2Step[3],
 
 2244  if (roi->height < 1 || roi->width < 1)
 
 2245    return !PRIMITIVES_SUCCESS;
 
       /* Row pairs; the bound excludes a trailing odd row. */
 2248  for (; y < roi->height - roi->height % 2; y += 2)
 
 2250    const BYTE* srcEven = (pSrc + y * srcStep);
 
 2251    const BYTE* srcOdd = (srcEven + srcStep);
 
 2252    BYTE* dstLumaYEven = (pDst1[0] + y * dst1Step[0]);
 
 2253    BYTE* dstLumaYOdd = (dstLumaYEven + dst1Step[0]);
 
 2254    BYTE* dstLumaU = (pDst1[1] + (y / 2) * dst1Step[1]);
 
 2255    BYTE* dstLumaV = (pDst1[2] + (y / 2) * dst1Step[2]);
 
         /* Each pDst2[0] row is split in two halves of roi->width / 2 bytes; the
          * odd-row pointers are the same positions one dst2Step[0] further. */
 2256    BYTE* dstEvenChromaY1 = (pDst2[0] + y * dst2Step[0]);
 
 2257    BYTE* dstEvenChromaY2 = dstEvenChromaY1 + roi->width / 2;
 
 2258    BYTE* dstOddChromaY1 = dstEvenChromaY1 + dst2Step[0];
 
 2259    BYTE* dstOddChromaY2 = dstEvenChromaY2 + dst2Step[0];
 
         /* Each pDst2[1] / pDst2[2] row is split in two parts roi->width / 4 apart. */
 2260    BYTE* dstChromaU1 = (pDst2[1] + (y / 2) * dst2Step[1]);
 
 2261    BYTE* dstChromaV1 = (pDst2[2] + (y / 2) * dst2Step[2]);
 
 2262    BYTE* dstChromaU2 = dstChromaU1 + roi->width / 4;
 
 2263    BYTE* dstChromaV2 = dstChromaV1 + roi->width / 4;
 
 2264    int_general_RGBToAVC444YUVv2_BGRX_DOUBLE_ROW(
 
 2265        0, srcEven, srcOdd, dstLumaYEven, dstLumaYOdd, dstLumaU, dstLumaV, dstEvenChromaY1,
 
 2266        dstEvenChromaY2, dstOddChromaY1, dstOddChromaY2, dstChromaU1, dstChromaU2, dstChromaV1,
 
 2267        dstChromaV2, roi->width);
 
       /* Row left over by the pair loop (an odd roi->height): all odd-row and
        * U/V chroma destinations are passed as NULL. */
 2269  for (; y < roi->height; y++)
 
 2271    const BYTE* srcEven = (pSrc + y * srcStep);
 
 2272    BYTE* dstLumaYEven = (pDst1[0] + y * dst1Step[0]);
 
 2273    BYTE* dstLumaU = (pDst1[1] + (y / 2) * dst1Step[1]);
 
 2274    BYTE* dstLumaV = (pDst1[2] + (y / 2) * dst1Step[2]);
 
 2275    BYTE* dstEvenChromaY1 = (pDst2[0] + y * dst2Step[0]);
 
 2276    BYTE* dstEvenChromaY2 = dstEvenChromaY1 + roi->width / 2;
 
 2277    int_general_RGBToAVC444YUVv2_BGRX_DOUBLE_ROW(
 
 2278        0, srcEven, NULL, dstLumaYEven, NULL, dstLumaU, dstLumaV, dstEvenChromaY1,
 
 2279        dstEvenChromaY2, NULL, NULL, NULL, NULL, NULL, NULL, roi->width);
 
 2282  return PRIMITIVES_SUCCESS;
 
 /* Entry point for the AVC444v2 RGB -> YUV conversion: BGRA32/BGRX32 sources
  * go to general_RGBToAVC444YUVv2_BGRX(), and general_RGBToAVC444YUVv2_ANY()
  * handles the remaining path. The final return yields !PRIMITIVES_SUCCESS.
  *
  * NOTE(review): the switch header, the label guarding the _ANY call and the
  * tails of both calls are not visible in this excerpt; the selector is
  * presumably srcFormat and the _ANY call the default case - confirm. */
 2285static pstatus_t general_RGBToAVC444YUVv2(
const BYTE* WINPR_RESTRICT pSrc, UINT32 srcFormat,
 
 2286                                          UINT32 srcStep, BYTE* WINPR_RESTRICT pDst1[3],
 
 2287                                          const UINT32 dst1Step[3], BYTE* WINPR_RESTRICT pDst2[3],
 
 2288                                          const UINT32 dst2Step[3],
 
 2293    case PIXEL_FORMAT_BGRA32:
 
 2294    case PIXEL_FORMAT_BGRX32:
 
 2295      return general_RGBToAVC444YUVv2_BGRX(pSrc, srcStep, pDst1, dst1Step, pDst2, dst2Step,
 
 2299      return general_RGBToAVC444YUVv2_ANY(pSrc, srcFormat, srcStep, pDst1, dst1Step, pDst2,
 
 2303  return !PRIMITIVES_SUCCESS;
 
 /* Installs the general_* YUV implementations from this file into the
  * corresponding function pointers of `prims`. */
 2306void primitives_init_YUV(
primitives_t* WINPR_RESTRICT prims)
 
 2308  prims->YUV420ToRGB_8u_P3AC4R = general_YUV420ToRGB_8u_P3AC4R;
 
 2309  prims->YUV444ToRGB_8u_P3AC4R = general_YUV444ToRGB_8u_P3AC4R;
 
 2310  prims->RGBToYUV420_8u_P3AC4R = general_RGBToYUV420_8u_P3AC4R;
 
 2311  prims->RGBToYUV444_8u_P3AC4R = general_RGBToYUV444_8u_P3AC4R;
 
 2312  prims->YUV420CombineToYUV444 = general_YUV420CombineToYUV444;
 
 2313  prims->YUV444SplitToYUV420 = general_YUV444SplitToYUV420;
 
 2314  prims->RGBToAVC444YUV = general_RGBToAVC444YUV;
 
 2315  prims->RGBToAVC444YUVv2 = general_RGBToAVC444YUVv2;
 
 /* Initializes `prims` with the generic YUV implementations first, then calls
  * the SSE4.1 and NEON initializers on the same table.
  * NOTE(review): those initializers are defined elsewhere; presumably they
  * replace entries with optimized versions when available - confirm. */
 2318void primitives_init_YUV_opt(
primitives_t* WINPR_RESTRICT prims)
 
 2320  primitives_init_YUV(prims);
 
 2321  primitives_init_YUV_sse41(prims);
 
 2322  primitives_init_YUV_neon(prims);