23#include <freerdp/config.h> 
   25#include <freerdp/types.h> 
   26#include <freerdp/primitives.h> 
   27#include <winpr/sysinfo.h> 
   29#include "prim_alphaComp.h" 
   31#include "prim_internal.h" 
   32#include "prim_avxsse.h" 
   35#if defined(SSE_AVX_INTRINSICS_ENABLED) 
   /*
    * Blends two images of 32-bit pixels (pSrc1 over pSrc2) into pDst, handling
    * four pixels per 128-bit vector operation in the main path.
    *
    * For every 8-bit channel the vector path computes
    *     dst = ((((a1 + 1) * (src1 - src2)) >> 8) + src2) & 0xff
    * where a1 is byte 3 (in memory order) of the corresponding pSrc1 pixel,
    * presumably its alpha channel -- TODO confirm the pixel layout with callers.
    *
    * Parts of a row that are not handled by the vector path are passed to the
    * generic implementation one row at a time.
    *
    * NOTE(review): this excerpt is missing lines (braces, the trailing
    * parameter(s), the switch cases, the pointer advances and the error
    * returns), so the comments below only describe the statements that are
    * actually visible.
    */
   41static pstatus_t sse2_alphaComp_argb(
const BYTE* WINPR_RESTRICT pSrc1, UINT32 src1Step,
 
   42                                     const BYTE* WINPR_RESTRICT pSrc2, UINT32 src2Step,
 
   43                                     BYTE* WINPR_RESTRICT pDst, UINT32 dstStep, UINT32 width,
 
       /* View both sources as arrays of 32-bit pixels. */
   46  const UINT32* sptr1 = (
const UINT32*)pSrc1;
 
   47  const UINT32* sptr2 = (
const UINT32*)pSrc2;
 
       /* Nothing to do for an empty area. */
   49  if ((width <= 0) || (height <= 0))
 
   50    return PRIMITIVES_SUCCESS;
 
       /* Hand the whole call to the generic implementation.
        * NOTE(review): the condition guarding this return is not visible here. */
   54    return generic->alphaComp_argb(pSrc1, src1Step, pSrc2, src2Step, pDst, dstStep, width,
 
   58  UINT32* dptr = (UINT32*)pDst;
 
       /* Bytes of pixel data per row, and the per-row padding of each buffer
        * expressed in UINT32 units (step minus pixel data). */
   59  const size_t linebytes = width * 
sizeof(UINT32);
 
   60  const size_t src1Jump = (src1Step - linebytes) / 
sizeof(UINT32);
 
   61  const size_t src2Jump = (src2Step - linebytes) / 
sizeof(UINT32);
 
   62  const size_t dstJump = (dstStep - linebytes) / 
sizeof(UINT32);
 
       /* xmm0: all zero, used below to widen bytes to 16-bit lanes.
        * xmm1: the value 1 in every 16-bit lane. */
   63  __m128i xmm0 = mm_set1_epu32(0);
 
   64  __m128i xmm1 = _mm_set1_epi16(1);
 
       /* One iteration per row. */
   66  for (UINT32 y = 0; y < height; ++y)
 
         /* Pixels of the current row that still have to be processed. */
   68    uint32_t pixels = width;
 
         /* Dispatch on the low four address bits of dptr, i.e. its offset from
          * a 16-byte boundary (the case labels are not visible here). */
   73    switch ((ULONG_PTR)dptr & 0x0f)
 
           /* Blend leadIn pixels of this single row with the generic routine. */
  101      pstatus_t status = 0;
 
  102      status = 
generic->alphaComp_argb((
const BYTE*)sptr1, src1Step, (
const BYTE*)sptr2,
 
  103                                       src2Step, (BYTE*)dptr, dstStep, leadIn, 1);
 
  104      if (status != PRIMITIVES_SUCCESS)
 
         /* Subtract the pixels covered by `count` groups of four. */
  115    pixels -= count << 2;
 
           /* Load four pixels from each source. */
  126      xmm2 = LOAD_SI128(sptr1);
 
  129      xmm3 = LOAD_SI128(sptr2);
 
           /* Upper two pixels: widen every byte to a 16-bit lane. */
  132      xmm4 = _mm_unpackhi_epi8(xmm2, xmm0);
 
  134      xmm5 = _mm_unpackhi_epi8(xmm3, xmm0);
 
           /* xmm6 = src1 - src2 per channel. */
  136      xmm6 = _mm_subs_epi16(xmm4, xmm5);
 
           /* Broadcast lane 3 of each pixel (byte 3 of the src1 pixel) to all
            * four lanes of that pixel. */
  138      xmm4 = _mm_shufflelo_epi16(xmm4, 0xff);
 
  140      xmm4 = _mm_shufflehi_epi16(xmm4, 0xff);
 
           /* (a1 + 1) * (src1 - src2), keeping the low 16 bits of each product. */
  142      xmm4 = _mm_adds_epi16(xmm4, xmm1);
 
  144      xmm4 = _mm_mullo_epi16(xmm4, xmm6);
 
           /* Arithmetic shift right by 8, then add src2 back. */
  146      xmm4 = _mm_srai_epi16(xmm4, 8);
 
  148      xmm4 = _mm_adds_epi16(xmm4, xmm5);
 
           /* Lower two pixels: same sequence using xmm5/xmm6/xmm7. */
  151      xmm5 = _mm_unpacklo_epi8(xmm2, xmm0);
 
  153      xmm6 = _mm_unpacklo_epi8(xmm3, xmm0);
 
  155      xmm7 = _mm_subs_epi16(xmm5, xmm6);
 
  157      xmm5 = _mm_shufflelo_epi16(xmm5, 0xff);
 
  159      xmm5 = _mm_shufflehi_epi16(xmm5, 0xff);
 
  161      xmm5 = _mm_adds_epi16(xmm5, xmm1);
 
  163      xmm5 = _mm_mullo_epi16(xmm5, xmm7);
 
  165      xmm5 = _mm_srai_epi16(xmm5, 8);
 
  167      xmm5 = _mm_adds_epi16(xmm5, xmm6);
 
           /* Keep only the low byte of every 16-bit lane so that the saturating
            * pack below cannot clamp the values. */
  170      xmm3 = _mm_set1_epi16(0x00ffU);
 
  171      xmm4 = _mm_and_si128(xmm4, xmm3);
 
  172      xmm5 = _mm_and_si128(xmm5, xmm3);
 
           /* Narrow back to bytes (lower pixels first, then upper pixels) and
            * store the four result pixels. */
  174      xmm5 = _mm_packus_epi16(xmm5, xmm4);
 
  175      STORE_SI128(dptr, xmm5);
 
           /* Blend the remaining `pixels` of this single row with the generic
            * routine. */
  182      pstatus_t status = 0;
 
  183      status = 
generic->alphaComp_argb((
const BYTE*)sptr1, src1Step, (
const BYTE*)sptr2,
 
  184                                       src2Step, (BYTE*)dptr, dstStep, pixels, 1);
 
  185      if (status != PRIMITIVES_SUCCESS)
 
  199  return PRIMITIVES_SUCCESS;
 
  /*
   * Installs the SSE alpha-composition routine into the given primitives table.
   *
   * When SSE_AVX_INTRINSICS_ENABLED is defined, the generic primitives table is
   * fetched into `generic`, a verbose log message is written and
   * prims->alphaComp_argb is set to sse2_alphaComp_argb.
   *
   * NOTE(review): the second WLog_VRB call looks like the branch taken when the
   * intrinsics are not enabled; the #else/#endif lines are not visible in this
   * excerpt -- confirm against the full file.
   */
  204void primitives_init_alphaComp_sse3_int(
primitives_t* WINPR_RESTRICT prims)
 
  206#if defined(SSE_AVX_INTRINSICS_ENABLED) 
       /* Fetch the generic primitives table into `generic`; the optimized
        * routine calls generic->alphaComp_argb. */
  207  generic = primitives_get_generic();
 
  208  WLog_VRB(PRIM_TAG, 
"SSE2/SSE3 optimizations");
 
  209  prims->alphaComp_argb = sse2_alphaComp_argb;
 
  212  WLog_VRB(PRIM_TAG, 
"undefined WITH_SIMD or SSE3 intrinsics not available");