  | 
  
    FreeRDP
    
   | 
 
 
 
 
Loading...
Searching...
No Matches
 
 
 
 
   18#include "prim_avxsse.h" 
   43#define SSE3_SCD_ROUTINE(_name_, _type_, _fallback_, _op_, _op_type_, _slowWay_) \ 
   44  static pstatus_t _name_(const _type_* WINPR_RESTRICT pSrc, UINT32 val,       \ 
   45                          _type_* WINPR_RESTRICT pDst, UINT32 ulen)            \ 
   49    const _type_* sptr = pSrc;                                               \ 
   50    _type_* dptr = pDst;                                                     \ 
   52      return PRIMITIVES_SUCCESS;                                           \ 
   55    if (sizeof(_type_) == 1)                                                 \ 
   57    else if (sizeof(_type_) == 2)                                            \ 
   59    else if (sizeof(_type_) == 4)                                            \ 
   61    else if (sizeof(_type_) == 8)                                            \ 
   64    size_t count = len >> (8 - shifts);                                      \ 
   65    len -= count << (8 - shifts);                                            \ 
   69      __m128i xmm0 = LOAD_SI128(sptr);                                     \ 
   70      sptr += (16 / sizeof(_type_));                                       \ 
   71      __m128i xmm1 = LOAD_SI128(sptr);                                     \ 
   72      sptr += (16 / sizeof(_type_));                                       \ 
   73      __m128i xmm2 = LOAD_SI128(sptr);                                     \ 
   74      sptr += (16 / sizeof(_type_));                                       \ 
   75      __m128i xmm3 = LOAD_SI128(sptr);                                     \ 
   76      sptr += (16 / sizeof(_type_));                                       \ 
   77      __m128i xmm4 = LOAD_SI128(sptr);                                     \ 
   78      sptr += (16 / sizeof(_type_));                                       \ 
   79      __m128i xmm5 = LOAD_SI128(sptr);                                     \ 
   80      sptr += (16 / sizeof(_type_));                                       \ 
   81      __m128i xmm6 = LOAD_SI128(sptr);                                     \ 
   82      sptr += (16 / sizeof(_type_));                                       \ 
   83      __m128i xmm7 = LOAD_SI128(sptr);                                     \ 
   84      sptr += (16 / sizeof(_type_));                                       \ 
   85      xmm0 = _op_(xmm0, (_op_type_)val);                                   \ 
   86      xmm1 = _op_(xmm1, (_op_type_)val);                                   \ 
   87      xmm2 = _op_(xmm2, (_op_type_)val);                                   \ 
   88      xmm3 = _op_(xmm3, (_op_type_)val);                                   \ 
   89      xmm4 = _op_(xmm4, (_op_type_)val);                                   \ 
   90      xmm5 = _op_(xmm5, (_op_type_)val);                                   \ 
   91      xmm6 = _op_(xmm6, (_op_type_)val);                                   \ 
   92      xmm7 = _op_(xmm7, (_op_type_)val);                                   \ 
   93      STORE_SI128(dptr, xmm0);                                             \ 
   94      dptr += (16 / sizeof(_type_));                                       \ 
   95      STORE_SI128(dptr, xmm1);                                             \ 
   96      dptr += (16 / sizeof(_type_));                                       \ 
   97      STORE_SI128(dptr, xmm2);                                             \ 
   98      dptr += (16 / sizeof(_type_));                                       \ 
   99      STORE_SI128(dptr, xmm3);                                             \ 
  100      dptr += (16 / sizeof(_type_));                                       \ 
  101      STORE_SI128(dptr, xmm4);                                             \ 
  102      dptr += (16 / sizeof(_type_));                                       \ 
  103      STORE_SI128(dptr, xmm5);                                             \ 
  104      dptr += (16 / sizeof(_type_));                                       \ 
  105      STORE_SI128(dptr, xmm6);                                             \ 
  106      dptr += (16 / sizeof(_type_));                                       \ 
  107      STORE_SI128(dptr, xmm7);                                             \ 
  108      dptr += (16 / sizeof(_type_));                                       \ 
  112    count = len >> (5 - shifts);                                             \ 
  113    len -= count << (5 - shifts);                                            \ 
  116      __m128i xmm0 = LOAD_SI128(sptr);                                     \ 
  117      sptr += (16 / sizeof(_type_));                                       \ 
  118      xmm0 = _op_(xmm0, (_op_type_)val);                                   \ 
  119      STORE_SI128(dptr, xmm0);                                             \ 
  120      dptr += (16 / sizeof(_type_));                                       \ 
  127    return PRIMITIVES_SUCCESS;                                               \ 
  134#define SSE3_SCD_PRE_ROUTINE(_name_, _type_, _fallback_, _op_, _slowWay_)  \ 
  135  static pstatus_t _name_(const _type_* WINPR_RESTRICT pSrc, _type_ val, \ 
  136                          _type_* WINPR_RESTRICT pDst, INT32 ilen)       \ 
  138    size_t len = WINPR_ASSERTING_INT_CAST(size_t, ilen);               \ 
  140    const _type_* sptr = pSrc;                                         \ 
  141    _type_* dptr = pDst;                                               \ 
  143    if (sizeof(_type_) == 1)                                           \ 
  145    else if (sizeof(_type_) == 2)                                      \ 
  147    else if (sizeof(_type_) == 4)                                      \ 
  149    else if (sizeof(_type_) == 8)                                      \ 
  152    size_t count = len >> (7 - shifts);                                \ 
  153    len -= count << (7 - shifts);                                      \ 
  154    xmm0 = mm_set1_epu32(val);                                         \ 
  155    for (size_t x = 0; x < count; x++)                                 \ 
  157      __m128i xmm1 = LOAD_SI128(sptr);                               \ 
  158      sptr += (16 / sizeof(_type_));                                 \ 
  159      __m128i xmm2 = LOAD_SI128(sptr);                               \ 
  160      sptr += (16 / sizeof(_type_));                                 \ 
  161      __m128i xmm3 = LOAD_SI128(sptr);                               \ 
  162      sptr += (16 / sizeof(_type_));                                 \ 
  163      __m128i xmm4 = LOAD_SI128(sptr);                               \ 
  164      sptr += (16 / sizeof(_type_));                                 \ 
  165      xmm1 = _op_(xmm1, xmm0);                                       \ 
  166      xmm2 = _op_(xmm2, xmm0);                                       \ 
  167      xmm3 = _op_(xmm3, xmm0);                                       \ 
  168      xmm4 = _op_(xmm4, xmm0);                                       \ 
  169      STORE_SI128(dptr, xmm1);                                       \ 
  170      dptr += (16 / sizeof(_type_));                                 \ 
  171      STORE_SI128(dptr, xmm2);                                       \ 
  172      dptr += (16 / sizeof(_type_));                                 \ 
  173      STORE_SI128(dptr, xmm3);                                       \ 
  174      dptr += (16 / sizeof(_type_));                                 \ 
  175      STORE_SI128(dptr, xmm4);                                       \ 
  176      dptr += (16 / sizeof(_type_));                                 \ 
  179    count = len >> (5 - shifts);                                       \ 
  180    len -= count << (5 - shifts);                                      \ 
  181    for (size_t x = 0; x < count; x++)                                 \ 
  183      __m128i xmm1 = LOAD_SI128(sptr);                               \ 
  184      sptr += (16 / sizeof(_type_));                                 \ 
  185      xmm1 = _op_(xmm1, xmm0);                                       \ 
  186      STORE_SI128(dptr, xmm1);                                       \ 
  187      dptr += (16 / sizeof(_type_));                                 \ 
  190    for (size_t x = 0; x < len; x++)                                   \ 
  194    return PRIMITIVES_SUCCESS;                                         \ 
  200#define SSE3_SSD_ROUTINE(_name_, _type_, _fallback_, _op_, _slowWay_)                        \ 
  201  static pstatus_t _name_(const _type_* WINPR_RESTRICT pSrc1,                              \ 
  202                          const _type_* WINPR_RESTRICT pSrc2, _type_* WINPR_RESTRICT pDst, \ 
  207    const _type_* sptr1 = pSrc1;                                                         \ 
  208    const _type_* sptr2 = pSrc2;                                                         \ 
  209    _type_* dptr = pDst;                                                                 \ 
  211    if (sizeof(_type_) == 1)                                                             \ 
  213    else if (sizeof(_type_) == 2)                                                        \ 
  215    else if (sizeof(_type_) == 4)                                                        \ 
  217    else if (sizeof(_type_) == 8)                                                        \ 
  220    count = len >> (7 - shifts);                                                         \ 
  221    len -= count << (7 - shifts);                                                        \ 
  225      __m128i xmm0 = LOAD_SI128(sptr1);                                                \ 
  226      sptr1 += (16 / sizeof(_type_));                                                  \ 
  227      __m128i xmm1 = LOAD_SI128(sptr1);                                                \ 
  228      sptr1 += (16 / sizeof(_type_));                                                  \ 
  229      __m128i xmm2 = LOAD_SI128(sptr1);                                                \ 
  230      sptr1 += (16 / sizeof(_type_));                                                  \ 
  231      __m128i xmm3 = LOAD_SI128(sptr1);                                                \ 
  232      sptr1 += (16 / sizeof(_type_));                                                  \ 
  233      __m128i xmm4 = LOAD_SI128(sptr2);                                                \ 
  234      sptr2 += (16 / sizeof(_type_));                                                  \ 
  235      __m128i xmm5 = LOAD_SI128(sptr2);                                                \ 
  236      sptr2 += (16 / sizeof(_type_));                                                  \ 
  237      __m128i xmm6 = LOAD_SI128(sptr2);                                                \ 
  238      sptr2 += (16 / sizeof(_type_));                                                  \ 
  239      __m128i xmm7 = LOAD_SI128(sptr2);                                                \ 
  240      sptr2 += (16 / sizeof(_type_));                                                  \ 
  241      xmm0 = _op_(xmm0, xmm4);                                                         \ 
  242      xmm1 = _op_(xmm1, xmm5);                                                         \ 
  243      xmm2 = _op_(xmm2, xmm6);                                                         \ 
  244      xmm3 = _op_(xmm3, xmm7);                                                         \ 
  245      STORE_SI128(dptr, xmm0);                                                         \ 
  246      dptr += (16 / sizeof(_type_));                                                   \ 
  247      STORE_SI128(dptr, xmm1);                                                         \ 
  248      dptr += (16 / sizeof(_type_));                                                   \ 
  249      STORE_SI128(dptr, xmm2);                                                         \ 
  250      dptr += (16 / sizeof(_type_));                                                   \ 
  251      STORE_SI128(dptr, xmm3);                                                         \ 
  252      dptr += (16 / sizeof(_type_));                                                   \ 
  255    count = len >> (5 - shifts);                                                         \ 
  256    len -= count << (5 - shifts);                                                        \ 
  259      __m128i xmm0 = LOAD_SI128(sptr1);                                                \ 
  260      sptr1 += (16 / sizeof(_type_));                                                  \ 
  261      __m128i xmm1 = LOAD_SI128(sptr2);                                                \ 
  262      sptr2 += (16 / sizeof(_type_));                                                  \ 
  263      xmm0 = _op_(xmm0, xmm1);                                                         \ 
  264      STORE_SI128(dptr, xmm0);                                                         \ 
  265      dptr += (16 / sizeof(_type_));                                                   \ 
  272    return PRIMITIVES_SUCCESS;                                                           \