21#include <freerdp/config.h> 
   23#include <freerdp/types.h> 
   24#include <freerdp/primitives.h> 
   25#include "prim_internal.h" 
   27#if defined(WITH_OPENCL) 
   29#include "OpenCL/opencl.h" 
   33#include "primitives-opencl-program.h" 
   35#include <freerdp/log.h> 
   36#define TAG FREERDP_TAG("primitives") 
   41  cl_platform_id platformId;
 
   42  cl_device_id deviceId;
 
   44  cl_command_queue commandQueue;
 
   46} primitives_opencl_context;
 
   50  primitives_opencl_context* cl;
 
   56} primitives_cl_kernel;
 
   58static primitives_opencl_context* primitives_get_opencl_context(
void);
 
   60static void cl_kernel_free(primitives_cl_kernel* kernel)
 
   66    clReleaseMemObject(kernel->dstObj);
 
   68  for (
size_t i = 0; i < ARRAYSIZE(kernel->srcObjs); i++)
 
   70    cl_mem obj = kernel->srcObjs[i];
 
   71    kernel->srcObjs[i] = NULL;
 
   73      clReleaseMemObject(obj);
 
   77    clReleaseKernel(kernel->kernel);
 
   82static primitives_cl_kernel* cl_kernel_new(
const char* kernelName, 
const prim_size_t* roi)
 
   84  WINPR_ASSERT(kernelName);
 
   87  primitives_cl_kernel* kernel = calloc(1, 
sizeof(primitives_cl_kernel));
 
   92  kernel->cl = primitives_get_opencl_context();
 
   96  cl_int ret = CL_INVALID_VALUE;
 
   97  kernel->kernel = clCreateKernel(kernel->cl->program, kernelName, &ret);
 
   98  if (ret != CL_SUCCESS)
 
  100    WLog_ERR(TAG, 
"openCL: unable to create kernel %s", kernelName);
 
  106  cl_kernel_free(kernel);
 
  110static BOOL cl_kernel_set_sources(primitives_cl_kernel* ctx, 
const BYTE* WINPR_RESTRICT pSrc[3],
 
  111                                  const UINT32 srcStep[3])
 
  113  const char* sourceNames[] = { 
"Y", 
"U", 
"V" };
 
  117  WINPR_ASSERT(srcStep);
 
  119  for (cl_uint i = 0; i < ARRAYSIZE(ctx->srcObjs); i++)
 
  121    cl_int ret = CL_INVALID_VALUE;
 
  122    const BYTE* csrc = pSrc[i];
 
  123    void* WINPR_RESTRICT src = WINPR_CAST_CONST_PTR_AWAY(csrc, 
void* WINPR_RESTRICT);
 
  124    ctx->srcObjs[i] = clCreateBuffer(ctx->cl->context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
 
  125                                     1ull * srcStep[i] * ctx->roi.height, src, &ret);
 
  126    if (ret != CL_SUCCESS)
 
  128      WLog_ERR(TAG, 
"unable to create %sobj", sourceNames[i]);
 
  132    ret = clSetKernelArg(ctx->kernel, i * 2, 
sizeof(cl_mem), (
const void*)&ctx->srcObjs[i]);
 
  133    if (ret != CL_SUCCESS)
 
  135      WLog_ERR(TAG, 
"unable to set arg for %sobj", sourceNames[i]);
 
  139    ret = clSetKernelArg(ctx->kernel, i * 2 + 1, 
sizeof(cl_uint), &srcStep[i]);
 
  140    if (ret != CL_SUCCESS)
 
  142      WLog_ERR(TAG, 
"unable to set arg stride for %sobj", sourceNames[i]);
 
  150static BOOL cl_kernel_set_destination(primitives_cl_kernel* ctx, UINT32 dstStep)
 
  155  ctx->dstStep = dstStep;
 
  156  cl_int ret = CL_INVALID_VALUE;
 
  157  ctx->dstObj = clCreateBuffer(ctx->cl->context, CL_MEM_WRITE_ONLY,
 
  158                               1ull * dstStep * ctx->roi.height, NULL, &ret);
 
  159  if (ret != CL_SUCCESS)
 
  161    WLog_ERR(TAG, 
"unable to create dest obj");
 
  165  ret = clSetKernelArg(ctx->kernel, 6, 
sizeof(cl_mem), (
const void*)&ctx->dstObj);
 
  166  if (ret != CL_SUCCESS)
 
  168    WLog_ERR(TAG, 
"unable to set arg destObj");
 
  172  ret = clSetKernelArg(ctx->kernel, 7, 
sizeof(cl_uint), &dstStep);
 
  173  if (ret != CL_SUCCESS)
 
  175    WLog_ERR(TAG, 
"unable to set arg dstStep");
 
  182static BOOL cl_kernel_process(primitives_cl_kernel* ctx, BYTE* pDst)
 
  187  size_t indexes[2] = { 0 };
 
  188  indexes[0] = ctx->roi.width;
 
  189  indexes[1] = ctx->roi.height;
 
  191  cl_int ret = clEnqueueNDRangeKernel(ctx->cl->commandQueue, ctx->kernel, 2, NULL, indexes, NULL,
 
  193  if (ret != CL_SUCCESS)
 
  195    WLog_ERR(TAG, 
"unable to enqueue call kernel");
 
  200  ret = clEnqueueReadBuffer(ctx->cl->commandQueue, ctx->dstObj, CL_TRUE, 0,
 
  201                            ctx->roi.height * ctx->dstStep, pDst, 0, NULL, NULL);
 
  202  if (ret != CL_SUCCESS)
 
  204    WLog_ERR(TAG, 
"unable to read back buffer");
 
  211static pstatus_t opencl_YUVToRGB(
const char* kernelName, 
const BYTE* WINPR_RESTRICT pSrc[3],
 
  212                                 const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst, UINT32 dstStep,
 
  217  primitives_cl_kernel* ctx = cl_kernel_new(kernelName, roi);
 
  221  if (!cl_kernel_set_sources(ctx, pSrc, srcStep))
 
  224  if (!cl_kernel_set_destination(ctx, dstStep))
 
  227  if (!cl_kernel_process(ctx, pDst))
 
  230  res = PRIMITIVES_SUCCESS;
 
  237static primitives_opencl_context openclContext = { 0 };
 
  239static primitives_opencl_context* primitives_get_opencl_context(
void)
 
  241  return &openclContext;
 
  244static void cl_context_free(primitives_opencl_context* ctx)
 
  248  clReleaseProgram(ctx->program);
 
  249  clReleaseCommandQueue(ctx->commandQueue);
 
  250  clReleaseContext(ctx->context);
 
  251  clReleaseDevice(ctx->deviceId);
 
  252  ctx->support = FALSE;
 
  255static pstatus_t primitives_uninit_opencl(
void)
 
  257  if (!openclContext.support)
 
  258    return PRIMITIVES_SUCCESS;
 
  260  cl_context_free(&openclContext);
 
  261  return PRIMITIVES_SUCCESS;
 
  264static BOOL primitives_init_opencl_context(primitives_opencl_context* WINPR_RESTRICT prims)
 
  266  cl_uint ndevices = 0;
 
  267  cl_uint nplatforms = 0;
 
  268  cl_kernel kernel = NULL;
 
  271  size_t programLen = 0;
 
  273  cl_int ret = clGetPlatformIDs(0, NULL, &nplatforms);
 
  274  if (ret != CL_SUCCESS || nplatforms < 1)
 
  277  cl_platform_id* platform_ids = (cl_platform_id*)calloc(nplatforms, 
sizeof(cl_platform_id));
 
  281  ret = clGetPlatformIDs(nplatforms, platform_ids, &nplatforms);
 
  282  if (ret != CL_SUCCESS)
 
  284    free((
void*)platform_ids);
 
  288  for (cl_uint i = 0; (i < nplatforms) && !gotGPU; i++)
 
  290    cl_device_id device_id = NULL;
 
  291    cl_context context = NULL;
 
  292    char platformName[1000] = { 0 };
 
  293    char deviceName[1000] = { 0 };
 
  295    ret = clGetPlatformInfo(platform_ids[i], CL_PLATFORM_NAME, 
sizeof(platformName),
 
  297    if (ret != CL_SUCCESS)
 
  300    ret = clGetDeviceIDs(platform_ids[i], CL_DEVICE_TYPE_GPU, 1, &device_id, &ndevices);
 
  301    if (ret != CL_SUCCESS)
 
  304    ret = clGetDeviceInfo(device_id, CL_DEVICE_NAME, 
sizeof(deviceName), deviceName, NULL);
 
  305    if (ret != CL_SUCCESS)
 
  307      WLog_ERR(TAG, 
"openCL: unable get device name for platform %s", platformName);
 
  308      clReleaseDevice(device_id);
 
  312    context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
 
  313    if (ret != CL_SUCCESS)
 
  315      WLog_ERR(TAG, 
"openCL: unable to create context for platform %s, device %s",
 
  316               platformName, deviceName);
 
  317      clReleaseDevice(device_id);
 
  321#if defined(CL_VERSION_2_0) 
  322    prims->commandQueue = clCreateCommandQueueWithProperties(context, device_id, NULL, &ret);
 
  324    prims->commandQueue = clCreateCommandQueue(context, device_id, 0, &ret);
 
  326    if (ret != CL_SUCCESS)
 
  328      WLog_ERR(TAG, 
"openCL: unable to create command queue");
 
  329      clReleaseContext(context);
 
  330      clReleaseDevice(device_id);
 
  334    WLog_INFO(TAG, 
"openCL: using platform=%s device=%s", platformName, deviceName);
 
  336    prims->platformId = platform_ids[i];
 
  337    prims->deviceId = device_id;
 
  338    prims->context = context;
 
  342  free((
void*)platform_ids);
 
  346    WLog_ERR(TAG, 
"openCL: no GPU found");
 
  350  programLen = strnlen(openclProgram, 
sizeof(openclProgram));
 
  351  const char* ptr = openclProgram;
 
  352  prims->program = clCreateProgramWithSource(prims->context, 1, &ptr, &programLen, &ret);
 
  353  if (ret != CL_SUCCESS)
 
  355    WLog_ERR(TAG, 
"openCL: unable to create program");
 
  359  ret = clBuildProgram(prims->program, 1, &prims->deviceId, NULL, NULL, NULL);
 
  360  if (ret != CL_SUCCESS)
 
  364    ret = clGetProgramBuildInfo(prims->program, prims->deviceId, CL_PROGRAM_BUILD_LOG,
 
  365                                sizeof(buffer), buffer, &length);
 
  366    if (ret != CL_SUCCESS)
 
  369               "openCL: building program failed but unable to retrieve buildLog, error=%d",
 
  374      WLog_ERR(TAG, 
"openCL: unable to build program, errorLog=%s", buffer);
 
  379  kernel = clCreateKernel(prims->program, 
"yuv420_to_bgra_1b", &ret);
 
  380  if (ret != CL_SUCCESS)
 
  382    WLog_ERR(TAG, 
"openCL: unable to create yuv420_to_bgra_1b kernel");
 
  385  clReleaseKernel(kernel);
 
  387  prims->support = TRUE;
 
  391  cl_context_free(prims);
 
  395static pstatus_t opencl_YUV420ToRGB_8u_P3AC4R(
const BYTE* WINPR_RESTRICT pSrc[3],
 
  396                                              const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst,
 
  397                                              UINT32 dstStep, UINT32 DstFormat,
 
  400  const char* kernel_name = NULL;
 
  404    case PIXEL_FORMAT_ABGR32:
 
  405      kernel_name = 
"yuv420_to_abgr_1b";
 
  407    case PIXEL_FORMAT_XBGR32:
 
  408      kernel_name = 
"yuv420_to_xbgr_1b";
 
  410    case PIXEL_FORMAT_RGBX32:
 
  411      kernel_name = 
"yuv420_to_rgba_1b";
 
  413    case PIXEL_FORMAT_RGBA32:
 
  414      kernel_name = 
"yuv420_to_rgbx_1b";
 
  416    case PIXEL_FORMAT_BGRA32:
 
  417      kernel_name = 
"yuv420_to_bgra_1b";
 
  419    case PIXEL_FORMAT_BGRX32:
 
  420      kernel_name = 
"yuv420_to_bgrx_1b";
 
  422    case PIXEL_FORMAT_XRGB32:
 
  423      kernel_name = 
"yuv420_to_xrgb_1b";
 
  425    case PIXEL_FORMAT_ARGB32:
 
  426      kernel_name = 
"yuv420_to_argb_1b";
 
  430      primitives_t* p = primitives_get_by_type(PRIMITIVES_ONLY_CPU);
 
  433      return p->YUV420ToRGB_8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
 
  437  return opencl_YUVToRGB(kernel_name, pSrc, srcStep, pDst, dstStep, roi);
 
  440static pstatus_t opencl_YUV444ToRGB_8u_P3AC4R(
const BYTE* WINPR_RESTRICT pSrc[3],
 
  441                                              const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst,
 
  442                                              UINT32 dstStep, UINT32 DstFormat,
 
  445  const char* kernel_name = NULL;
 
  449    case PIXEL_FORMAT_ABGR32:
 
  450      kernel_name = 
"yuv444_to_abgr_1b";
 
  452    case PIXEL_FORMAT_XBGR32:
 
  453      kernel_name = 
"yuv444_to_xbgr_1b";
 
  455    case PIXEL_FORMAT_RGBX32:
 
  456      kernel_name = 
"yuv444_to_rgba_1b";
 
  458    case PIXEL_FORMAT_RGBA32:
 
  459      kernel_name = 
"yuv444_to_rgbx_1b";
 
  461    case PIXEL_FORMAT_BGRA32:
 
  462      kernel_name = 
"yuv444_to_bgra_1b";
 
  464    case PIXEL_FORMAT_BGRX32:
 
  465      kernel_name = 
"yuv444_to_bgrx_1b";
 
  467    case PIXEL_FORMAT_XRGB32:
 
  468      kernel_name = 
"yuv444_to_xrgb_1b";
 
  470    case PIXEL_FORMAT_ARGB32:
 
  471      kernel_name = 
"yuv444_to_argb_1b";
 
  475      primitives_t* p = primitives_get_by_type(PRIMITIVES_ONLY_CPU);
 
  478      return p->YUV444ToRGB_8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
 
  482  return opencl_YUVToRGB(kernel_name, pSrc, srcStep, pDst, dstStep, roi);
 
  487  primitives_t* p = primitives_get_by_type(PRIMITIVES_ONLY_CPU);
 
  492  if (!primitives_init_opencl_context(&openclContext))
 
  495  prims->YUV420ToRGB_8u_P3AC4R = opencl_YUV420ToRGB_8u_P3AC4R;
 
  496  prims->YUV444ToRGB_8u_P3AC4R = opencl_YUV444ToRGB_8u_P3AC4R;
 
  497  prims->flags |= PRIM_FLAGS_HAVE_EXTGPU;
 
  498  prims->uninit = primitives_uninit_opencl;