diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index a55abc83091e4..a8308ffd89add 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -105,8 +105,6 @@ set(SYCL_SOURCES "detail/scheduler/graph_builder.cpp" "detail/spec_constant_impl.cpp" "detail/sycl_mem_obj_t.cpp" - "detail/usm/clusm.cpp" - "detail/usm/usm_dispatch.cpp" "detail/usm/usm_impl.cpp" "detail/util.cpp" "accessor.cpp" diff --git a/sycl/source/detail/context_impl.hpp b/sycl/source/detail/context_impl.hpp index 99b74b3e252ac..266c3879366bc 100644 --- a/sycl/source/detail/context_impl.hpp +++ b/sycl/source/detail/context_impl.hpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include @@ -152,7 +151,6 @@ class context_impl { bool MPluginInterop; bool MHostContext; bool MUseCUDAPrimaryContext; - std::shared_ptr MUSMDispatch; std::map MCachedLibPrograms; mutable KernelProgramCache MKernelProgramCache; }; diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index 54df3781580d4..256c6c0b079de 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -10,7 +10,6 @@ #include #include #include -#include #include #include diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 317e6efde46a8..8a504c9d03438 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -12,7 +12,6 @@ #include #include #include -#include #include diff --git a/sycl/source/detail/usm/clusm.cpp b/sycl/source/detail/usm/clusm.cpp deleted file mode 100644 index cd7f021af52a2..0000000000000 --- a/sycl/source/detail/usm/clusm.cpp +++ /dev/null @@ -1,382 +0,0 @@ -//==---------------- clusm.cpp - USM for CL Utils -------------*- C++ -*---==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// ===--------------------------------------------------------------------=== // -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include // strdate - -cl::sycl::detail::usm::CLUSM *gCLUSM = nullptr; - -__SYCL_INLINE_NAMESPACE(cl) { -namespace sycl { -namespace detail { -namespace usm { - -void *CLUSM::hostMemAlloc(cl_context context, - cl_mem_properties_intel *properties, size_t size, - cl_uint alignment, cl_int *errcode_ret) { - std::lock_guard guard(mLock); - void *ptr = - clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, - size, alignment); - - cl_int errorCode = CL_SUCCESS; - - if (ptr != nullptr) { - // Record this allocation in the alloc map: - SUSMAllocInfo &allocInfo = mUSMContextInfo.AllocMap[ptr]; - allocInfo.Type = CL_MEM_TYPE_HOST_INTEL; - allocInfo.BaseAddress = ptr; - allocInfo.Size = size; - allocInfo.Alignment = alignment; - - mUSMContextInfo.HostAllocVector.push_back(ptr); - } else { - errorCode = CL_OUT_OF_HOST_MEMORY; // TODO: which error? - } - - if (errcode_ret) { - errcode_ret[0] = errorCode; - } - - return ptr; -} - -void *CLUSM::deviceMemAlloc(cl_context context, cl_device_id device, - cl_mem_properties_intel *properties, size_t size, - cl_uint alignment, cl_int *errcode_ret) { - std::lock_guard guard(mLock); - void *ptr = clSVMAlloc(context, CL_MEM_READ_WRITE, size, alignment); - - cl_int errorCode = CL_SUCCESS; - - if (ptr != nullptr) { - // Record this allocation in the alloc map: - SUSMAllocInfo &allocInfo = mUSMContextInfo.AllocMap[ptr]; - allocInfo.Type = CL_MEM_TYPE_DEVICE_INTEL; - allocInfo.BaseAddress = ptr; - allocInfo.Size = size; - allocInfo.Alignment = alignment; - - mUSMContextInfo.DeviceAllocVector.push_back(ptr); - } else { - errorCode = CL_OUT_OF_HOST_MEMORY; // TODO: which error? - } - - if (errcode_ret) { - errcode_ret[0] = errorCode; - } - - return ptr; -} - -void *CLUSM::sharedMemAlloc(cl_context context, cl_device_id device, - cl_mem_properties_intel *properties, size_t size, - cl_uint alignment, cl_int *errcode_ret) { - std::lock_guard guard(mLock); - void *ptr = - clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, - size, alignment); - - cl_int errorCode = CL_SUCCESS; - - if (ptr != nullptr) { - // Record this allocation in the alloc map: - SUSMAllocInfo &allocInfo = mUSMContextInfo.AllocMap[ptr]; - allocInfo.Type = CL_MEM_TYPE_SHARED_INTEL; - allocInfo.BaseAddress = ptr; - allocInfo.Size = size; - allocInfo.Alignment = alignment; - - mUSMContextInfo.SharedAllocVector.push_back(ptr); - } else { - errorCode = CL_OUT_OF_HOST_MEMORY; // TODO: which error? - } - - if (errcode_ret) { - errcode_ret[0] = errorCode; - } - - return ptr; -} - -cl_int CLUSM::memFree(cl_context context, const void *ptr) { - std::lock_guard guard(mLock); - - CUSMAllocMap::iterator iter = mUSMContextInfo.AllocMap.find(ptr); - if (iter != mUSMContextInfo.AllocMap.end()) { - const SUSMAllocInfo &allocInfo = iter->second; - - switch (allocInfo.Type) { - case CL_MEM_TYPE_HOST_INTEL: - mUSMContextInfo.HostAllocVector.erase( - std::find(mUSMContextInfo.HostAllocVector.begin(), - mUSMContextInfo.HostAllocVector.end(), ptr)); - break; - case CL_MEM_TYPE_DEVICE_INTEL: - mUSMContextInfo.DeviceAllocVector.erase( - std::find(mUSMContextInfo.DeviceAllocVector.begin(), - mUSMContextInfo.DeviceAllocVector.end(), ptr)); - break; - case CL_MEM_TYPE_SHARED_INTEL: - mUSMContextInfo.SharedAllocVector.erase( - std::find(mUSMContextInfo.SharedAllocVector.begin(), - mUSMContextInfo.SharedAllocVector.end(), ptr)); - break; - default: - assert(0 && "unsupported!"); - break; - } - - mUSMContextInfo.AllocMap.erase(ptr); - - clSVMFree(context, const_cast(ptr)); - ptr = nullptr; - - return CL_SUCCESS; - } - - return CL_INVALID_MEM_OBJECT; -} - -cl_int CLUSM::getMemAllocInfoINTEL(cl_context context, const void *ptr, - cl_mem_info_intel param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { - std::lock_guard guard(mLock); - if (ptr == nullptr) { - return CL_INVALID_VALUE; - } - - if (mUSMContextInfo.AllocMap.empty()) { - // No pointers allocated? - return CL_INVALID_MEM_OBJECT; // TODO: new error code? - } - - CUSMAllocMap::iterator iter = mUSMContextInfo.AllocMap.lower_bound(ptr); - - if (iter->first != ptr) { - if (iter == mUSMContextInfo.AllocMap.begin()) { - // This pointer is not in the map. - return CL_INVALID_MEM_OBJECT; - } - - // Go to the previous iterator. - --iter; - } - - const SUSMAllocInfo &allocInfo = iter->second; - - auto startPtr = static_cast(allocInfo.BaseAddress); - auto endPtr = startPtr + allocInfo.Size; - if (ptr < startPtr || ptr >= endPtr) { - return CL_INVALID_MEM_OBJECT; - } - - switch (param_name) { - case CL_MEM_ALLOC_TYPE_INTEL: { - auto ptr = - reinterpret_cast(param_value); - return writeParamToMemory(param_value_size, allocInfo.Type, - param_value_size_ret, ptr); - } - case CL_MEM_ALLOC_BASE_PTR_INTEL: { - auto ptr = reinterpret_cast(param_value); - return writeParamToMemory(param_value_size, allocInfo.BaseAddress, - param_value_size_ret, ptr); - } - case CL_MEM_ALLOC_SIZE_INTEL: { - auto ptr = reinterpret_cast(param_value); - return writeParamToMemory(param_value_size, allocInfo.Size, - param_value_size_ret, ptr); - } - default: - break; - } - - return CL_INVALID_VALUE; -} - -cl_int CLUSM::setKernelExecInfo(cl_kernel kernel, - cl_kernel_exec_info param_name, - size_t param_value_size, - const void *param_value) { - std::lock_guard guard(mLock); - - cl_int retVal = CL_INVALID_VALUE; - - switch (param_name) { - case CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL: - if (param_value_size == sizeof(cl_bool)) { - SUSMKernelInfo &kernelInfo = mUSMKernelInfoMap[kernel]; - auto pBool = reinterpret_cast(param_value); - - kernelInfo.IndirectHostAccess = (pBool[0] == CL_TRUE); - retVal = CL_SUCCESS; - } - break; - case CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL: - if (param_value_size == sizeof(cl_bool)) { - SUSMKernelInfo &kernelInfo = mUSMKernelInfoMap[kernel]; - auto pBool = reinterpret_cast(param_value); - - kernelInfo.IndirectDeviceAccess = (pBool[0] == CL_TRUE); - retVal = CL_SUCCESS; - } - break; - case CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL: - if (param_value_size == sizeof(cl_bool)) { - SUSMKernelInfo &kernelInfo = mUSMKernelInfoMap[kernel]; - auto pBool = reinterpret_cast(param_value); - - kernelInfo.IndirectSharedAccess = (pBool[0] == CL_TRUE); - retVal = CL_SUCCESS; - } - break; - case CL_KERNEL_EXEC_INFO_SVM_PTRS: { - SUSMKernelInfo &kernelInfo = mUSMKernelInfoMap[kernel]; - auto pPtrs = reinterpret_cast(const_cast(param_value)); - size_t numPtrs = param_value_size / sizeof(void *); - - kernelInfo.SVMPtrs.clear(); - kernelInfo.SVMPtrs.reserve(numPtrs); - kernelInfo.SVMPtrs.insert(kernelInfo.SVMPtrs.begin(), pPtrs, - pPtrs + numPtrs); - - // Don't set CL_SUCCESS so the call passes through. - } break; - default: - break; - } - - return retVal; -} - -cl_int CLUSM::setKernelIndirectUSMExecInfo(cl_command_queue commandQueue, - cl_kernel kernel) { - const SUSMKernelInfo &usmKernelInfo = mUSMKernelInfoMap[kernel]; - - cl_int errorCode = CL_SUCCESS; - - if (usmKernelInfo.IndirectHostAccess || usmKernelInfo.IndirectDeviceAccess || - usmKernelInfo.IndirectSharedAccess) { - // If we supported multiple contexts, we'd get the context from - // the queue, and map it to a USM context info structure here. - - const SUSMContextInfo &usmContextInfo = mUSMContextInfo; - - // If we supported multiple devices, we'd get the device from - // the queue and map it to the device's allocation vector here. - - std::lock_guard guard(mLock); - - bool hasSVMPtrs = !usmKernelInfo.SVMPtrs.empty(); - bool setHostAllocs = !usmContextInfo.HostAllocVector.empty() && - usmKernelInfo.IndirectHostAccess; - bool setDeviceAllocs = !usmContextInfo.DeviceAllocVector.empty() && - usmKernelInfo.IndirectDeviceAccess; - bool setSharedAllocs = !usmContextInfo.SharedAllocVector.empty() && - usmKernelInfo.IndirectSharedAccess; - - bool fastPath = (hasSVMPtrs == false) && - ((!setHostAllocs && !setDeviceAllocs && !setSharedAllocs) || - (setHostAllocs && !setDeviceAllocs && !setSharedAllocs) || - (!setHostAllocs && setDeviceAllocs && !setSharedAllocs) || - (!setHostAllocs && !setDeviceAllocs && setSharedAllocs)); - - if (fastPath) { - if (setHostAllocs) { - size_t count = usmContextInfo.HostAllocVector.size(); - - errorCode = clSetKernelExecInfo(kernel, CL_KERNEL_EXEC_INFO_SVM_PTRS, - count * sizeof(void *), - usmContextInfo.HostAllocVector.data()); - } - if (setDeviceAllocs) { - size_t count = usmContextInfo.DeviceAllocVector.size(); - - errorCode = clSetKernelExecInfo( - kernel, CL_KERNEL_EXEC_INFO_SVM_PTRS, count * sizeof(void *), - usmContextInfo.DeviceAllocVector.data()); - } - if (setSharedAllocs) { - size_t count = usmContextInfo.SharedAllocVector.size(); - - errorCode = clSetKernelExecInfo( - kernel, CL_KERNEL_EXEC_INFO_SVM_PTRS, count * sizeof(void *), - usmContextInfo.SharedAllocVector.data()); - } - } else { - size_t count = usmKernelInfo.SVMPtrs.size() + setHostAllocs - ? usmContextInfo.HostAllocVector.size() - : 0 + setDeviceAllocs - ? usmContextInfo.DeviceAllocVector.size() - : 0 + setSharedAllocs - ? usmContextInfo.SharedAllocVector.size() - : 0; - - std::vector combined; - combined.reserve(count); - - combined.insert(combined.end(), usmKernelInfo.SVMPtrs.begin(), - usmKernelInfo.SVMPtrs.end()); - if (setHostAllocs) { - combined.insert(combined.end(), usmContextInfo.HostAllocVector.begin(), - usmContextInfo.HostAllocVector.end()); - } - if (setDeviceAllocs) { - combined.insert(combined.end(), - usmContextInfo.DeviceAllocVector.begin(), - usmContextInfo.DeviceAllocVector.end()); - } - if (setSharedAllocs) { - combined.insert(combined.end(), - usmContextInfo.SharedAllocVector.begin(), - usmContextInfo.SharedAllocVector.end()); - } - - errorCode = clSetKernelExecInfo(kernel, CL_KERNEL_EXEC_INFO_SVM_PTRS, - count * sizeof(void *), combined.data()); - } - } - - return errorCode; -} - -template -cl_int CLUSM::writeParamToMemory(size_t param_value_size, T param, - size_t *param_value_size_ret, - T *pointer) const { - cl_int errorCode = CL_SUCCESS; - - if (pointer != nullptr) { - if (param_value_size < sizeof(param)) { - errorCode = CL_INVALID_VALUE; - } else { - *pointer = param; - } - } - - if (param_value_size_ret != nullptr) { - *param_value_size_ret = sizeof(param); - } - - return errorCode; -} - -} // namespace usm -} // namespace detail -} // namespace sycl -} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/usm/usm_dispatch.cpp b/sycl/source/detail/usm/usm_dispatch.cpp deleted file mode 100644 index 980410f778bf4..0000000000000 --- a/sycl/source/detail/usm/usm_dispatch.cpp +++ /dev/null @@ -1,402 +0,0 @@ -//==------------ usm_dispatch.cpp - USM Dispatch Impl ----------*- C++ -*---==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// ===--------------------------------------------------------------------=== // - -#include -#include -#include - -__SYCL_INLINE_NAMESPACE(cl) { - namespace sycl { - namespace detail { - namespace usm { - -/*** - - General philosophy: Try to use a CL extension for each function, - if it exists. Otherwise, fall back to CLUSM's USM-on-SVM. - - **/ -#define GET_EXTENSION(_funcname) \ - pfn_##_funcname = (_funcname##_fn)clGetExtensionFunctionAddressForPlatform( \ - platform, #_funcname); - -USMDispatcher::USMDispatcher(cl_platform_id platform, - const vector_class &DeviceIds) { - // Note: This function should be modified whenever a new BE is added. - // mSupported needs to be appropriately set to properly gate USM support. - - if (pi::useBackend(pi::Backend::SYCL_BE_PI_OPENCL)) { - GET_EXTENSION(clHostMemAllocINTEL); - GET_EXTENSION(clDeviceMemAllocINTEL); - GET_EXTENSION(clSharedMemAllocINTEL); - GET_EXTENSION(clMemFreeINTEL); - GET_EXTENSION(clGetMemAllocInfoINTEL); - GET_EXTENSION(clSetKernelArgMemPointerINTEL); - GET_EXTENSION(clEnqueueMemsetINTEL); - GET_EXTENSION(clEnqueueMemcpyINTEL); - GET_EXTENSION(clEnqueueMigrateMemINTEL); - GET_EXTENSION(clEnqueueMemAdviseINTEL); - mEmulated = !(pfn_clHostMemAllocINTEL && pfn_clDeviceMemAllocINTEL && - pfn_clSharedMemAllocINTEL && pfn_clMemFreeINTEL && - pfn_clSetKernelArgMemPointerINTEL && - pfn_clEnqueueMemsetINTEL && pfn_clEnqueueMemcpyINTEL); - mEmulator.reset(new CLUSM()); - - if (mEmulated) { - // See if every device in this context supports - // CL_DEVICE_SVM_FINE_GRAIN_BUFFER - // If not, disable USM - - if (CL_TARGET_OPENCL_VERSION >= 200) { - bool AnybodyNotSupportSVM = false; - for (const auto &D : DeviceIds) { - cl_device_svm_capabilities Caps; - cl_int Error = clGetDeviceInfo( - pi::cast(D), CL_DEVICE_SVM_CAPABILITIES, - sizeof(cl_device_svm_capabilities), &Caps, nullptr); - AnybodyNotSupportSVM |= ((Error != CL_SUCCESS) || - (!(Caps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER))); - } - mSupported = !AnybodyNotSupportSVM; - } else { - // USM isn't support on CL 1.2 - mSupported = false; - } - } else { - // We support the CL Extension - mSupported = true; - } - } else { - mSupported = false; - } -} - -void *USMDispatcher::hostMemAlloc(pi_context Context, - cl_mem_properties_intel *Properties, - size_t Size, pi_uint32 Alignment, - pi_result *ErrcodeRet) { - void *RetVal = nullptr; - - if (pi::useBackend(pi::Backend::SYCL_BE_PI_OPENCL)) { - cl_context CLContext = pi::cast(Context); - - if (mEmulated) { - RetVal = mEmulator->hostMemAlloc(CLContext, Properties, Size, Alignment, - pi::cast(ErrcodeRet)); - } else { - RetVal = pfn_clHostMemAllocINTEL(CLContext, Properties, Size, Alignment, - pi::cast(ErrcodeRet)); - } - } - - if (ErrcodeRet && !RetVal) { - *ErrcodeRet = PI_INVALID_OPERATION; - } - return RetVal; -} - -void *USMDispatcher::deviceMemAlloc(pi_context Context, pi_device Device, - cl_mem_properties_intel *Properties, - size_t Size, pi_uint32 Alignment, - pi_result *ErrcodeRet) { - void *RetVal = nullptr; - - if (pi::useBackend(pi::Backend::SYCL_BE_PI_OPENCL)) { - cl_context CLContext = pi::cast(Context); - cl_device_id CLDevice = pi::cast(Device); - - if (mEmulated) { - RetVal = mEmulator->deviceMemAlloc(CLContext, CLDevice, Properties, Size, - Alignment, - pi::cast(ErrcodeRet)); - } else { - RetVal = pfn_clDeviceMemAllocINTEL(CLContext, CLDevice, Properties, Size, - Alignment, - pi::cast(ErrcodeRet)); - } - } - - if (ErrcodeRet && !RetVal) { - *ErrcodeRet = PI_INVALID_OPERATION; - } - return RetVal; -} - -void *USMDispatcher::sharedMemAlloc(pi_context Context, pi_device Device, - cl_mem_properties_intel *Properties, - size_t Size, pi_uint32 Alignment, - pi_result *ErrcodeRet) { - void *RetVal = nullptr; - - if (pi::useBackend(pi::Backend::SYCL_BE_PI_OPENCL)) { - cl_context CLContext = pi::cast(Context); - cl_device_id CLDevice = pi::cast(Device); - - if (mEmulated) { - RetVal = mEmulator->sharedMemAlloc(CLContext, CLDevice, Properties, Size, - Alignment, - pi::cast(ErrcodeRet)); - } else { - RetVal = pfn_clSharedMemAllocINTEL(CLContext, CLDevice, Properties, Size, - Alignment, - pi::cast(ErrcodeRet)); - } - } - - if (ErrcodeRet && !RetVal) { - *ErrcodeRet = PI_INVALID_OPERATION; - } - return RetVal; -} - -pi_result USMDispatcher::memFree(pi_context Context, void *Ptr) { - pi_result RetVal = PI_INVALID_OPERATION; - - if (pi::useBackend(pi::Backend::SYCL_BE_PI_OPENCL)) { - cl_context CLContext = pi::cast(Context); - - if (mEmulated) { - RetVal = pi::cast(mEmulator->memFree(CLContext, Ptr)); - } else { - RetVal = pi::cast(pfn_clMemFreeINTEL(CLContext, Ptr)); - } - } - - return RetVal; -} - -pi_result USMDispatcher::setKernelArgMemPointer(pi_kernel Kernel, - pi_uint32 ArgIndex, - const void *ArgValue) { - pi_result RetVal = PI_INVALID_OPERATION; - - if (mSupported) { - if (pi::useBackend(pi::Backend::SYCL_BE_PI_OPENCL)) { - cl_kernel CLKernel = pi::cast(Kernel); - - if (mEmulated) { - RetVal = pi::cast( - clSetKernelArgSVMPointer(CLKernel, ArgIndex, ArgValue)); - } else { - RetVal = pi::cast( - pfn_clSetKernelArgMemPointerINTEL(CLKernel, ArgIndex, ArgValue)); - } - } - } - - return RetVal; -} - -void USMDispatcher::setKernelIndirectAccess(pi_kernel Kernel, pi_queue Queue) { - - if (mSupported) { - if (pi::useBackend(pi::Backend::SYCL_BE_PI_OPENCL)) { - cl_kernel CLKernel = pi::cast(Kernel); - cl_command_queue CLQueue = pi::cast(Queue); - cl_bool TrueVal = CL_TRUE; - - if (mEmulated) { - CHECK_OCL_CODE(mEmulator->setKernelExecInfo( - CLKernel, CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, - sizeof(cl_bool), &TrueVal)); - CHECK_OCL_CODE(mEmulator->setKernelExecInfo( - CLKernel, CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, - sizeof(cl_bool), &TrueVal)); - CHECK_OCL_CODE(mEmulator->setKernelExecInfo( - CLKernel, CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, - sizeof(cl_bool), &TrueVal)); - CHECK_OCL_CODE( - mEmulator->setKernelIndirectUSMExecInfo(CLQueue, CLKernel)); - } else { - CHECK_OCL_CODE(clSetKernelExecInfo( - CLKernel, CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, - sizeof(cl_bool), &TrueVal)); - CHECK_OCL_CODE(clSetKernelExecInfo( - CLKernel, CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, - sizeof(cl_bool), &TrueVal)); - CHECK_OCL_CODE(clSetKernelExecInfo( - CLKernel, CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, - sizeof(cl_bool), &TrueVal)); - } - } - } -} - -pi_result USMDispatcher::enqueueMemset(pi_queue Queue, void *Ptr, - pi_int32 Value, size_t Count, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - pi_result RetVal = PI_INVALID_OPERATION; - - if (pi::useBackend(pi::Backend::SYCL_BE_PI_OPENCL)) { - cl_command_queue CLQueue = pi::cast(Queue); - - // Is there a better way to convert pi_event * to cl_event *? - - if (mEmulated) { - const cl_uchar Pattern = (cl_uchar)Value; - - RetVal = pi::cast(clEnqueueSVMMemFill( - CLQueue, Ptr, &Pattern, sizeof(Pattern), Count, NumEventsInWaitList, - reinterpret_cast(EventWaitList), - reinterpret_cast(Event))); - } else { - RetVal = pi::cast(pfn_clEnqueueMemsetINTEL( - CLQueue, Ptr, Value, Count, NumEventsInWaitList, - reinterpret_cast(EventWaitList), - reinterpret_cast(Event))); - } - } - - return RetVal; -} - -pi_result USMDispatcher::enqueueMemcpy(pi_queue Queue, pi_bool Blocking, - void *DestPtr, const void *SrcPtr, - size_t Size, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - pi_result RetVal = PI_INVALID_OPERATION; - - if (pi::useBackend(pi::Backend::SYCL_BE_PI_OPENCL)) { - cl_command_queue CLQueue = pi::cast(Queue); - - if (mEmulated) { - RetVal = pi::cast(clEnqueueSVMMemcpy( - CLQueue, Blocking, DestPtr, SrcPtr, Size, NumEventsInWaitList, - reinterpret_cast(EventWaitList), - reinterpret_cast(Event))); - } else { - RetVal = pi::cast(pfn_clEnqueueMemcpyINTEL( - CLQueue, Blocking, DestPtr, SrcPtr, Size, NumEventsInWaitList, - reinterpret_cast(EventWaitList), - reinterpret_cast(Event))); - } - } - - return RetVal; -} - -pi_result USMDispatcher::enqueueMigrateMem(pi_queue Queue, const void *Ptr, - size_t Size, - cl_mem_migration_flags Flags, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - pi_result RetVal = PI_INVALID_OPERATION; - - if (pi::useBackend(pi::Backend::SYCL_BE_PI_OPENCL)) { - cl_command_queue CLQueue = pi::cast(Queue); - - if (mEmulated) { - // We could check for OpenCL 2.1 and call the SVM migrate - // functions, but for now we'll just enqueue a marker. - // TODO: Implement a PI call for this openCL API - RetVal = pi::cast(clEnqueueMarkerWithWaitList( - CLQueue, NumEventsInWaitList, - reinterpret_cast(EventWaitList), - reinterpret_cast(Event))); - RT::GlobalPlugin->checkPiResult(RetVal); - } else { - RetVal = pi::cast(pfn_clEnqueueMigrateMemINTEL( - CLQueue, Ptr, Size, Flags, NumEventsInWaitList, - reinterpret_cast(EventWaitList), - reinterpret_cast(Event))); - } - } - - return RetVal; -} - -pi_result USMDispatcher::getMemAllocInfo(pi_context Context, const void *Ptr, - cl_mem_info_intel ParamName, - size_t ParamValueSize, - void *ParamValue, - size_t *ParamValueSizeRet) { - pi_result RetVal = PI_INVALID_OPERATION; - - if (pi::useBackend(pi::Backend::SYCL_BE_PI_OPENCL)) { - cl_context CLContext = pi::cast(Context); - - if (mEmulated) { - // TODO: What should we do here? - // This isn't really supported yet. - // Advice is typically safe to ignore, - // so a NOP will do. - RetVal = pi::cast(mEmulator->getMemAllocInfoINTEL( - CLContext, Ptr, ParamName, ParamValueSize, ParamValue, - ParamValueSizeRet)); - } else { - RetVal = pi::cast( - pfn_clGetMemAllocInfoINTEL(CLContext, Ptr, ParamName, ParamValueSize, - ParamValue, ParamValueSizeRet)); - } - } - - return RetVal; -} - -void USMDispatcher::memAdvise(pi_queue Queue, const void *Ptr, size_t Length, - int Advice, pi_event *Event) { - if (pi::useBackend(pi::Backend::SYCL_BE_PI_OPENCL)) { - cl_command_queue CLQueue = pi::cast(Queue); - - if (mEmulated) { - // memAdvise does nothing here - // TODO: Implement a PI call for this openCL API - RT::GlobalPlugin->checkPiResult( - RT::cast(clEnqueueMarkerWithWaitList( - CLQueue, 0, nullptr, reinterpret_cast(Event)))); - } else { - // Temporary until driver supports - // memAdvise doesn't do anything on an iGPU anyway - // TODO: Implement a PI call for this openCL API - RT::GlobalPlugin->checkPiResult( - RT::cast(clEnqueueMarkerWithWaitList( - CLQueue, 0, nullptr, reinterpret_cast(Event)))); - /* - // Enable once this is supported in the driver - auto CLAdvice = *reinterpret_cast(&Advice); - // TODO: Implement a PI call for this openCL API - RT::GlobalPlugin->checkPiResult(RT::cast(pfn_clEnqueueMemAdviseINTEL( - CLQueue, Ptr, Length, CLAdvice, 0, nullptr, - reinterpret_cast(Event)))); - */ - } - } -} - -pi_result USMDispatcher::enqueuePrefetch(pi_queue Queue, void *Ptr, size_t Size, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event, - const plugin &Plugin) { - pi_result RetVal = PI_INVALID_OPERATION; - - if (pi::useBackend(pi::Backend::SYCL_BE_PI_OPENCL)) { - if (mEmulated) { - // Prefetch is a hint, so ignoring it is always safe. - RetVal = Plugin.call_nocheck( - Queue, NumEventsInWaitList, EventWaitList, Event); - } else { - // TODO: Replace this with real prefetch support when the driver enables - // it. - RetVal = Plugin.call_nocheck( - Queue, NumEventsInWaitList, EventWaitList, Event); - } - } - - return RetVal; -} - -} // namespace usm -} // namespace detail -} // namespace sycl -} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/usm/usm_dispatch.hpp b/sycl/source/detail/usm/usm_dispatch.hpp deleted file mode 100644 index e8ea5b4fd1a97..0000000000000 --- a/sycl/source/detail/usm/usm_dispatch.hpp +++ /dev/null @@ -1,77 +0,0 @@ -//==-------------- usm_dispatch.hpp - SYCL USM Dispatch --------*- C++ -*---==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// ===--------------------------------------------------------------------=== // -#pragma once - -#include - -#include - -__SYCL_INLINE_NAMESPACE(cl) { -namespace sycl { -namespace detail { -namespace usm { - -class USMDispatcher { -public: - USMDispatcher(cl_platform_id Platform, - const vector_class &DeviceIds); - - void *hostMemAlloc(pi_context Context, cl_mem_properties_intel *Properties, - size_t Size, pi_uint32 Alignment, pi_result *ErrcodeRet); - void *deviceMemAlloc(pi_context Context, pi_device Device, - cl_mem_properties_intel *Properties, size_t Size, - pi_uint32 Alignment, pi_result *ErrcodeRet); - void *sharedMemAlloc(pi_context Context, pi_device Device, - cl_mem_properties_intel *Properties, size_t Size, - pi_uint32 Alignment, pi_result *ErrcodeRet); - pi_result memFree(pi_context Context, void *Ptr); - pi_result setKernelArgMemPointer(pi_kernel Kernel, pi_uint32 ArgIndex, - const void *ArgValue); - void setKernelIndirectAccess(pi_kernel Kernel, pi_queue Queue); - pi_result enqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value, - size_t Count, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event); - pi_result enqueueMemcpy(pi_queue Queue, pi_bool Blocking, void *DestPtr, - const void *SrcPtr, size_t Size, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event); - pi_result enqueueMigrateMem(pi_queue Queue, const void *Ptr, size_t Size, - cl_mem_migration_flags Flags, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event); - pi_result getMemAllocInfo(pi_context Context, const void *Ptr, - cl_mem_info_intel ParamName, size_t ParamValueSize, - void *ParamValue, size_t *ParamValueSizeRet); - void memAdvise(pi_queue Queue, const void *Ptr, size_t Length, int Advice, - pi_event *Event); - pi_result enqueuePrefetch(pi_queue Queue, void *Ptr, size_t Size, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event, - const plugin &Plugin); - -private: - bool mEmulated = false; - bool mSupported = false; - std::unique_ptr mEmulator; - - clHostMemAllocINTEL_fn pfn_clHostMemAllocINTEL = nullptr; - clDeviceMemAllocINTEL_fn pfn_clDeviceMemAllocINTEL = nullptr; - clSharedMemAllocINTEL_fn pfn_clSharedMemAllocINTEL = nullptr; - clMemFreeINTEL_fn pfn_clMemFreeINTEL = nullptr; - clGetMemAllocInfoINTEL_fn pfn_clGetMemAllocInfoINTEL = nullptr; - clSetKernelArgMemPointerINTEL_fn pfn_clSetKernelArgMemPointerINTEL = nullptr; - clEnqueueMemsetINTEL_fn pfn_clEnqueueMemsetINTEL = nullptr; - clEnqueueMemcpyINTEL_fn pfn_clEnqueueMemcpyINTEL = nullptr; - clEnqueueMigrateMemINTEL_fn pfn_clEnqueueMigrateMemINTEL = nullptr; - clEnqueueMemAdviseINTEL_fn pfn_clEnqueueMemAdviseINTEL = nullptr; -}; - -} // namespace usm -} // namespace detail -} // namespace sycl -} // __SYCL_INLINE_NAMESPACE(cl)