intel · bader · Apr 9, 2020 · Mar 2, 2020 · bader · Apr 9, 2020
@@ -4,7 +4,7 @@ message(STATUS "Including the PI API CUDA backend.")
  # we only require the CUDA driver API to be used
  # CUDA_CUDA_LIBRARY variable defines the path to libcuda.so, the CUDA Driver API library.
 
-find_package(CUDA 10.0 REQUIRED)
+find_package(CUDA 10.1 REQUIRED)
 
 add_library(cudadrv SHARED IMPORTED)
 

@@ -1,9 +1,11 @@
 // REQUIRES: ocloc, gpu
+// UNSUPPORTED: cuda
+// CUDA is not compatible with SPIR.
 
 // RUN: %clangxx -fsycl -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend=spir64_gen-unknown-unknown-sycldevice "-device skl" %s -o %t.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
-// XFAIL: cuda
+
 //==----- gpu.cpp - AOT compilation for gen devices using GEN compiler  ------==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

@@ -7,6 +7,8 @@
 //===------------------------------------------------------------------------===//
 
 // REQUIRES: opencl-aot, ocloc, aoc, cpu, gpu, accelerator
+// UNSUPPORTED: cuda
+// CUDA is not compatible with SPIR.
 
 // 1-command compilation case
 // Targeting CPU, GPU, FPGA

@@ -1,11 +1,12 @@
-// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
+// XFAIL: cuda
+// TODO: Fix CUDA implementation.
+//
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple  %s -o %t.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
-// XFAIL: cuda
-// TODO: cuda fail due to unimplemented param_name 4121 in cuda_piDeviceGetInfo
-
+//
 //==---------- subbuffer.cpp --- sub-buffer basic test ---------------------==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

@@ -1,4 +1,4 @@
-// RUN: %clangxx -fsycl %s -o %t.out
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
@@ -48,7 +48,7 @@ int main() {
 
     vector_class<char> Expected{'x', 'x', '0', '1', '2', '3', 'x', 'x'};
     if (DataRaw != Expected)
-      throw std::runtime_error("Check of hadler.copy(ptr, acc) was failed");
+      throw std::runtime_error("Check of handler.copy(ptr, acc) was failed");
   }
 
   {
@@ -71,7 +71,7 @@ int main() {
     }
     vector_class<char> Expected{'2', '3', '4', '5', 'x', 'x', 'x', 'x'};
     if (DataRaw != Expected)
-      throw std::runtime_error("Check of hadler.copy(acc, ptr) was failed");
+      throw std::runtime_error("Check of handler.copy(acc, ptr) was failed");
   }
   return 0;
 }
@@ -1,7 +1,7 @@
+// REQUIRES: opencl
 // RUN: %clangxx -fsycl %s -o %t.out -L %opencl_libs_dir -lOpenCL
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
-// REQUIRES: opencl
 
 //==------- interop_task.cpp -----------------------------------------------==//
 //

@@ -12,28 +12,18 @@
 //===----------------------------------------------------------------------===//
 
 #include <CL/sycl.hpp>
+#include <cassert>
 
 using namespace cl::sycl;
 
-void check(bool condition, const char *conditionString, const char *filename,
-           const long line) noexcept {
-  if (!condition) {
-    std::cerr << "CHECK failed in " << filename << "#" << line << " "
-              << conditionString << "\n";
-    std::abort();
-  }
-}
-
-#define CHECK(CONDITION) check(CONDITION, #CONDITION, __FILE__, __LINE__)
-
 int main() {
   queue q;
 
   buffer<int, 1> buf(range<1>(1));
   program prg(q.get_context());
 
   prg.build_with_kernel_type<class SingleTask>();
-  CHECK(prg.has_kernel<class SingleTask>());
+  assert(prg.has_kernel<class SingleTask>());
   kernel krn = prg.get_kernel<class SingleTask>();
 
   q.submit([&](handler &cgh) {
@@ -42,26 +32,26 @@ int main() {
   });
 
   const string_class krnName = krn.get_info<info::kernel::function_name>();
-  CHECK(!krnName.empty());
+  assert(!krnName.empty());
   const cl_uint krnArgCount = krn.get_info<info::kernel::num_args>();
-  CHECK(krnArgCount > 0);
+  assert(krnArgCount > 0);
   const context krnCtx = krn.get_info<info::kernel::context>();
-  CHECK(krnCtx == q.get_context());
+  assert(krnCtx == q.get_context());
   const program krnPrg = krn.get_info<info::kernel::program>();
-  CHECK(krnPrg == prg);
+  assert(krnPrg == prg);
   const cl_uint krnRefCount = krn.get_info<info::kernel::reference_count>();
-  CHECK(krnRefCount > 0);
+  assert(krnRefCount > 0);
   const string_class krnAttr = krn.get_info<info::kernel::attributes>();
-  CHECK(krnAttr.empty());
+  assert(krnAttr.empty());
 
   device dev = q.get_device();
   const size_t wgSize =
       krn.get_work_group_info<info::kernel_work_group::work_group_size>(dev);
-  CHECK(wgSize > 0);
+  assert(wgSize > 0);
   const size_t prefWGSizeMult = krn.get_work_group_info<
       info::kernel_work_group::preferred_work_group_size_multiple>(dev);
-  CHECK(prefWGSizeMult > 0);
+  assert(prefWGSizeMult > 0);
   const cl_ulong prvMemSize =
       krn.get_work_group_info<info::kernel_work_group::private_mem_size>(dev);
-  CHECK(prvMemSize == 0);
+  assert(prvMemSize == 0);
 }
@@ -1,8 +1,10 @@
-// RUN: %clangxx -fsycl %s -o %t.out
+// XFAIL: cuda
+// CUDA exposes broken hierarchical parallelism.
+
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple  %s -o %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
-// XFAIL: cuda
 
 #include <CL/sycl.hpp>
 

@@ -6,9 +6,6 @@
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 
-// TODO: Image support in CUDA backend
-// XFAIL: cuda
-
 //==--------------- sampler.cpp - SYCL sampler basic test ------------------==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

@@ -3,7 +3,7 @@
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
-// UNSUPPORTED: cuda
+
 //==------------- fpga_pipes.cpp - SYCL FPGA pipes test --------------------==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

@@ -5,7 +5,7 @@
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
-// UNSUPPORTED: cuda
+
 //==------------- fpga_queue.cpp - SYCL FPGA queues test -------------------==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

@@ -1,11 +1,13 @@
-// RUN: %clangxx -Xclang -fsycl-allow-func-ptr -std=c++14 -fsycl %s -o %t.out -L %opencl_libs_dir -lOpenCL
+// UNSUPPORTED: windows
+// UNSUPPORTED: cuda
+// CUDA does not support the function pointer as kernel argument extension.
+
+// RUN: %clangxx -Xclang -fsycl-allow-func-ptr -std=c++14 -fsycl %s -o %t.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // FIXME: This test should use runtime early exit once correct check for
 // corresponding extension is implemented
-// UNSUPPORTED: windows
-// XFAIL: cuda
 
 #include <CL/sycl.hpp>
 

@@ -1,11 +1,13 @@
-// RUN: %clangxx -Xclang -fsycl-allow-func-ptr -std=c++14 -fsycl %s -o %t.out -L %opencl_libs_dir -lOpenCL
+// UNSUPPORTED: windows
+// UNSUPPORTED: cuda
+// CUDA does not support the function pointer as kernel argument extension.
+
+// RUN: %clangxx -Xclang -fsycl-allow-func-ptr -std=c++14 -fsycl %s -o %t.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // FIXME: This test should use runtime early exit once correct check for
 // corresponding extension is implemented
-// UNSUPPORTED: windows
-// XFAIL: cuda
 
 #include <CL/sycl.hpp>
 

@@ -1,9 +1,11 @@
+// UNSUPPORTED: cuda
+// OpenCL C 2.x alike work-group functions not yet supported by CUDA.
+//
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
-// UNSUPPORTED: cuda
 
 #include <CL/sycl.hpp>
 #include <algorithm>

@@ -1,9 +1,11 @@
+// UNSUPPORTED: cuda
+// OpenCL C 2.x alike work-group functions not yet supported by CUDA.
+//
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
-// UNSUPPORTED: cuda
 
 #include <CL/sycl.hpp>
 #include <algorithm>

@@ -1,9 +1,11 @@
+// UNSUPPORTED: cuda
+// OpenCL C 2.x alike work-group functions not yet supported by CUDA.
+//
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
-// UNSUPPORTED: cuda
 
 #include <CL/sycl.hpp>
 #include <algorithm>

@@ -1,9 +1,11 @@
+// UNSUPPORTED: cuda
+// OpenCL C 2.x alike work-group functions not yet supported by CUDA.
+//
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
-// UNSUPPORTED: cuda
 
 #include <CL/sycl.hpp>
 #include <algorithm>

@@ -1,9 +1,11 @@
+// UNSUPPORTED: cuda
+// OpenCL C 2.x alike work-group functions not yet supported by CUDA.
+//
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
-// UNSUPPORTED: cuda
 
 #include <CL/sycl.hpp>
 #include <algorithm>

@@ -1,3 +1,6 @@
+// UNSUPPORTED: cuda
+// OpenCL C 2.x alike work-group functions not yet supported by CUDA.
+//
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out

@@ -1,9 +1,11 @@
+// UNSUPPORTED: cuda
+// OpenCL C 2.x alike work-group functions not yet supported by CUDA.
+//
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
-// UNSUPPORTED: cuda
 
 #include <CL/sycl.hpp>
 #include <algorithm>

@@ -1,9 +1,11 @@
+// UNSUPPORTED: cuda
+// OpenCL C 2.x alike work-group functions not yet supported by CUDA.
+//
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
-// UNSUPPORTED: cuda
 
 #include <CL/sycl.hpp>
 #include <algorithm>

@@ -18,9 +18,6 @@
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 
-// TODO: ptxas fatal   : Unresolved extern function '__spirv_ControlBarrier'
-// UNSUPPORTED: cuda
-
 // This test checks correctness of hierarchical kernel execution when there is
 // code and data in the work group scope.
 

@@ -1,3 +1,6 @@
+// UNSUPPORTED: cuda
+// CUDA does not support SPIR-V.
+
 //-fsycl-targets=%sycl_triple
 // RUN: %clangxx -fsycl-device-only -fno-sycl-use-bitcode -Xclang -fsycl-int-header=%t.h -c %s -o %t.spv -I %sycl_include -Xclang -verify-ignore-unexpected=note,warning -Wno-sycl-strict
 // RUN: %clangxx -include %t.h -g %s -o %t.out -lsycl -I %sycl_include -Xclang -verify-ignore-unexpected=note,warning

@@ -1,4 +1,4 @@
-// RUN: %clangxx -fsycl %s -o %t.out
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out

@@ -4,9 +4,10 @@
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
-// UNSUPPORTED: cuda
+
 //==---------------- opencl-interop.cpp - SYCL linear id test --------------==//
 //
+//
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

@@ -72,8 +72,8 @@
 
 config.substitutions.append( ('%sycl_libs_dir',  config.sycl_libs_dir ) )
 config.substitutions.append( ('%sycl_include',  config.sycl_include ) )
-config.substitutions.append( ('%opencl_libs_dir',  config.opencl_libs_dir) )
 config.substitutions.append( ('%sycl_source_dir', config.sycl_source_dir) )
+config.substitutions.append( ('%opencl_libs_dir',  config.opencl_libs_dir) )
 config.substitutions.append( ('%opencl_include_dir',  config.opencl_include_dir) )
 config.substitutions.append( ('%cuda_toolkit_include',  config.cuda_toolkit_include) )
 

@@ -3,15 +3,15 @@
 import sys
 
 config.llvm_tools_dir = "@LLVM_TOOLS_DIR@"
-config.sycl_tools_dir = lit_config.params.get('SYCL_TOOLS_DIR', "@LLVM_TOOLS_DIR@")
 config.lit_tools_dir = "@LLVM_LIT_TOOLS_DIR@"
+config.sycl_tools_dir = lit_config.params.get('SYCL_TOOLS_DIR', "@LLVM_TOOLS_DIR@")
 config.sycl_include = lit_config.params.get('SYCL_INCLUDE', "@SYCL_INCLUDE@")
 config.sycl_obj_root = "@SYCL_BINARY_DIR@"
 config.sycl_source_dir = "@SYCL_SOURCE_DIR@/source"
-config.opencl_libs_dir = os.path.dirname("@OpenCL_LIBRARIES@")
 config.sycl_libs_dir = lit_config.params.get('SYCL_LIBS_DIR', "@LLVM_LIBS_DIR@")
 config.target_triple = "@TARGET_TRIPLE@"
 config.host_triple = "@LLVM_HOST_TRIPLE@"
+config.opencl_libs_dir = os.path.dirname("@OpenCL_LIBRARIES@")
 config.opencl_include_dir = "@OpenCL_INCLUDE_DIR@"
 config.cuda_toolkit_include = "@CUDA_TOOLKIT_INCLUDE@"
 

@@ -3,7 +3,6 @@
 // RUN: %clangxx -fsycl %s -o %t1.out -L %opencl_libs_dir -lOpenCL
 // RUN: %CPU_RUN_PLACEHOLDER %t1.out
 // RUN: %GPU_RUN_PLACEHOLDER %t1.out
-// XFAIL: cuda
 //==----------- ordered_dmemll.cpp - Device Memory Linked List test --------==//
 // It uses an ordered queue where explicit waiting is not necessary between
 // kernels

@@ -1,3 +1,5 @@
+// REQUIRES: opencl
+//
 // RUN: %clangxx -fsycl %s -o %t.out -L %opencl_libs_dir -lOpenCL
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 //==---------- ordered_queue.cpp - SYCL ordered queue test -----------------==//

@@ -1,3 +1,6 @@
+// REQUIRES: opencl
+// Env vars are used to pass OpenCL-specific flags to PI compiling/linking.
+//
 // RUN: %clangxx -O0 -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -lsycl
 //
 // Deprecated SYCL_PROGRAM_BUILD_OPTIONS should work as an alias to

@@ -5,11 +5,8 @@
 // TODO: For now PI checks are skipped for ACC device. To decide if it's good.
 // RUN: env %ACC_RUN_PLACEHOLDER %t.out
 
-// TODO: No CUDA image support
-// XFAIL: cuda
-
-// TODO: No CUDA image support
-// XFAIL: cuda
+// UNSUPPORTED: cuda
+// CUDA cannot support OpenCL spec conform images.
 
 //==-------------- image_access.cpp - SYCL image accessors test  -----------==//
 //

@@ -1,5 +1,4 @@
-// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -lOpenCL
-
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out

@@ -14,9 +14,6 @@
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 
-// TODO: terminate called after throwing an instance of 'cl::sycl::runtime_error'
-// TODO: what():  OpenCL API failed. OpenCL API returns: -999 (Unknown OpenCL error code) -999 (Unknown OpenCL error code)
-
 #include <CL/sycl.hpp>
 
 int main() {

@@ -1,9 +1,10 @@
-// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -I %sycl_source_dir %s -o %t.out
+// XFAIL: cuda
+// TODO: Fix accidential error return when unmapping read-only memory objects.
+//
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -I %sycl_source_dir %s -o %t.out -g
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
-// Incorrect event callback processing for host device.
-// XFAIL: cuda
 //
 //==-------------------------- DataMovement.cpp ----------------------------==//
 //

@@ -1,4 +1,7 @@
-// RUN: %clangxx -fsycl %s -o %t.out
+// UNSUPPORTED: cuda
+// CUDA compilation and runtime do not yet support sub-groups.
+//
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUNx: %GPU_RUN_PLACEHOLDER %t.out

@@ -1,9 +1,12 @@
+// UNSUPPORTED: cuda
+// CUDA compilation and runtime do not yet support sub-groups.
+//
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
-// UNSUPPORTED: cuda
+
 //==---------- barrier.cpp - SYCL sub_group barrier test -------*- C++ -*---==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

@@ -1,10 +1,13 @@
+// UNSUPPORTED: cuda
+// CUDA compilation and runtime do not yet support sub-groups.
+
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -D SG_GPU %s -o %t_gpu.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t_gpu.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
-// UNSUPPORTED: cuda
+
 //==--------- broadcast.cpp - SYCL sub_group broadcast test ----*- C++ -*---==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

@@ -1,9 +1,12 @@
+// UNSUPPORTED: cuda
+// CUDA compilation and runtime do not yet support sub-groups.
+//
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
-// UNSUPPORTED: cuda
+
 //==-------------- common.cpp - SYCL sub_group common test -----*- C++ -*---==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

@@ -7,7 +7,7 @@
 // RUN: %CPU_RUN_PLACEHOLDER %t.out %T/kernel_ocl.spv
 // RUN: %GPU_RUN_PLACEHOLDER %t.out %T/kernel_ocl.spv
 // RUN: %ACC_RUN_PLACEHOLDER %t.out %T/kernel_ocl.spv
-// UNSUPPORTED: cuda
+
 //==--- common_ocl.cpp - basic SG methods in SYCL vs OpenCL  ---*- C++ -*---==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

@@ -1,9 +1,11 @@
+// REQUIRES: opencl
+
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
-// UNSUPPORTED: cuda
+
 //==------------- info.cpp - SYCL sub_group parameters test ----*- C++ -*---==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

@@ -1,9 +1,11 @@
+// UNSUPPORTED: cuda
+// CUDA compilation and runtime do not yet support sub-groups.
+//
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
-// UNSUPPORTED: cuda
 //
 //==----------- load_store.cpp - SYCL sub_group load/store test ------------==//
 //

@@ -1,11 +1,13 @@
-//-fsycl-targets=%sycl_triple
+// UNSUPPORTED: cuda
+// CUDA compilation and runtime do not yet support sub-groups.
+//
 // RUN: %clangxx -fsycl -std=c++14 %s -o %t.out
-// RUN: %clangxx -fsycl -std=c++14 -D SG_GPU %s -o %t_gpu.out
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -std=c++14 -D SG_GPU %s -o %t_gpu.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t_gpu.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
-// UNSUPPORTED: cuda
+
 //==--------------- reduce.cpp - SYCL sub_group reduce test ----*- C++ -*---==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

@@ -1,11 +1,13 @@
-//-fsycl-targets=%sycl_triple
+// UNSUPPORTED: cuda
+// CUDA compilation and runtime do not yet support sub-groups.
+//
 // RUN: %clangxx -fsycl -std=c++14 %s -o %t.out
-// RUN: %clangxx -fsycl -std=c++14 -D SG_GPU %s -o %t_gpu.out
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -std=c++14 -D SG_GPU %s -o %t_gpu.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t_gpu.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
-// UNSUPPORTED: cuda
+
 //==--------------- scan.cpp - SYCL sub_group scan test --------*- C++ -*---==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

@@ -1,9 +1,11 @@
+// UNSUPPORTED: cuda
+// CUDA compilation and runtime do not yet support sub-groups.
+//
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUNx: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
-// UNSUPPORTED: cuda
 //
 //==------------ shuffle.cpp - SYCL sub_group shuffle test -----*- C++ -*---==//
 //

@@ -1,9 +1,12 @@
+// UNSUPPORTED: cuda
+// CUDA compilation and runtime do not yet support sub-groups.
+//
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
-// UNSUPPORTED: cuda
+
 //==--------------- vote.cpp - SYCL sub_group vote test --*- C++ -*---------==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

@@ -1,8 +1,12 @@
+// XFAIL: cuda
+// piextUSM*Alloc functions for CUDA are not behaving as described in
+// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
+// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc
+//
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t1.out
 // RUN: %CPU_RUN_PLACEHOLDER %t1.out
 // RUN: %GPU_RUN_PLACEHOLDER %t1.out
-// UNSUPPORTED: cuda
 
 //==---- allocator_vector.cpp - Allocator Container test -------------------==//
 //

@@ -1,8 +1,12 @@
+// XFAIL: cuda
+// piextUSM*Alloc functions for CUDA are not behaving as described in
+// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
+// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc
+//
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t1.out
 // RUN: %CPU_RUN_PLACEHOLDER %t1.out
 // RUN: %GPU_RUN_PLACEHOLDER %t1.out
-// UNSUPPORTED: cuda
 
 //==-- allocator_vector_fail.cpp - Device Memory Allocator fail test -------==//
 //

@@ -1,8 +1,12 @@
+// XFAIL: cuda
+// piextUSM*Alloc functions for CUDA are not behaving as described in
+// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
+// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc
+//
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t1.out
 // RUN: %CPU_RUN_PLACEHOLDER %t1.out
 // RUN: %GPU_RUN_PLACEHOLDER %t1.out
-// UNSUPPORTED: cuda
 
 //==---- allocatorll.cpp - Device Memory Linked List Allocator test --------==//
 //

@@ -1,10 +1,14 @@
+// UNSUPPORTED: windows
+// XFAIL: cuda
+// piextUSM*Alloc functions for CUDA are not behaving as described in
+// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
+// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc
+//
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t1.out
 // RUN: %CPU_RUN_PLACEHOLDER %t1.out
 // RUN: %GPU_RUN_PLACEHOLDER %t1.out
 
-// UNSUPPORTED: windows,cuda
-
 //==----------------- badmalloc.cpp - Bad Mallocs test ---------------------==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -13,7 +17,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-
 // This test verifies that things fail in the proper way when they should.
 
 #include <CL/sycl.hpp>

@@ -1,8 +1,12 @@
+// XFAIL: cuda
+// piextUSM*Alloc functions for CUDA are not behaving as described in
+// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
+// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc
+//
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t1.out
 // RUN: %CPU_RUN_PLACEHOLDER %t1.out
 // RUN: %GPU_RUN_PLACEHOLDER %t1.out
-// UNSUPPORTED: cuda
 
 //==----------------- depends_on.cpp - depends_on test ---------------------==//
 //

@@ -1,8 +1,12 @@
+// XFAIL: cuda
+// piextUSM*Alloc functions for CUDA are not behaving as described in
+// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
+// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc
+//
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t1.out
 // RUN: %CPU_RUN_PLACEHOLDER %t1.out
 // RUN: %GPU_RUN_PLACEHOLDER %t1.out
-// UNSUPPORTED: cuda
 
 //==------------------- dmemll.cpp - Device Memory Linked List test --------==//
 //

@@ -1,8 +1,12 @@
+// XFAIL: cuda
+// piextUSM*Alloc functions for CUDA are not behaving as described in
+// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
+// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc
+//
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t1.out
 // RUN: %CPU_RUN_PLACEHOLDER %t1.out
 // RUN: %GPU_RUN_PLACEHOLDER %t1.out
-// UNSUPPORTED: cuda
 
 //==------------------- hmemll.cpp - Host Memory Linked List test ----------==//
 //

@@ -3,6 +3,8 @@
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 
 // REQUIRES: cpu
+// XFAIL: cuda
+// TODO: ptxas fatal   : Unresolved extern function '_Z20__spirv_ocl_lgamma_rfPi'
 
 #include <CL/sycl.hpp>
 

@@ -1,8 +1,11 @@
+// XFAIL: cuda
+// SYCL runtime and piextUSM*Alloc functions for CUDA not behaving as described
+// in: https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
+//
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t1.out
 // RUN: %CPU_RUN_PLACEHOLDER %t1.out
 // RUN: %GPU_RUN_PLACEHOLDER %t1.out
-// UNSUPPORTED: cuda
 
 //==---------------- memadvise.cpp - Shared Memory Linked List test --------==//
 //

@@ -5,10 +5,14 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
+// XFAIL: cuda
+// piextUSM*Alloc functions for CUDA are not behaving as described in
+// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
+// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc
+//
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple  %s -o %t1.out
 // RUN: %CPU_RUN_PLACEHOLDER %t1.out
 // RUN: %GPU_RUN_PLACEHOLDER %t1.out
-// UNSUPPORTED: cuda
 
 #include <CL/sycl.hpp>
 

@@ -1,7 +1,11 @@
-// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple  %s -o %t1.out
+// XFAIL: cuda
+// piextUSM*Alloc functions for CUDA are not behaving as described in
+// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
+// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc
+//
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out
 // RUN: %CPU_RUN_PLACEHOLDER %t1.out
 // RUN: %GPU_RUN_PLACEHOLDER %t1.out
-// UNSUPPORTED: cuda
 
 //==---- memset.cpp - USM memset test --------------------------------------==//
 //
@@ -10,7 +14,6 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-
 #include <CL/sycl.hpp>
 
 using namespace cl::sycl;

@@ -1,8 +1,12 @@
+// XFAIL: cuda
+// piextUSM*Alloc functions for CUDA are not behaving as described in
+// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
+// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc
+//
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t1.out
 // RUN: %CPU_RUN_PLACEHOLDER %t1.out
 // RUN: %GPU_RUN_PLACEHOLDER %t1.out
-// UNSUPPORTED: cuda
 
 //==------------------- mixed.cpp - Mixed Memory test ---------------------==//
 //

@@ -1,8 +1,12 @@
-// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple  %s -o %t1.out
+// XFAIL: cuda
+// piextUSM*Alloc functions for CUDA are not behaving as described in
+// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
+// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc
+//
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t1.out
 // RUN: %CPU_RUN_PLACEHOLDER %t1.out
 // RUN: %GPU_RUN_PLACEHOLDER %t1.out
-// UNSUPPORTED: cuda
 
 //==------------------- mixed2.cpp - Mixed Memory test ---------------------==//
 //

@@ -1,8 +1,12 @@
-// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple  %s -o %t1.out
+// XFAIL: cuda
+// piextUSM*Alloc functions for CUDA are not behaving as described in
+// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
+// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc
+//
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t1.out
 // RUN: %CPU_RUN_PLACEHOLDER %t1.out
 // RUN: %GPU_RUN_PLACEHOLDER %t1.out
-// UNSUPPORTED: cuda
 
 //==---------- mixed2template.cpp - Mixed Memory with Templatestest --------==//
 //

@@ -1,8 +1,12 @@
-// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple  %s -o %t1.out
+// XFAIL: cuda
+// piextUSM*Alloc functions for CUDA are not behaving as described in
+// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
+// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc
+//
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t1.out
 // RUN: %CPU_RUN_PLACEHOLDER %t1.out
 // RUN: %GPU_RUN_PLACEHOLDER %t1.out
-// UNSUPPORTED: cuda
 
 //==-------------- mixed_queue.cpp - Mixed Memory test ---------------------==//
 //

@@ -1,8 +1,10 @@
+// UNSUPPORTED: cuda
+// CUDA does not support the unnamed lambda extension.
+//
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple  -fsycl-unnamed-lambda %s -o %t1.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t1.out
 // RUN: %CPU_RUN_PLACEHOLDER %t1.out
 // RUN: %GPU_RUN_PLACEHOLDER %t1.out
-// UNSUPPORTED: cuda
 
 //==--------------- pfor_flatten.cpp - Kernel Launch Flattening test -------==//
 //

@@ -5,7 +5,7 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-// RUN: %clangxx -fsycl %s -o %t1.out -L %opencl_libs_dir -lOpenCL
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t1.out
 // RUN: %CPU_RUN_PLACEHOLDER %t1.out
 

@@ -1,8 +1,12 @@
-// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple  %s -o %t.out
+// XFAIL: cuda
+// piextUSM*Alloc functions for CUDA are not behaving as described in
+// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
+// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc
+//
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
-// UNSUPPORTED: cuda
 
 #include <CL/sycl.hpp>
 

@@ -1,8 +1,12 @@
+// XFAIL: cuda
+// piextUSM*Alloc functions for CUDA are not behaving as described in
+// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
+// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc
+//
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t1.out
 // RUN: %CPU_RUN_PLACEHOLDER %t1.out
 // RUN: %GPU_RUN_PLACEHOLDER %t1.out
-// UNSUPPORTED: cuda
 
 //==------------------- smemll.cpp - Shared Memory Linked List test --------==//
 //

@@ -1,4 +1,4 @@
-// RUN: %clangxx -Wall -Wpessimizing-move -Wunused-variable -Wmismatched-tags -Wunneeded-internal-declaration -Werror -fsycl %s -o %t.out
+// RUN: %clangxx -Wall -Wpessimizing-move -Wunused-variable -Wmismatched-tags -Wunneeded-internal-declaration -Werror -Wno-unknown-cuda-version -fsycl %s -o %t.out
 
 #include <CL/sycl.hpp>
 

@@ -6,7 +6,7 @@ add_executable(get_device_count_by_type get_device_count_by_type.cpp)
 add_dependencies(get_device_count_by_type ocl-headers ocl-icd)
 
 if( SYCL_BUILD_PI_CUDA )
-  find_package(CUDA 10.0 REQUIRED)
+  find_package(CUDA 10.1 REQUIRED)
 
   add_library(cudadrv SHARED IMPORTED)