IndexType sizeDim1 = 245;
IndexType sizeDim2 = 343;
IndexType sizeDim3 = 577;
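// A minimal sketch of the assumed surrounding context (elided here): Index3 is a
// 3-dimensional index type such as Eigen::DSizes<IndexType, 3>; tensor1, tensor2 and
// tensor3 are host Tensors; gpu1, gpu2 and gpu3 are TensorMaps over the device buffers
// allocated below; and `sizes` / `slice_range` hold the extents of the sliced result.
// The two slice specifications below cover the same extent: the first starts at index 1
// along the first dimension, the second stops one short of sizeDim1, with unit strides.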
Index3 strides1(1L, 1L, 1L);
Index3 indicesStart1(1L, 0L, 0L);
Index3 indicesStop1(sizeDim1, sizeDim2, sizeDim3);

Index3 strides2(1L, 1L, 1L);
Index3 indicesStart2(0L, 0L, 0L);
Index3 indicesStop2(sizeDim1 - 1, sizeDim2, sizeDim3);
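// Allocate raw device buffers large enough to hold each full host tensor.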
DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor1.size() * sizeof(DataType)));
DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(tensor2.size() * sizeof(DataType)));
DataType* gpu_data3 = static_cast<DataType*>(sycl_device.allocate(tensor3.size() * sizeof(DataType)));
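// Copy both inputs to the device, evaluate the slice difference on the device into
// gpu_data3 (through the gpu3 TensorMap), then copy the result back into tensor3.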
sycl_device.memcpyHostToDevice(gpu_data1, tensor1.data(), (tensor1.size()) * sizeof(DataType));
sycl_device.memcpyHostToDevice(gpu_data2, tensor2.data(), (tensor2.size()) * sizeof(DataType));
gpu3.device(sycl_device) = gpu1.slice(indicesStart1, sizes) - gpu2.slice(indicesStart2, sizes);
sycl_device.memcpyDeviceToHost(tensor3.data(), gpu_data3, (tensor3.size()) * sizeof(DataType));
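// Host-side reference computed with stridedSlice: with unit strides this selects the
// same elements as the device-side slice() expressions above, assuming `sizes` equals
// the stop-minus-start extents of each range.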
tensor3_cpu = tensor1.stridedSlice(indicesStart1, indicesStop1, strides1) -
              tensor2.stridedSlice(indicesStart2, indicesStop2, strides2);
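// Compare the device result against the host reference element by element over the
// slice extent.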
for (IndexType i = 0; i < slice_range[0]; ++i) {
  for (IndexType j = 0; j < slice_range[1]; ++j) {
    for (IndexType k = 0; k < slice_range[2]; ++k) {
      VERIFY_IS_EQUAL(tensor3(i, j, k), tensor3_cpu(i, j, k));
    }
  }
}
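// Release the device allocations.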
sycl_device.deallocate(gpu_data1);
sycl_device.deallocate(gpu_data2);
sycl_device.deallocate(gpu_data3);