DataType* gpu_in_data = static_cast<DataType*>(
    sycl_device.allocate(tensor.dimensions().TotalSize() * sizeof(DataType)));
DataType* gpu_out_data_expected = static_cast<DataType*>(
    sycl_device.allocate(expected.dimensions().TotalSize() * sizeof(DataType)));
DataType* gpu_out_data_result = static_cast<DataType*>(
    sycl_device.allocate(result.dimensions().TotalSize() * sizeof(DataType)));
TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > in_gpu(gpu_in_data, tensorRange);
TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > out_gpu_expected(gpu_out_data_expected, tensorRange);
TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > out_gpu_result(gpu_out_data_result, tensorRange);

sycl_device.memcpyHostToDevice(gpu_in_data, tensor.data(),
                               (tensor.dimensions().TotalSize()) * sizeof(DataType));
// Reference result: reverse the whole tensor, either through a reversed
// view of the output (l-value) or from a reversed input (r-value).
// LValue is assumed to be the bool parameter of the enclosing test.
if (LValue) {
  out_gpu_expected.reverse(dim_rev).device(sycl_device) = in_gpu;
} else {
  out_gpu_expected.device(sycl_device) = in_gpu.reverse(dim_rev);
}
sycl_device.memcpyDeviceToHost(expected.data(), gpu_out_data_expected,
                               expected.dimensions().TotalSize() * sizeof(DataType));
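// Rebuild the same result slice by slice: 2x3x1x7 chunks marched along
// dimension 2. The four array declarations are assumed from the element
// assignments that follow.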
array<IndexType, 4> src_slice_dim;
src_slice_dim[0] = 2;
src_slice_dim[1] = 3;
src_slice_dim[2] = 1;
src_slice_dim[3] = 7;
array<IndexType, 4> src_slice_start;
src_slice_start[0] = 0;
src_slice_start[1] = 0;
src_slice_start[2] = 0;
src_slice_start[3] = 0;
array<IndexType, 4> dst_slice_dim = src_slice_dim;
array<IndexType, 4> dst_slice_start = src_slice_start;
for (IndexType i = 0; i < 5; ++i) {
  if (LValue) {
    out_gpu_result.slice(dst_slice_start, dst_slice_dim).reverse(dim_rev).device(sycl_device) =
        in_gpu.slice(src_slice_start, src_slice_dim);
  } else {
    out_gpu_result.slice(dst_slice_start, dst_slice_dim).device(sycl_device) =
        in_gpu.slice(src_slice_start, src_slice_dim).reverse(dim_rev);
  }
  src_slice_start[2] += 1;
  dst_slice_start[2] += 1;
}
sycl_device.memcpyDeviceToHost(result.data(), gpu_out_data_result,
                               result.dimensions().TotalSize() * sizeof(DataType));
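// Element-wise check of the sliced reconstruction against the full-tensor
// reference; the VERIFY_IS_EQUAL body inside the loop nest is assumed,
// following Eigen's test conventions.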
for (IndexType i = 0; i < expected.dimension(0); ++i) {
  for (IndexType j = 0; j < expected.dimension(1); ++j) {
    for (IndexType k = 0; k < expected.dimension(2); ++k) {
      for (IndexType l = 0; l < expected.dimension(3); ++l) {
        VERIFY_IS_EQUAL(result(i, j, k, l), expected(i, j, k, l));
      }
    }
  }
}
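// Second pass: source and destination now use the same slice coordinates,
// so the reverse is applied chunk by chunk in place; the outcome must still
// match the full-tensor reference computed above.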
dst_slice_start[2] = 0;
result.setRandom();  // assumed: scribble over the output so stale values from the first pass cannot mask errors
sycl_device.memcpyHostToDevice(gpu_out_data_result, result.data(),
                               (result.dimensions().TotalSize()) * sizeof(DataType));
for (IndexType i = 0; i < 5; ++i) {
  if (LValue) {
    out_gpu_result.slice(dst_slice_start, dst_slice_dim).reverse(dim_rev).device(sycl_device) =
        in_gpu.slice(dst_slice_start, dst_slice_dim);
  } else {
    out_gpu_result.slice(dst_slice_start, dst_slice_dim).device(sycl_device) =
        in_gpu.reverse(dim_rev).slice(dst_slice_start, dst_slice_dim);
  }
  dst_slice_start[2] += 1;
}
sycl_device.memcpyDeviceToHost(result.data(), gpu_out_data_result,
                               result.dimensions().TotalSize() * sizeof(DataType));
for (IndexType i = 0; i < expected.dimension(0); ++i) {
  for (IndexType j = 0; j < expected.dimension(1); ++j) {
    for (IndexType k = 0; k < expected.dimension(2); ++k) {
      for (IndexType l = 0; l < expected.dimension(3); ++l) {
        VERIFY_IS_EQUAL(result(i, j, k, l), expected(i, j, k, l));
      }
    }
  }
}
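// Usage sketch: a per-device driver along these lines would instantiate the
// test for both layouts and both LValue modes. The driver name and the
// QueueInterface wiring are assumptions modeled on Eigen's SYCL test setup,
// not verbatim source.
template <typename DataType, typename DevSelector>
void sycl_reverse_test_per_device(const DevSelector& s) {
  Eigen::QueueInterface queue_interface(s);
  Eigen::SyclDevice sycl_device(&queue_interface);
  // Exercise the reverse expression as both an r-value and an l-value,
  // in both storage orders.
  test_expr_reverse<DataType, Eigen::RowMajor, int64_t>(sycl_device, /*LValue=*/false);
  test_expr_reverse<DataType, Eigen::ColMajor, int64_t>(sycl_device, /*LValue=*/false);
  test_expr_reverse<DataType, Eigen::RowMajor, int64_t>(sycl_device, /*LValue=*/true);
  test_expr_reverse<DataType, Eigen::ColMajor, int64_t>(sycl_device, /*LValue=*/true);
}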