const int patch_z = 2;
const int patch_y = 3;
const int patch_x = 5;
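// The input volume's spatial extents equal the patch extents, so each extracted
// patch is as large as the volume itself (with zero padding at the borders).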
tensor_col_major.setRandom();
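// Allocate device buffers for the column-major input and its row-major copy.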
DataType* gpu_data_col_major = static_cast<DataType*>(
    sycl_device.allocate(tensor_col_major.size() * sizeof(DataType)));
DataType* gpu_data_row_major = static_cast<DataType*>(
    sycl_device.allocate(tensor_row_major.size() * sizeof(DataType)));
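// Upload the column-major input, build the row-major copy on the device via
// swap_layout(), and read it back so both layouts can be checked on the host.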
sycl_device.memcpyHostToDevice(gpu_data_col_major, tensor_col_major.data(),
                               (tensor_col_major.size()) * sizeof(DataType));
gpu_row_major.device(sycl_device) = gpu_col_major.swap_layout();
sycl_device.memcpyDeviceToHost(tensor_row_major.data(), gpu_data_row_major,
                               (tensor_col_major.size()) * sizeof(DataType));
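// Column-major patch tensor layout: {depth, patch_z, patch_y, patch_x, patches, batch}.
// With the default SAME padding and unit strides there is one patch per voxel,
// i.e. patch_z * patch_y * patch_x patches in total.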
array<IndexType, 6> patchColMajorTensorRange = {
    {depth, patch_z, patch_y, patch_x, patch_z * patch_y * patch_x, batch}};
size_t patchTensorBuffSize = entire_volume_patch_col_major.size() * sizeof(DataType);
DataType* gpu_data_entire_volume_patch_col_major =
    static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
TensorMap<Tensor<DataType, 6, ColMajor, IndexType>> gpu_entire_volume_patch_col_major(
    gpu_data_entire_volume_patch_col_major, patchColMajorTensorRange);
gpu_entire_volume_patch_col_major.device(sycl_device) =
    gpu_col_major.extract_volume_patches(patch_z, patch_y, patch_x);
sycl_device.memcpyDeviceToHost(entire_volume_patch_col_major.data(),
                               gpu_data_entire_volume_patch_col_major, patchTensorBuffSize);
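// dimension(4) of the column-major result holds the number of extracted patches.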
VERIFY_IS_EQUAL(entire_volume_patch_col_major.dimension(4), patch_z * patch_y * patch_x);
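// Row-major side: the same extraction on the layout-swapped copy; the result
// dimensions appear in reverse order (batch first, depth last).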
array<IndexType, 6> patchRowMajorTensorRange = {
    {batch, patch_z * patch_y * patch_x, patch_x, patch_y, patch_z, depth}};
patchTensorBuffSize = entire_volume_patch_row_major.size() * sizeof(DataType);
DataType* gpu_data_entire_volume_patch_row_major =
    static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
TensorMap<Tensor<DataType, 6, RowMajor, IndexType>> gpu_entire_volume_patch_row_major(
    gpu_data_entire_volume_patch_row_major, patchRowMajorTensorRange);
gpu_entire_volume_patch_row_major.device(sycl_device) =
    gpu_row_major.extract_volume_patches(patch_z, patch_y, patch_x);
sycl_device.memcpyDeviceToHost(entire_volume_patch_row_major.data(),
                               gpu_data_entire_volume_patch_row_major, patchTensorBuffSize);
VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(1), patch_z * patch_y * patch_x);
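// SAME padding with unit strides implies patch_size - 1 total padding per spatial
// dimension; forward_pad_* is the portion applied in front of the data.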
const int dz = patch_z - 1;
const int dy = patch_y - 1;
const int dx = patch_x - 1;

const int forward_pad_z = dz / 2;
const int forward_pad_y = dy / 2;
const int forward_pad_x = dx / 2;
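// Recompute every patch element on the host: (pz, py, px) is the patch origin,
// (z, y, x) the offset inside the patch, over all batches b and channels d.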
for (int pz = 0; pz < patch_z; pz++) {
  for (int py = 0; py < patch_y; py++) {
    for (int px = 0; px < patch_x; px++) {
      const int patchId = pz + patch_z * (py + px * patch_y);
      for (int z = 0; z < patch_z; z++) {
        for (int y = 0; y < patch_y; y++) {
          for (int x = 0; x < patch_x; x++) {
            for (int b = 0; b < batch; b++) {
              for (int d = 0; d < depth; d++) {
                float expected = 0.0f;
                float expected_row_major = 0.0f;
                const int eff_z = z - forward_pad_z + pz;
                const int eff_y = y - forward_pad_y + py;
                const int eff_x = x - forward_pad_x + px;
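                // Offsets that fall outside the source volume correspond to the
                // implicit zero padding, so the expected values keep their 0.0f defaults.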
                if (eff_z >= 0 && eff_y >= 0 && eff_x >= 0 &&
                    eff_z < patch_z && eff_y < patch_y && eff_x < patch_x) {
                  expected = tensor_col_major(d, eff_z, eff_y, eff_x, b);
                  expected_row_major = tensor_row_major(b, eff_x, eff_y, eff_z, d);
                }
                VERIFY_IS_EQUAL(entire_volume_patch_col_major(d, z, y, x, patchId, b), expected);
                VERIFY_IS_EQUAL(entire_volume_patch_row_major(b, patchId, x, y, z, d), expected_row_major);
              }
            }
          }
        }
      }
    }
  }
}
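// Release all device-side buffers allocated for this test.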
sycl_device.deallocate(gpu_data_col_major);
sycl_device.deallocate(gpu_data_row_major);
sycl_device.deallocate(gpu_data_entire_volume_patch_col_major);
sycl_device.deallocate(gpu_data_entire_volume_patch_row_major);