// Dimensions of the unpadded input tensor.
IndexType sizeDim1 = 2;
IndexType sizeDim2 = 3;
IndexType sizeDim3 = 5;
IndexType sizeDim4 = 7;
// (before, after) padding amounts for each of the four dimensions.
paddings[0] = std::make_pair(0, 0);
paddings[1] = std::make_pair(2, 1);
paddings[2] = std::make_pair(3, 4);
paddings[3] = std::make_pair(0, 0);
// Each padded dimension is the original size plus the total padding:
// 3 + (2 + 1) = 6 and 5 + (3 + 4) = 12; dimensions 1 and 4 are unchanged.
IndexType padedSizeDim1 = 2;
IndexType padedSizeDim2 = 6;
IndexType padedSizeDim3 = 12;
IndexType padedSizeDim4 = 7;
array<IndexType, 4> padedtensorRange = {{padedSizeDim1, padedSizeDim2, padedSizeDim3, padedSizeDim4}};
// Allocate device buffers for the input tensor and the padded result.
DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor.size() * sizeof(DataType)));
DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(padded.size() * sizeof(DataType)));
// Copy the input to the device, evaluate the pad expression there,
// and copy the padded result back to the host.
sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(), (tensor.size()) * sizeof(DataType));
gpu2.device(sycl_device) = gpu1.pad(paddings);
sycl_device.memcpyDeviceToHost(padded.data(), gpu_data2, (padded.size()) * sizeof(DataType));
// Elements inside the original region (offset by the pre-padding of 2 along
// dim 2 and 3 along dim 3) must match the input; everything else is zero padding.
for (IndexType i = 0; i < padedSizeDim1; ++i) {
  for (IndexType j = 0; j < padedSizeDim2; ++j) {
    for (IndexType k = 0; k < padedSizeDim3; ++k) {
      for (IndexType l = 0; l < padedSizeDim4; ++l) {
        if (j >= 2 && j < 5 && k >= 3 && k < 8) {
sycl_device.deallocate(gpu_data1);
sycl_device.deallocate(gpu_data2);
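// The SYCL path above uses the same pad() expression as plain host-side Eigen
// tensors. A minimal host-only sketch of the padding semantics this test
// verifies (assumes the unsupported Eigen/CXX11/Tensor module; the tensor
// shape and values here are illustrative, not taken from the test):
//
//   #include <unsupported/Eigen/CXX11/Tensor>
//
//   Eigen::Tensor<float, 2> in(3, 5);
//   in.setConstant(1.0f);
//   Eigen::array<std::pair<Eigen::Index, Eigen::Index>, 2> pads;
//   pads[0] = std::make_pair(2, 1);             // 2 before, 1 after dim 0
//   pads[1] = std::make_pair(3, 4);             // 3 before, 4 after dim 1
//   Eigen::Tensor<float, 2> out = in.pad(pads); // 6 x 12, borders filled with 0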