// First configuration: depth 3, 128x128 image, batch of 16.
IndexType sizeDim1 = 3;
IndexType sizeDim2 = 128;
IndexType sizeDim3 = 128;
IndexType sizeDim4 = 16;
array<IndexType, 4> tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
Tensor<DataType, 4, ColMajor, IndexType> l_in_col_major(tensorColMajorRange);
l_in_col_major.setRandom();

DataType* gpu_data_l_in_col_major =
    static_cast<DataType*>(sycl_device.allocate(l_in_col_major.size() * sizeof(DataType)));
TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_l_in_col_major(gpu_data_l_in_col_major,
                                                                       tensorColMajorRange);
sycl_device.memcpyHostToDevice(gpu_data_l_in_col_major, l_in_col_major.data(),
                               (l_in_col_major.size()) * sizeof(DataType));

// 11x11 patches, ColMajor output laid out as {depth, patch_rows, patch_cols, patch_count, batch}.
array<IndexType, 5> patchTensorRange = {{sizeDim1, 11, 11, sizeDim2 * sizeDim3, sizeDim4}};
Tensor<DataType, 5, ColMajor, IndexType> l_out_col_major(patchTensorRange);
size_t patchTensorBuffSize = l_out_col_major.size() * sizeof(DataType);
DataType* gpu_data_l_out_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
TensorMap<Tensor<DataType, 5, ColMajor, IndexType>> gpu_l_out_col_major(gpu_data_l_out_col_major,
                                                                        patchTensorRange);
gpu_l_out_col_major.device(sycl_device) = gpu_l_in_col_major.extract_image_patches(11, 11);
sycl_device.memcpyDeviceToHost(l_out_col_major.data(), gpu_data_l_out_col_major, patchTensorBuffSize);
// Same 11x11 patches, RowMajor output laid out as {batch, patch_count, patch_cols, patch_rows, depth}.
patchTensorRange = {{sizeDim4, sizeDim2 * sizeDim3, 11, 11, sizeDim1}};
Tensor<DataType, 5, RowMajor, IndexType> l_out_row_major(patchTensorRange);
patchTensorBuffSize = l_out_row_major.size() * sizeof(DataType);
DataType* gpu_data_l_out_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
TensorMap<Tensor<DataType, 5, RowMajor, IndexType>> gpu_l_out_row_major(gpu_data_l_out_row_major,
                                                                        patchTensorRange);
gpu_l_out_row_major.device(sycl_device) = gpu_l_in_col_major.swap_layout().extract_image_patches(11, 11);
sycl_device.memcpyDeviceToHost(l_out_row_major.data(), gpu_data_l_out_row_major, patchTensorBuffSize);
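// The checks below reproduce SAME-style padding by hand: patch (i, j) is centred on input
// pixel (i, j), so patch entry (r, c) should equal input(d, r - 5 + i, c - 5 + j, b) when
// that position lies inside the 128x128 image, and zero otherwise (5 = 11 / 2 is the
// patch-centre offset).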
for (IndexType b = 0; b < 16; ++b) {
  for (IndexType i = 0; i < 128; ++i) {
    for (IndexType j = 0; j < 128; ++j) {
      IndexType patchId = i + 128 * j;
      for (IndexType c = 0; c < 11; ++c) {
        for (IndexType r = 0; r < 11; ++r) {
          for (IndexType d = 0; d < 3; ++d) {
            DataType expected = 0.0f;
            if (r - 5 + i >= 0 && c - 5 + j >= 0 && r - 5 + i < 128 && c - 5 + j < 128) {
              expected = l_in_col_major(d, r - 5 + i, c - 5 + j, b);
            }
            // ColMajor
            if (l_out_col_major(d, r, c, patchId, b) != expected) {
              std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c
                        << " d=" << d << " b=" << b << std::endl;
            }
            VERIFY_IS_EQUAL(l_out_col_major(d, r, c, patchId, b), expected);
            // RowMajor
            if (l_out_row_major(b, patchId, c, r, d) != expected) {
              std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c
                        << " d=" << d << " b=" << b << std::endl;
            }
            VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected);
          }
        }
      }
    }
  }
}
sycl_device.deallocate(gpu_data_l_in_col_major);
sycl_device.deallocate(gpu_data_l_out_col_major);
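// The three configurations that follow repeat the same pattern for progressively smaller
// images and larger depths (64x64 with depth 16, 16x16 with depth 32, 13x13 with depth 64,
// all with batch 32), resizing the host tensors in place and reallocating the device
// buffers for each patch size (9x9, 7x7, 3x3).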
// Second configuration: depth 16, 64x64 image, batch of 32.
sizeDim1 = 16;
sizeDim2 = 64;
sizeDim3 = 64;
sizeDim4 = 32;
tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
l_in_col_major.resize(tensorColMajorRange);
l_in_col_major.setRandom();
gpu_data_l_in_col_major =
    static_cast<DataType*>(sycl_device.allocate(l_in_col_major.size() * sizeof(DataType)));
TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_l_in_col_major_resize1(gpu_data_l_in_col_major,
                                                                               tensorColMajorRange);

// 9x9 patches, ColMajor output.
patchTensorRange = {{sizeDim1, 9, 9, sizeDim2 * sizeDim3, sizeDim4}};
l_out_col_major.resize(patchTensorRange);
patchTensorBuffSize = l_out_col_major.size() * sizeof(DataType);
gpu_data_l_out_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
TensorMap<Tensor<DataType, 5, ColMajor, IndexType>> gpu_l_out_col_major_resize1(gpu_data_l_out_col_major,
                                                                                patchTensorRange);
sycl_device.memcpyHostToDevice(gpu_data_l_in_col_major, l_in_col_major.data(),
                               (l_in_col_major.size()) * sizeof(DataType));
gpu_l_out_col_major_resize1.device(sycl_device) = gpu_l_in_col_major_resize1.extract_image_patches(9, 9);
sycl_device.memcpyDeviceToHost(l_out_col_major.data(), gpu_data_l_out_col_major, patchTensorBuffSize);
// Same 9x9 patches, RowMajor output.
sycl_device.deallocate(gpu_data_l_out_row_major);
patchTensorRange = {{sizeDim4, sizeDim2 * sizeDim3, 9, 9, sizeDim1}};
l_out_row_major.resize(patchTensorRange);
patchTensorBuffSize = l_out_row_major.size() * sizeof(DataType);
gpu_data_l_out_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
TensorMap<Tensor<DataType, 5, RowMajor, IndexType>> gpu_l_out_row_major_resize1(gpu_data_l_out_row_major,
                                                                                patchTensorRange);
gpu_l_out_row_major_resize1.device(sycl_device) =
    gpu_l_in_col_major_resize1.swap_layout().extract_image_patches(9, 9);
sycl_device.memcpyDeviceToHost(l_out_row_major.data(), gpu_data_l_out_row_major, patchTensorBuffSize);
for (IndexType b = 0; b < 32; ++b) {
  for (IndexType i = 0; i < 64; ++i) {
    for (IndexType j = 0; j < 64; ++j) {
      IndexType patchId = i + 64 * j;
      for (IndexType c = 0; c < 9; ++c) {
        for (IndexType r = 0; r < 9; ++r) {
          for (IndexType d = 0; d < 16; ++d) {
            DataType expected = 0.0f;
            if (r - 4 + i >= 0 && c - 4 + j >= 0 && r - 4 + i < 64 && c - 4 + j < 64) {
              expected = l_in_col_major(d, r - 4 + i, c - 4 + j, b);
            }
            // ColMajor
            if (l_out_col_major(d, r, c, patchId, b) != expected) {
              std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c
                        << " d=" << d << " b=" << b << std::endl;
            }
            VERIFY_IS_EQUAL(l_out_col_major(d, r, c, patchId, b), expected);
            // RowMajor
            if (l_out_row_major(b, patchId, c, r, d) != expected) {
              std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c
                        << " d=" << d << " b=" << b << std::endl;
            }
            VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected);
          }
        }
      }
    }
  }
}
sycl_device.deallocate(gpu_data_l_in_col_major);
sycl_device.deallocate(gpu_data_l_out_col_major);
// Third configuration: depth 32, 16x16 image, batch of 32.
sizeDim1 = 32;
sizeDim2 = 16;
sizeDim3 = 16;
sizeDim4 = 32;
tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
l_in_col_major.resize(tensorColMajorRange);
l_in_col_major.setRandom();
gpu_data_l_in_col_major =
    static_cast<DataType*>(sycl_device.allocate(l_in_col_major.size() * sizeof(DataType)));
TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_l_in_col_major_resize2(gpu_data_l_in_col_major,
                                                                               tensorColMajorRange);

// 7x7 patches, ColMajor output.
patchTensorRange = {{sizeDim1, 7, 7, sizeDim2 * sizeDim3, sizeDim4}};
l_out_col_major.resize(patchTensorRange);
patchTensorBuffSize = l_out_col_major.size() * sizeof(DataType);
gpu_data_l_out_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
TensorMap<Tensor<DataType, 5, ColMajor, IndexType>> gpu_l_out_col_major_resize2(gpu_data_l_out_col_major,
                                                                                patchTensorRange);
sycl_device.memcpyHostToDevice(gpu_data_l_in_col_major, l_in_col_major.data(),
                               (l_in_col_major.size()) * sizeof(DataType));
gpu_l_out_col_major_resize2.device(sycl_device) = gpu_l_in_col_major_resize2.extract_image_patches(7, 7);
sycl_device.memcpyDeviceToHost(l_out_col_major.data(), gpu_data_l_out_col_major, patchTensorBuffSize);
// Same 7x7 patches, RowMajor output.
sycl_device.deallocate(gpu_data_l_out_row_major);
patchTensorRange = {{sizeDim4, sizeDim2 * sizeDim3, 7, 7, sizeDim1}};
l_out_row_major.resize(patchTensorRange);
patchTensorBuffSize = l_out_row_major.size() * sizeof(DataType);
gpu_data_l_out_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
TensorMap<Tensor<DataType, 5, RowMajor, IndexType>> gpu_l_out_row_major_resize2(gpu_data_l_out_row_major,
                                                                                patchTensorRange);
gpu_l_out_row_major_resize2.device(sycl_device) =
    gpu_l_in_col_major_resize2.swap_layout().extract_image_patches(7, 7);
sycl_device.memcpyDeviceToHost(l_out_row_major.data(), gpu_data_l_out_row_major, patchTensorBuffSize);
for (IndexType b = 0; b < 32; ++b) {
  for (IndexType i = 0; i < 16; ++i) {
    for (IndexType j = 0; j < 16; ++j) {
      IndexType patchId = i + 16 * j;
      for (IndexType c = 0; c < 7; ++c) {
        for (IndexType r = 0; r < 7; ++r) {
          for (IndexType d = 0; d < 32; ++d) {
            DataType expected = 0.0f;
            if (r - 3 + i >= 0 && c - 3 + j >= 0 && r - 3 + i < 16 && c - 3 + j < 16) {
              expected = l_in_col_major(d, r - 3 + i, c - 3 + j, b);
            }
            // ColMajor
            if (l_out_col_major(d, r, c, patchId, b) != expected) {
              std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c
                        << " d=" << d << " b=" << b << std::endl;
            }
            VERIFY_IS_EQUAL(l_out_col_major(d, r, c, patchId, b), expected);
            // RowMajor
            if (l_out_row_major(b, patchId, c, r, d) != expected) {
              std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c
                        << " d=" << d << " b=" << b << std::endl;
            }
            VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected);
          }
        }
      }
    }
  }
}
sycl_device.deallocate(gpu_data_l_in_col_major);
sycl_device.deallocate(gpu_data_l_out_col_major);
// Fourth configuration: depth 64, 13x13 image, batch of 32.
sizeDim1 = 64;
sizeDim2 = 13;
sizeDim3 = 13;
sizeDim4 = 32;
tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
l_in_col_major.resize(tensorColMajorRange);
l_in_col_major.setRandom();
gpu_data_l_in_col_major =
    static_cast<DataType*>(sycl_device.allocate(l_in_col_major.size() * sizeof(DataType)));
TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_l_in_col_major_resize3(gpu_data_l_in_col_major,
                                                                               tensorColMajorRange);

// 3x3 patches, ColMajor output.
patchTensorRange = {{sizeDim1, 3, 3, sizeDim2 * sizeDim3, sizeDim4}};
l_out_col_major.resize(patchTensorRange);
patchTensorBuffSize = l_out_col_major.size() * sizeof(DataType);
gpu_data_l_out_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
TensorMap<Tensor<DataType, 5, ColMajor, IndexType>> gpu_l_out_col_major_resize3(gpu_data_l_out_col_major,
                                                                                patchTensorRange);
sycl_device.memcpyHostToDevice(gpu_data_l_in_col_major, l_in_col_major.data(),
                               (l_in_col_major.size()) * sizeof(DataType));
gpu_l_out_col_major_resize3.device(sycl_device) = gpu_l_in_col_major_resize3.extract_image_patches(3, 3);
sycl_device.memcpyDeviceToHost(l_out_col_major.data(), gpu_data_l_out_col_major, patchTensorBuffSize);
// Same 3x3 patches, RowMajor output.
sycl_device.deallocate(gpu_data_l_out_row_major);
patchTensorRange = {{sizeDim4, sizeDim2 * sizeDim3, 3, 3, sizeDim1}};
l_out_row_major.resize(patchTensorRange);
patchTensorBuffSize = l_out_row_major.size() * sizeof(DataType);
gpu_data_l_out_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
TensorMap<Tensor<DataType, 5, RowMajor, IndexType>> gpu_l_out_row_major_resize3(gpu_data_l_out_row_major,
                                                                                patchTensorRange);
gpu_l_out_row_major_resize3.device(sycl_device) =
    gpu_l_in_col_major_resize3.swap_layout().extract_image_patches(3, 3);
sycl_device.memcpyDeviceToHost(l_out_row_major.data(), gpu_data_l_out_row_major, patchTensorBuffSize);
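// For the 3x3 patches the centre offset is only 1 (3 / 2), so in the checks below just the
// one-pixel border of each 13x13 image falls outside the input and compares against zero.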
for (IndexType b = 0; b < 32; ++b) {
  for (IndexType i = 0; i < 13; ++i) {
    for (IndexType j = 0; j < 13; ++j) {
      IndexType patchId = i + 13 * j;
      for (IndexType c = 0; c < 3; ++c) {
        for (IndexType r = 0; r < 3; ++r) {
          for (IndexType d = 0; d < 64; ++d) {
            DataType expected = 0.0f;
            if (r - 1 + i >= 0 && c - 1 + j >= 0 && r - 1 + i < 13 && c - 1 + j < 13) {
              expected = l_in_col_major(d, r - 1 + i, c - 1 + j, b);
            }
            // ColMajor
            if (l_out_col_major(d, r, c, patchId, b) != expected) {
              std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c
                        << " d=" << d << " b=" << b << std::endl;
            }
            VERIFY_IS_EQUAL(l_out_col_major(d, r, c, patchId, b), expected);
            // RowMajor
            if (l_out_row_major(b, patchId, c, r, d) != expected) {
              std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c
                        << " d=" << d << " b=" << b << std::endl;
            }
            VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected);
          }
        }
      }
    }
  }
}
sycl_device.deallocate(gpu_data_l_in_col_major);
sycl_device.deallocate(gpu_data_l_out_col_major);
sycl_device.deallocate(gpu_data_l_out_row_major);