template<int DataLayout, typename DataType, typename IndexType, typename Device>
static void test_sycl_contraction(const Device &sycl_device, IndexType m_size, IndexType k_size, IndexType n_size)

template<int DataLayout, typename DataType, typename IndexType, typename Device>
void test_sycl_contraction_m(const Device &sycl_device)

template<int DataLayout, typename DataType, typename IndexType, typename Device>
void test_sycl_contraction_k(const Device &sycl_device)

template<int DataLayout, typename DataType, typename IndexType, typename Device>
void test_sycl_contraction_n(const Device &sycl_device)

template<int DataLayout, typename DataType, typename IndexType, typename Device>
void test_sycl_contraction_sizes(const Device &sycl_device)

template<int DataLayout, typename DataType, typename IndexType, typename Device>
static void test_no_out_of_bounds(const Device &sycl_device, IndexType m_size, IndexType k_size, IndexType n_size)

template<int DataLayout, typename DataType, typename IndexType, typename Device>
void test_scalar(const Device &sycl_device, IndexType m_size, IndexType k_size, IndexType n_size)

template<int DataLayout, typename DataType, typename IndexType, typename Device>
void contraction_batch(const Device &sycl_device, IndexType m_size, IndexType k_size, IndexType n_size, IndexType m_batch, IndexType start, IndexType limit)

template<int DataLayout, typename DataType, typename IndexType, typename Device>
void contraction_rhs_transposed(const Device &sycl_device, IndexType m_size, IndexType k_size, IndexType n_size)

template<int DataLayout, typename DataType, typename IndexType, typename Device>
void contraction_lhs_transposed(const Device &sycl_device, IndexType m_size, IndexType k_size, IndexType n_size)

template<int DataLayout, typename DataType, typename IndexType, typename Device>
void contraction_both_transposed(const Device &sycl_device, IndexType m_size, IndexType k_size, IndexType n_size)

template<typename Dev>
void tensorOutofBound(const Dev &sycl_device)

template<typename Dev>
void tensorTensor(const Dev &sycl_device)

template<typename Dev>
void tensorTensor_m(const Dev &sycl_device)

template<typename Dev>
void tensorTensor_n(const Dev &sycl_device)

template<typename Dev>
void tensorTensor_k(const Dev &sycl_device)

template<typename Dev>
void tensorTensor_sizes(const Dev &sycl_device)

template<typename Dev>
void vectorVector(const Dev &sycl_device)

template<typename Dev>
void vectorTensor(const Dev &sycl_device)

template<typename Dev>
void tensorVector(const Dev &sycl_device)

template<typename Dev>
void tensorScalar(const Dev &sycl_device)

template<typename Dev>
void skinnyTensor_row(const Dev &sycl_device)

template<typename Dev>
void skinnyTensor_col(const Dev &sycl_device)

template<typename Dev>
void tensor_contraction_batch_per_device(const Dev &sycl_device)

template<typename Dev>
void tensor_contraction_lhs_transposed_per_device(const Dev &sycl_device)

template<typename Dev>
void tensor_contraction_rhs_transposed_per_device(const Dev &sycl_device)

template<typename Dev>
void tensor_contraction_both_transposed_per_device(const Dev &sycl_device)

EIGEN_DECLARE_TEST(cxx11_tensor_contract_sycl)
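The tensor/vector wrappers and the *_per_device functions above fix the template parameters and forward to the generic helpers with concrete sizes. As a hedged sketch of that pattern (the float/int64_t instantiation and the sizes below are illustrative assumptions, and exampleTensorTensor is a hypothetical name, not part of this file):

template <typename Dev>
void exampleTensorTensor(const Dev &sycl_device) {
  typedef float DataType;     // assumed element type
  typedef int64_t IndexType;  // assumed index type
  // Exercise the generic helper for both data layouts with illustrative sizes.
  test_sycl_contraction<Eigen::ColMajor, DataType, IndexType>(sycl_device, 128, 128, 128);
  test_sycl_contraction<Eigen::RowMajor, DataType, IndexType>(sycl_device, 128, 128, 128);
}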
|
template<int DataLayout, typename DataType, typename IndexType, typename Device>
void contraction_rhs_transposed(const Device &sycl_device, IndexType m_size, IndexType k_size, IndexType n_size)
std::size_t t_left_bytes = t_left.size() * sizeof(DataType);
std::size_t t_right_bytes = t_right.size() * sizeof(DataType);
std::size_t t_result_bytes = t_result.size() * sizeof(DataType);

DataType *d_t_left = static_cast<DataType *>(sycl_device.allocate(t_left_bytes));
DataType *d_t_right = static_cast<DataType *>(sycl_device.allocate(t_right_bytes));
DataType *d_t_result = static_cast<DataType *>(sycl_device.allocate(t_result_bytes));

sycl_device.memcpyHostToDevice(d_t_left, t_left.data(), t_left_bytes);
sycl_device.memcpyHostToDevice(d_t_right, t_right.data(), t_right_bytes);

// Run the contraction on the SYCL device and copy the result back.
gpu_t_result.device(sycl_device) = gpu_t_left.contract(gpu_t_right, dims);
sycl_device.memcpyDeviceToHost(t_result_gpu.data(), d_t_result, t_result_bytes);

// Reference result, computed on the host.
t_result = t_left.contract(t_right, dims);

for (IndexType j = 0; j < m_size; j++) {
  for (IndexType i = 0; i < n_size; i++) {
    // Accept the value if it passes either the absolute or the relative check.
    if (static_cast<DataType>(std::fabs(static_cast<DataType>(
            t_result(j, i) - t_result_gpu(j, i)))) < error_threshold) {
      continue;
    }
    if (Eigen::internal::isApprox(t_result(j, i), t_result_gpu(j, i), error_threshold)) {
      continue;
    }
    std::cout << "M : " << m_size << ", N : " << n_size << ", K : " << k_size
              << ", mismatch detected at IndexType m: " << j << " n: " << i
              << " CPU : " << t_result(j, i)
              << " vs SYCL:" << t_result_gpu(j, i) << std::endl;
    VERIFY_IS_APPROX(t_result_gpu(j, i), t_result(j, i));
  }
}
sycl_device.deallocate(d_t_left);
sycl_device.deallocate(d_t_right);
sycl_device.deallocate(d_t_result);
References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), error_threshold, std::fabs(), Eigen::internal::isApprox(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::size(), and VERIFY_IS_APPROX.
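In this helper the right-hand operand is stored with its dimensions swapped (n_size x k_size), and the contraction pairs index 1 of both operands, which is equivalent to the matrix product A * B^T. A minimal host-only sketch of that contraction, assuming only Eigen's Tensor module (sizes are illustrative):

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  // Left operand is m x k; right operand is n x k, i.e. stored "transposed".
  Eigen::Tensor<float, 2> a(3, 4);  // m = 3, k = 4
  Eigen::Tensor<float, 2> b(5, 4);  // n = 5, k = 4
  a.setRandom();
  b.setRandom();

  // Pair index 1 of a with index 1 of b: result(m, n) = sum_k a(m, k) * b(n, k).
  Eigen::array<Eigen::IndexPair<int>, 1> dims = {Eigen::IndexPair<int>(1, 1)};
  Eigen::Tensor<float, 2> result = a.contract(b, dims);

  std::cout << result.dimension(0) << " x " << result.dimension(1) << std::endl;  // prints 3 x 5
  return 0;
}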
EIGEN_DECLARE_TEST(cxx11_tensor_contract_sycl)
for (const auto &device : Eigen::get_sycl_supported_devices()) {
  std::cout << "Running on "
            << device.template get_info<cl::sycl::info::device::name>() << std::endl;
  QueueInterface queueInterface(device);
  auto sycl_device = Eigen::SyclDevice(&queueInterface);
  // Each per-device wrapper (tensorOutofBound, tensorTensor, ..., listed in the
  // references below) is dispatched here through its CALL_SUBTEST_* macro.
}
References CALL_SUBTEST_1, CALL_SUBTEST_10, CALL_SUBTEST_11, CALL_SUBTEST_2, CALL_SUBTEST_3, CALL_SUBTEST_4, CALL_SUBTEST_5, CALL_SUBTEST_6, CALL_SUBTEST_7, CALL_SUBTEST_8, CALL_SUBTEST_9, skinnyTensor_col(), skinnyTensor_row(), tensor_contraction_batch_per_device(), tensor_contraction_both_transposed_per_device(), tensor_contraction_lhs_transposed_per_device(), tensor_contraction_rhs_transposed_per_device(), tensorOutofBound(), tensorScalar(), tensorTensor(), tensorTensor_k(), tensorTensor_m(), tensorTensor_n(), tensorTensor_sizes(), tensorVector(), vectorTensor(), and vectorVector().
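The CALL_SUBTEST_N macros come from the generated split_test_helper.h and let a long test binary be compiled and run in slices: each macro forwards to CALL_SUBTEST only when its part is requested. A simplified sketch of that pattern, assuming the usual Eigen test conventions (this is not the verbatim generated header):

#if defined(EIGEN_TEST_PART_1) || defined(EIGEN_TEST_PART_ALL)
#define CALL_SUBTEST_1(FUNC) CALL_SUBTEST(FUNC)  // run this subtest in part 1
#else
#define CALL_SUBTEST_1(FUNC)                     // compiled out otherwise
#endif

#if defined(EIGEN_TEST_PART_2) || defined(EIGEN_TEST_PART_ALL)
#define CALL_SUBTEST_2(FUNC) CALL_SUBTEST(FUNC)
#else
#define CALL_SUBTEST_2(FUNC)
#endif
// ... and so on up to CALL_SUBTEST_11 for this test.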
template<int DataLayout, typename DataType, typename IndexType, typename Device>
static void test_no_out_of_bounds(const Device &sycl_device, IndexType m_size, IndexType k_size, IndexType n_size)
// Allocate twice as much memory as strictly needed so that reads and writes
// past the logical end of each tensor can be detected.
auto padded_left_size = 2 * t_left.size();
auto padded_right_size = 2 * t_right.size();
auto padded_result_size = 2 * t_result.size();

std::size_t t_left_bytes = padded_left_size * sizeof(DataType);
std::size_t t_right_bytes = padded_right_size * sizeof(DataType);
std::size_t t_result_bytes = padded_result_size * sizeof(DataType);

DataType *d_t_left = static_cast<DataType *>(sycl_device.allocate(t_left_bytes));
DataType *d_t_right = static_cast<DataType *>(sycl_device.allocate(t_right_bytes));
DataType *d_t_result = static_cast<DataType *>(sycl_device.allocate(t_result_bytes));

// Fill the padding (and the whole result buffer) with NaN sentinels.
DataType nan = std::numeric_limits<DataType>::quiet_NaN();
auto host_left_data = new DataType[padded_left_size];
std::copy_n(t_left.data(), t_left.size(), host_left_data);
std::fill_n(host_left_data + t_left.size(), t_left.size(), nan);
auto host_right_data = new DataType[padded_right_size];
std::copy_n(t_right.data(), t_right.size(), host_right_data);
std::fill_n(host_right_data + t_right.size(), t_right.size(), nan);
auto host_result_data = new DataType[padded_result_size];
std::fill_n(host_result_data, padded_result_size, nan);

sycl_device.memcpyHostToDevice(d_t_left, host_left_data, t_left_bytes);
sycl_device.memcpyHostToDevice(d_t_right, host_right_data, t_right_bytes);
sycl_device.memcpyHostToDevice(d_t_result, host_result_data, t_result_bytes);

gpu_t_result.device(sycl_device) = gpu_t_left.contract(gpu_t_right, dims);
sycl_device.memcpyDeviceToHost(host_result_data, d_t_result, t_result_bytes);

t_result = t_left.contract(t_right, dims);

for (IndexType i = 0; i < t_result.size(); i++) {
  // Accept the value if it passes either the absolute or the relative check.
  if (static_cast<DataType>(std::fabs(static_cast<DataType>(
          t_result(i) - host_result_data[i]))) < error_threshold ||
      Eigen::internal::isApprox(t_result(i), host_result_data[i], error_threshold)) {
    continue;
  }
  // A NaN in the valid region means a padded (NaN) input was read out of bounds.
  if (isnan(host_result_data[i])) {
    std::cout << "M : " << m_size << ", N : " << n_size << ", K : " << k_size
              << ", invalid read detected at IndexType " << i << ": " << t_result(i)
              << " vs " << host_result_data[i] << std::endl;
  } else {
    std::cout << "M : " << m_size << ", N : " << n_size << ", K : " << k_size
              << ", mismatch detected at IndexType " << i << ": " << t_result(i)
              << " vs " << host_result_data[i] << std::endl;
  }
  VERIFY_IS_APPROX(host_result_data[i], t_result(i));
}

// Any non-NaN value in the padding half of the result buffer is evidence of
// an out-of-bounds write by the kernel.
for (IndexType i = t_result.size(); i < padded_result_size; i++) {
  if (!isnan(host_result_data[i])) {
    std::cout << "M : " << m_size << ", N : " << n_size << ", K : " << k_size
              << ", invalid write detected at IndexType " << i << ": "
              << host_result_data[i] << std::endl;
  }
}

sycl_device.deallocate(d_t_left);
sycl_device.deallocate(d_t_right);
sycl_device.deallocate(d_t_result);

delete[] host_left_data;
delete[] host_right_data;
delete[] host_result_data;
References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), error_threshold, std::fabs(), Eigen::internal::isApprox(), isnan, Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::size(), and VERIFY_IS_APPROX.
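The same sentinel idea can be prototyped without a SYCL device: over-allocate, fill the tail with quiet NaNs, run the computation, and check that the sentinels survived. A minimal host-only sketch, assuming nothing beyond the standard library (all names here are illustrative):

#include <cassert>
#include <cmath>
#include <cstddef>
#include <limits>
#include <vector>

// Returns true if no element past `valid` was overwritten, i.e. every
// sentinel in the padding region is still a NaN.
bool tail_is_untouched(const std::vector<float> &buf, std::size_t valid) {
  for (std::size_t i = valid; i < buf.size(); ++i) {
    if (!std::isnan(buf[i])) return false;  // an out-of-bounds write hit a sentinel
  }
  return true;
}

int main() {
  const std::size_t n = 16;
  // Allocate twice the needed storage; the second half holds NaN sentinels.
  std::vector<float> result(2 * n, std::numeric_limits<float>::quiet_NaN());

  // Stand-in for the kernel under test: writes exactly n results.
  for (std::size_t i = 0; i < n; ++i) result[i] = 2.0f * static_cast<float>(i);

  assert(tail_is_untouched(result, n));  // fails if the "kernel" wrote past n
  return 0;
}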