IndexType sizeDim1 = 2;
IndexType sizeDim2 = 3;
IndexType sizeDim3 = 5;
IndexType sizeDim4 = 7;
IndexType sizeDim5 = 11;
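// Rank-5 tensor of shape 2 x 3 x 5 x 7 x 11; the buffer sizes computed
// below are in bytes.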
const size_t tensorBuffSize = tensor.size() * sizeof(DataType);
const size_t input2TensorBuffSize = input2.size() * sizeof(DataType);
std::cout << tensorBuffSize << " , " << input2TensorBuffSize << std::endl;
DataType* gpu_data_tensor = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
DataType* gpu_data_input1 = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
DataType* gpu_data_input2 = static_cast<DataType*>(sycl_device.allocate(input2TensorBuffSize));
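// The TensorMap views over these device pointers (gpu_tensor, gpu_input1,
// gpu_input2, ...) are constructed in lines elided from this excerpt.
// First lvalue chip: write the rank-4 input2 into the slice at offset 1
// along dimension 0 of the device tensor, then copy the result back.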
sycl_device.memcpyHostToDevice(gpu_data_input1, input1.data(), tensorBuffSize);
gpu_tensor.device(sycl_device) = gpu_input1;
sycl_device.memcpyHostToDevice(gpu_data_input2, input2.data(), input2TensorBuffSize);
gpu_tensor.template chip<0l>(1l).device(sycl_device) = gpu_input2;
sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
for (int i = 0; i < sizeDim1; ++i) {
  for (int j = 0; j < sizeDim2; ++j) {
    for (int k = 0; k < sizeDim3; ++k) {
      for (int l = 0; l < sizeDim4; ++l) {
        for (int m = 0; m < sizeDim5; ++m) {
          // Element-wise VERIFY_IS_EQUAL checks elided in this excerpt.
        }
      }
    }
  }
}
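// Reset the device tensor from input1, then chip at offset 1 along dimension 1.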
gpu_tensor.device(sycl_device) = gpu_input1;
const size_t input3TensorBuffSize = input3.size() * sizeof(DataType);
DataType* gpu_data_input3 = static_cast<DataType*>(sycl_device.allocate(input3TensorBuffSize));
sycl_device.memcpyHostToDevice(gpu_data_input3, input3.data(), input3TensorBuffSize);
gpu_tensor.template chip<1l>(1l).device(sycl_device) = gpu_input3;
sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
for (int i = 0; i < sizeDim1; ++i) {
  for (int j = 0; j < sizeDim2; ++j) {
    for (int k = 0; k < sizeDim3; ++k) {
      for (int l = 0; l < sizeDim4; ++l) {
        for (int m = 0; m < sizeDim5; ++m) {
          // Element-wise VERIFY_IS_EQUAL checks elided in this excerpt.
        }
      }
    }
  }
}
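// Reset again and chip at offset 3 along dimension 2.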
gpu_tensor.device(sycl_device) = gpu_input1;
const size_t input4TensorBuffSize = input4.size() * sizeof(DataType);
DataType* gpu_data_input4 = static_cast<DataType*>(sycl_device.allocate(input4TensorBuffSize));
sycl_device.memcpyHostToDevice(gpu_data_input4, input4.data(), input4TensorBuffSize);
gpu_tensor.template chip<2l>(3l).device(sycl_device) = gpu_input4;
sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
for (int i = 0; i < sizeDim1; ++i) {
  for (int j = 0; j < sizeDim2; ++j) {
    for (int k = 0; k < sizeDim3; ++k) {
      for (int l = 0; l < sizeDim4; ++l) {
        for (int m = 0; m < sizeDim5; ++m) {
          // Element-wise VERIFY_IS_EQUAL checks elided in this excerpt.
        }
      }
    }
  }
}
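// Reset and chip at offset 4 along dimension 3.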
gpu_tensor.device(sycl_device) = gpu_input1;
const size_t input5TensorBuffSize = input5.size() * sizeof(DataType);
DataType* gpu_data_input5 = static_cast<DataType*>(sycl_device.allocate(input5TensorBuffSize));
sycl_device.memcpyHostToDevice(gpu_data_input5, input5.data(), input5TensorBuffSize);
gpu_tensor.template chip<3l>(4l).device(sycl_device) = gpu_input5;
sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
for (int i = 0; i < sizeDim1; ++i) {
  for (int j = 0; j < sizeDim2; ++j) {
    for (int k = 0; k < sizeDim3; ++k) {
      for (int l = 0; l < sizeDim4; ++l) {
        for (int m = 0; m < sizeDim5; ++m) {
          // Element-wise VERIFY_IS_EQUAL checks elided in this excerpt.
        }
      }
    }
  }
}
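// Reset and chip at offset 5 along the last dimension.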
gpu_tensor.device(sycl_device) = gpu_input1;
const size_t input6TensorBuffSize = input6.size() * sizeof(DataType);
DataType* gpu_data_input6 = static_cast<DataType*>(sycl_device.allocate(input6TensorBuffSize));
sycl_device.memcpyHostToDevice(gpu_data_input6, input6.data(), input6TensorBuffSize);
gpu_tensor.template chip<4l>(5l).device(sycl_device) = gpu_input6;
sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
for (int i = 0; i < sizeDim1; ++i) {
  for (int j = 0; j < sizeDim2; ++j) {
    for (int k = 0; k < sizeDim3; ++k) {
      for (int l = 0; l < sizeDim4; ++l) {
        for (int m = 0; m < sizeDim5; ++m) {
          // Element-wise VERIFY_IS_EQUAL checks elided in this excerpt.
        }
      }
    }
  }
}
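// Final case: chip on both sides of the assignment using the runtime
// chip(offset, dim) overload, copying slice 0 along dimension 0 of
// input7 into the same slice of the device tensor.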
gpu_tensor.device(sycl_device) = gpu_input1;
DataType* gpu_data_input7 = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
sycl_device.memcpyHostToDevice(gpu_data_input7, input7.data(), tensorBuffSize);
gpu_tensor.chip(0l, 0l).device(sycl_device) = gpu_input7.chip(0l, 0l);
sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
for (int i = 0; i < sizeDim1; ++i) {
  for (int j = 0; j < sizeDim2; ++j) {
    for (int k = 0; k < sizeDim3; ++k) {
      for (int l = 0; l < sizeDim4; ++l) {
        for (int m = 0; m < sizeDim5; ++m) {
          // Element-wise VERIFY_IS_EQUAL checks elided in this excerpt.
        }
      }
    }
  }
}
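// Release every device allocation made above.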
sycl_device.deallocate(gpu_data_tensor);
sycl_device.deallocate(gpu_data_input1);
sycl_device.deallocate(gpu_data_input2);
sycl_device.deallocate(gpu_data_input3);
sycl_device.deallocate(gpu_data_input4);
sycl_device.deallocate(gpu_data_input5);
sycl_device.deallocate(gpu_data_input6);
sycl_device.deallocate(gpu_data_input7);