142 float* d_float1 = (
float*)sycl_device.allocate(
num_elem *
sizeof(
float));
143 float* d_float2 = (
float*)sycl_device.allocate(
num_elem *
sizeof(
float));
144 float* d_float3 = (
float*)sycl_device.allocate(
num_elem *
sizeof(
float));
164 gpu_float1.device(sycl_device) = gpu_float1.random() - gpu_float1.constant(0.5f);
165 gpu_float2.device(sycl_device) = gpu_float2.random() + gpu_float1.constant(0.5f);
166 gpu_float3.device(sycl_device) = gpu_float3.random();
167 gpu_res1_float.device(sycl_device) = gpu_float1.exp().cast<
Eigen::half>();
168 gpu_res2_float.device(sycl_device) = gpu_float2.log().cast<
Eigen::half>();
169 gpu_res3_float.device(sycl_device) = gpu_float3.log1p().cast<
Eigen::half>();
170 gpu_res4_float.device(sycl_device) = gpu_float3.expm1().cast<
Eigen::half>();
172 gpu_res1_half.device(sycl_device) = gpu_float1.cast<
Eigen::half>();
173 gpu_res1_half.device(sycl_device) = gpu_res1_half.exp();
175 gpu_res2_half.device(sycl_device) = gpu_float2.cast<
Eigen::half>();
176 gpu_res2_half.device(sycl_device) = gpu_res2_half.log();
178 gpu_res3_half.device(sycl_device) = gpu_float3.cast<
Eigen::half>();
179 gpu_res3_half.device(sycl_device) = gpu_res3_half.log1p();
181 gpu_res3_half.device(sycl_device) = gpu_float3.cast<
Eigen::half>();
182 gpu_res3_half.device(sycl_device) = gpu_res3_half.expm1();
193 sycl_device.memcpyDeviceToHost(input1.data(), d_float1,
num_elem *
sizeof(
float));
194 sycl_device.memcpyDeviceToHost(input2.data(), d_float2,
num_elem *
sizeof(
float));
195 sycl_device.memcpyDeviceToHost(input3.data(), d_float3,
num_elem *
sizeof(
float));
196 sycl_device.memcpyDeviceToHost(half_prec1.data(), d_res1_half,
num_elem *
sizeof(
Eigen::half));
197 sycl_device.memcpyDeviceToHost(full_prec1.data(), d_res1_float,
num_elem *
sizeof(
Eigen::half));
198 sycl_device.memcpyDeviceToHost(half_prec2.data(), d_res2_half,
num_elem *
sizeof(
Eigen::half));
199 sycl_device.memcpyDeviceToHost(full_prec2.data(), d_res2_float,
num_elem *
sizeof(
Eigen::half));
200 sycl_device.memcpyDeviceToHost(half_prec3.data(), d_res3_half,
num_elem *
sizeof(
Eigen::half));
201 sycl_device.memcpyDeviceToHost(full_prec3.data(), d_res3_float,
num_elem *
sizeof(
Eigen::half));
202 sycl_device.synchronize();
205 std::cout <<
"Checking elemwise exp " <<
i <<
" input = " << input1(
i) <<
" full = " << full_prec1(
i)
206 <<
" half = " << half_prec1(
i) << std::endl;
210 std::cout <<
"Checking elemwise log " <<
i <<
" input = " << input2(
i) <<
" full = " << full_prec2(
i)
211 <<
" half = " << half_prec2(
i) << std::endl;
218 std::cout <<
"Checking elemwise plog1 " <<
i <<
" input = " << input3(
i) <<
" full = " << full_prec3(
i)
219 <<
" half = " << half_prec3(
i) << std::endl;