180 IndexType sizeDim1 = 100;
181 IndexType sizeDim2 = 10;
182 IndexType sizeDim3 = 20;
192 DataType* gpu_in1_data =
static_cast<DataType*
>(sycl_device.allocate(in1.size() *
sizeof(DataType)));
193 DataType* gpu_in2_data =
static_cast<DataType*
>(sycl_device.allocate(in2.size() *
sizeof(DataType)));
194 DataType* gpu_in3_data =
static_cast<DataType*
>(sycl_device.allocate(in3.size() *
sizeof(DataType)));
195 DataType* gpu_out_data =
static_cast<DataType*
>(sycl_device.allocate(
out.size() *
sizeof(DataType)));
203 gpu_in1.device(sycl_device) = gpu_in1.constant(1.2f);
204 sycl_device.memcpyDeviceToHost(in1.data(), gpu_in1_data, (in1.size()) *
sizeof(DataType));
205 sycl_device.synchronize();
207 for (IndexType
i = 0;
i < sizeDim1; ++
i) {
208 for (IndexType
j = 0;
j < sizeDim2; ++
j) {
209 for (IndexType
k = 0;
k < sizeDim3; ++
k) {
214 printf(
"a=1.2f Test passed\n");
217 gpu_out.device(sycl_device) = gpu_in1 * 1.2f;
218 sycl_device.memcpyDeviceToHost(
out.data(), gpu_out_data, (
out.size()) *
sizeof(DataType));
219 sycl_device.synchronize();
221 for (IndexType
i = 0;
i < sizeDim1; ++
i) {
222 for (IndexType
j = 0;
j < sizeDim2; ++
j) {
223 for (IndexType
k = 0;
k < sizeDim3; ++
k) {
228 printf(
"a=b*1.2f Test Passed\n");
231 sycl_device.memcpyHostToDevice(gpu_in2_data, in2.data(), (in2.size()) *
sizeof(DataType));
232 gpu_out.device(sycl_device) = gpu_in1 * gpu_in2;
233 sycl_device.memcpyDeviceToHost(
out.data(), gpu_out_data, (
out.size()) *
sizeof(DataType));
234 sycl_device.synchronize();
236 for (IndexType
i = 0;
i < sizeDim1; ++
i) {
237 for (IndexType
j = 0;
j < sizeDim2; ++
j) {
238 for (IndexType
k = 0;
k < sizeDim3; ++
k) {
243 printf(
"c=a*b Test Passed\n");
246 gpu_out.device(sycl_device) = gpu_in1 + gpu_in2;
247 sycl_device.memcpyDeviceToHost(
out.data(), gpu_out_data, (
out.size()) *
sizeof(DataType));
248 sycl_device.synchronize();
249 for (IndexType
i = 0;
i < sizeDim1; ++
i) {
250 for (IndexType
j = 0;
j < sizeDim2; ++
j) {
251 for (IndexType
k = 0;
k < sizeDim3; ++
k) {
256 printf(
"c=a+b Test Passed\n");
259 gpu_out.device(sycl_device) = gpu_in1 * gpu_in1;
260 sycl_device.memcpyDeviceToHost(
out.data(), gpu_out_data, (
out.size()) *
sizeof(DataType));
261 sycl_device.synchronize();
262 for (IndexType
i = 0;
i < sizeDim1; ++
i) {
263 for (IndexType
j = 0;
j < sizeDim2; ++
j) {
264 for (IndexType
k = 0;
k < sizeDim3; ++
k) {
269 printf(
"c= a*a Test Passed\n");
272 gpu_out.device(sycl_device) = gpu_in1 * gpu_in1.constant(3.14f) + gpu_in2 * gpu_in2.constant(2.7f);
273 sycl_device.memcpyDeviceToHost(
out.data(), gpu_out_data, (
out.size()) *
sizeof(DataType));
274 sycl_device.synchronize();
275 for (IndexType
i = 0;
i < sizeDim1; ++
i) {
276 for (IndexType
j = 0;
j < sizeDim2; ++
j) {
277 for (IndexType
k = 0;
k < sizeDim3; ++
k) {
282 printf(
"a*3.14f + b*2.7f Test Passed\n");
285 sycl_device.memcpyHostToDevice(gpu_in3_data, in3.data(), (in3.size()) *
sizeof(DataType));
286 gpu_out.device(sycl_device) = (gpu_in1 > gpu_in1.constant(0.5f)).select(gpu_in2, gpu_in3);
287 sycl_device.memcpyDeviceToHost(
out.data(), gpu_out_data, (
out.size()) *
sizeof(DataType));
288 sycl_device.synchronize();
289 for (IndexType
i = 0;
i < sizeDim1; ++
i) {
290 for (IndexType
j = 0;
j < sizeDim2; ++
j) {
291 for (IndexType
k = 0;
k < sizeDim3; ++
k) {
296 printf(
"d= (a>0.5? b:c) Test Passed\n");
297 sycl_device.deallocate(gpu_in1_data);
298 sycl_device.deallocate(gpu_in2_data);
299 sycl_device.deallocate(gpu_in3_data);
300 sycl_device.deallocate(gpu_out_data);
char char char int int * k
Definition: level2_impl.h:374
std::ptrdiff_t j
Definition: tut_arithmetic_redux_minmax.cpp:2