Initial commit
This commit is contained in:
commit
db765fff8d
8
CMakeLists.txt
Normal file
8
CMakeLists.txt
Normal file
@ -0,0 +1,8 @@
|
||||
# Build script for the SYCL sample program.
cmake_minimum_required(
  VERSION 3.25
)

project(
  sycl_example
)

# The build cannot proceed without AdaptiveCpp, hence REQUIRED.
find_package(
  AdaptiveCpp
  REQUIRED
)

# Single executable built from the one translation unit in this project.
add_executable(
  sycl_comp
  sycl_comp.cpp
)

# NOTE(review): add_sycl_to_target is presumably supplied by the AdaptiveCpp
# package found above; it is not defined in this file.
add_sycl_to_target(
  TARGET sycl_comp
)
|
||||
14
README.org
Normal file
14
README.org
Normal file
@ -0,0 +1,14 @@
|
||||
#+title: Matrix multiplication with SYCL, yay
|
||||
|
||||
This project serves as a sample demonstration of SYCL syntax and offers a
|
||||
straightforward program as an illustration.
|
||||
|
||||
Its primary objective is to function as a benchmark for executing matrix
|
||||
multiplication on a single CPU core while using SYCL for both OpenMP and GPU
|
||||
parallelization. Subsequently, we will record and analyze the execution times.
|
||||
|
||||
At this stage, the project showcases how to transfer and manipulate data on the
|
||||
GPU using the Unified Shared Memory (USM) model with explicit data movement.
|
||||
Unfortunately, my current setup with =hip= lacks a valid USM provider for my
graphics card, the AMD Radeon RX 6700 XT, so I cannot demonstrate implicit data
movement yet 😔
|
||||
38
sycl_comp.cpp
Normal file
38
sycl_comp.cpp
Normal file
@ -0,0 +1,38 @@
|
||||
#include <iostream>
|
||||
|
||||
#include <CL/sycl.hpp>
|
||||
|
||||
using namespace cl::sycl;
|
||||
|
||||
auto main(int argc, char **argv) -> int {
|
||||
|
||||
queue q;
|
||||
|
||||
std::cout << "Using device: " << q.get_device().get_info<info::device::name>()
|
||||
<< "\n";
|
||||
|
||||
int hostArray[42];
|
||||
auto deviceArray = static_cast<int *>(malloc_device(42 * sizeof(int), q));
|
||||
|
||||
for (int i = 0; i < 42; i++) {
|
||||
hostArray[i] = i;
|
||||
}
|
||||
|
||||
q.memcpy(deviceArray, hostArray, 42 * sizeof(int));
|
||||
q.wait();
|
||||
|
||||
q.submit([&](handler &h) {
|
||||
h.parallel_for(range<1>(42), [=](auto ID) { deviceArray[ID]++; });
|
||||
});
|
||||
|
||||
q.wait();
|
||||
|
||||
q.memcpy(hostArray, deviceArray, 42 * sizeof(int));
|
||||
q.wait();
|
||||
|
||||
for (int i = 0; i < 42; i++) {
|
||||
std::cout << hostArray[i] << " ";
|
||||
}
|
||||
|
||||
std::cout << "\n";
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user