From db765fff8d7e1329b14479c2b259c42fc351d764 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Max=20L=C3=BCbke?= <mluebke@gfz-potsdam.de>
Date: Fri, 29 Sep 2023 17:16:41 +0200
Subject: [PATCH] Initial commit

---
 CMakeLists.txt |  8 ++++++++
 README.org     | 14 ++++++++++++++
 sycl_comp.cpp  | 38 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 60 insertions(+)
 create mode 100644 CMakeLists.txt
 create mode 100644 README.org
 create mode 100644 sycl_comp.cpp
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..4a68522
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,8 @@
+cmake_minimum_required(VERSION 3.25)
+
+project(sycl_example)
+
+find_package(AdaptiveCpp REQUIRED) # REQUIRED: configuration stops if the package is not found
+
+add_executable(sycl_comp sycl_comp.cpp) # one executable built from the single source file
+add_sycl_to_target(TARGET sycl_comp) # presumably provided by the AdaptiveCpp package to enable SYCL on the target -- confirm
diff --git a/README.org b/README.org
new file mode 100644
index 0000000..a123d06
--- /dev/null
+++ b/README.org
@@ -0,0 +1,14 @@
+#+title: Matrix multiplication with SYCL, yay
+
+This project serves as a sample demonstration of SYCL syntax and offers a
+straightforward program as an illustration.
+
+Its primary objective is to serve as a benchmark that compares matrix
+multiplication on a single CPU core against SYCL-based parallelization with
+OpenMP and on the GPU. Subsequently, we will record and analyze the execution times.
+
+At this stage, the project showcases how to transfer and manipulate data on the
+GPU using the Unified Shared Memory (USM) model with explicit data movement.
+Unfortunately, my current implementation with =hip= lacks a valid USM provider
+for my graphics card, the AMD Radeon RX 6700 XT, which prevents me from
+demonstrating implicit data movement 😔
diff --git a/sycl_comp.cpp b/sycl_comp.cpp
new file mode 100644
index 0000000..d6889cd
--- /dev/null
+++ b/sycl_comp.cpp
@@ -0,0 +1,38 @@
+#include <iostream>
+
+#include <CL/sycl.hpp>
+
+using namespace cl::sycl;
+
+// Copies `count` ints to the device, increments each one in a SYCL kernel,
+// copies them back and prints them. Returns 1 if the device allocation fails.
+auto main(int argc, char **argv) -> int {
+  constexpr int count = 42;
+  queue q;
+  std::cout << "Using device: " << q.get_device().get_info<info::device::name>()
+            << "\n";
+
+  int hostArray[count];
+  for (int i = 0; i < count; i++) {
+    hostArray[i] = i;
+  }
+
+  auto deviceArray = static_cast<int *>(malloc_device(count * sizeof(int), q));
+  if (deviceArray == nullptr) { // USM allocation reports failure as nullptr
+    std::cerr << "Device allocation failed\n";
+    return 1;
+  }
+
+  q.memcpy(deviceArray, hostArray, sizeof(hostArray)).wait(); // host -> device
+  q.submit([&](handler &h) {
+    h.parallel_for(range<1>(count), [=](auto ID) { deviceArray[ID]++; });
+  });
+  q.wait(); // the kernel must finish before its results are copied back
+  q.memcpy(hostArray, deviceArray, sizeof(hostArray)).wait(); // device -> host
+  free(deviceArray, q); // release the USM allocation made by malloc_device
+
+  for (int value : hostArray) {
+    std::cout << value << " ";
+  }
+  std::cout << "\n";
+}