TensorDeviceSycl.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Mehdi Goli Codeplay Software Ltd.
// Ralph Potter Codeplay Software Ltd.
// Luke Iwanski Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>

//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#if defined(EIGEN_USE_SYCL) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H)
#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H
#include <unordered_set>

// IWYU pragma: private
#include "./InternalHeaderCheck.h"

namespace Eigen {

namespace TensorSycl {
namespace internal {
struct SyclDeviceInfo {
  SyclDeviceInfo(cl::sycl::queue queue)
      : local_mem_type(queue.get_device().template get_info<cl::sycl::info::device::local_mem_type>()),
        max_work_item_sizes(queue.get_device().template get_info<cl::sycl::info::device::max_work_item_sizes<3>>()),
        max_mem_alloc_size(queue.get_device().template get_info<cl::sycl::info::device::max_mem_alloc_size>()),
        max_compute_units(queue.get_device().template get_info<cl::sycl::info::device::max_compute_units>()),
        max_work_group_size(queue.get_device().template get_info<cl::sycl::info::device::max_work_group_size>()),
        local_mem_size(queue.get_device().template get_info<cl::sycl::info::device::local_mem_size>()),
        platform_name(queue.get_device().get_platform().template get_info<cl::sycl::info::platform::name>()),
        device_name(queue.get_device().template get_info<cl::sycl::info::device::name>()),
        device_vendor(queue.get_device().template get_info<cl::sycl::info::device::vendor>()) {}

  cl::sycl::info::local_mem_type local_mem_type;
  cl::sycl::id<3> max_work_item_sizes;
  unsigned long max_mem_alloc_size;
  unsigned long max_compute_units;
  unsigned long max_work_group_size;
  size_t local_mem_size;
  std::string platform_name;
  std::string device_name;
  std::string device_vendor;
};

}  // end namespace internal
}  // end namespace TensorSycl

// All devices (even an AMD CPU running the Intel OpenCL runtime) that support
// OpenCL and can consume SPIR or SPIR-V can use the Eigen SYCL backend, and
// consequently TensorFlow via the Eigen SYCL backend.
EIGEN_STRONG_INLINE auto get_sycl_supported_devices() -> decltype(cl::sycl::device::get_devices()) {
#ifdef EIGEN_SYCL_USE_DEFAULT_SELECTOR
  return {cl::sycl::device(cl::sycl::default_selector())};
#else
  std::vector<cl::sycl::device> supported_devices;
  auto platform_list = cl::sycl::platform::get_platforms();
  for (const auto &platform : platform_list) {
    auto device_list = platform.get_devices();
    auto platform_name = platform.template get_info<cl::sycl::info::platform::name>();
    std::transform(platform_name.begin(), platform_name.end(), platform_name.begin(), ::tolower);
    for (const auto &device : device_list) {
      auto vendor = device.template get_info<cl::sycl::info::device::vendor>();
      std::transform(vendor.begin(), vendor.end(), vendor.begin(), ::tolower);
      // Skip CPU devices exposed through AMD's OpenCL platform (unless the
      // vendor string marks them as an APU), experimental platforms, and the
      // SYCL host device.
      bool unsupported_condition = (device.is_cpu() && platform_name.find("amd") != std::string::npos &&
                                    vendor.find("apu") == std::string::npos) ||
                                   (platform_name.find("experimental") != std::string::npos) || device.is_host();
      if (!unsupported_condition) {
        supported_devices.push_back(device);
      }
    }
  }
  return supported_devices;
#endif
}
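
// Usage sketch (illustrative, not a fixed part of the public API): pick one of
// the supported devices and build the queue/device wrappers defined below.
//
//   auto devices = get_sycl_supported_devices();
//   Eigen::QueueInterface queue_interface(devices[0]);
//   Eigen::SyclDevice sycl_device(&queue_interface);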

class QueueInterface {
 public:
  // Creates an in-order queue from a cl::sycl device or device selector, with
  // an explicit asynchronous exception handler.
  template <typename DeviceOrSelector>
  explicit QueueInterface(const DeviceOrSelector &dev_or_sel, cl::sycl::async_handler handler,
                          unsigned num_threads = std::thread::hardware_concurrency())
      : m_queue{dev_or_sel, handler, {sycl::property::queue::in_order()}},
        m_thread_pool(num_threads),
        m_device_info(m_queue) {}

  template <typename DeviceOrSelector>
  explicit QueueInterface(const DeviceOrSelector &dev_or_sel,
                          unsigned num_threads = std::thread::hardware_concurrency())
      : QueueInterface(
            dev_or_sel, [this](cl::sycl::exception_list l) { this->exception_caught_ = this->sycl_async_handler(l); },
            num_threads) {}

  explicit QueueInterface(const cl::sycl::queue &q, unsigned num_threads = std::thread::hardware_concurrency())
      : m_queue(q), m_thread_pool(num_threads), m_device_info(m_queue) {}

  EIGEN_STRONG_INLINE void *allocate(size_t num_bytes) const {
#if EIGEN_MAX_ALIGN_BYTES > 0
    return (void *)cl::sycl::aligned_alloc_device(EIGEN_MAX_ALIGN_BYTES, num_bytes, m_queue);
#else
    return (void *)cl::sycl::malloc_device(num_bytes, m_queue);
#endif
  }
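
  // Note: allocate() and allocate_temp() return SYCL USM *device* memory,
  // which must not be dereferenced on the host. A hypothetical round trip
  // through a QueueInterface `qi` (names illustrative):
  //
  //   float host_buf[16] = {/* ... */};
  //   auto *dev_buf = static_cast<float *>(qi.allocate(sizeof(host_buf)));
  //   qi.memcpy(dev_buf, host_buf, sizeof(host_buf));  // blocking host-to-device copy
  //   qi.deallocate(dev_buf);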

  EIGEN_STRONG_INLINE void *allocate_temp(size_t num_bytes) const {
    return (void *)cl::sycl::malloc_device<uint8_t>(num_bytes, m_queue);
  }

  template <typename data_t>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE data_t *get(data_t *data) const {
    return data;
  }

  EIGEN_STRONG_INLINE void deallocate_temp(void *p) const { deallocate(p); }

  EIGEN_STRONG_INLINE void deallocate_temp(const void *p) const { deallocate_temp(const_cast<void *>(p)); }

  EIGEN_STRONG_INLINE void deallocate(void *p) const { cl::sycl::free(p, m_queue); }

  // Copies n bytes from host memory to device memory. If a callback is
  // provided, the wait for completion is offloaded to the thread pool and the
  // callback runs afterwards; otherwise the call blocks.
  EIGEN_STRONG_INLINE void memcpyHostToDevice(void *dst, const void *src, size_t n,
                                              std::function<void()> callback) const {
    auto e = m_queue.memcpy(dst, src, n);
    synchronize_and_callback(e, callback);
  }

  // Copies n bytes from device memory to host memory, with the same callback
  // semantics as memcpyHostToDevice.
  EIGEN_STRONG_INLINE void memcpyDeviceToHost(void *dst, const void *src, size_t n,
                                              std::function<void()> callback) const {
    if (n == 0) {
      if (callback) callback();
      return;
    }
    auto e = m_queue.memcpy(dst, src, n);
    synchronize_and_callback(e, callback);
  }

  // Blocking copy of n bytes; queue::memcpy accepts host and/or USM pointers.
  EIGEN_STRONG_INLINE void memcpy(void *dst, const void *src, size_t n) const {
    if (n == 0) {
      return;
    }
    m_queue.memcpy(dst, src, n).wait();
  }

  // Blocking memset: fills n bytes of device memory with the byte value c.
  EIGEN_STRONG_INLINE void memset(void *data, int c, size_t n) const {
    if (n == 0) {
      return;
    }
    m_queue.memset(data, c, n).wait();
  }

  template <typename T>
  EIGEN_STRONG_INLINE void fill(T *begin, T *end, const T &value) const {
    if (begin == end) {
      return;
    }
    const size_t count = end - begin;
    m_queue.fill(begin, value, count).wait();
  }

  template <typename OutScalar, typename sycl_kernel, typename Lhs, typename Rhs, typename OutPtr, typename Range,
            typename Index, typename... T>
  EIGEN_ALWAYS_INLINE cl::sycl::event binary_kernel_launcher(const Lhs &lhs, const Rhs &rhs, OutPtr outptr,
                                                             Range thread_range, Index scratchSize, T... var) const {
    auto kernel_functor = [=](cl::sycl::handler &cgh) {
      typedef cl::sycl::accessor<OutScalar, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local>
          LocalAccessor;

      LocalAccessor scratch(cl::sycl::range<1>(scratchSize), cgh);
      cgh.parallel_for(thread_range, sycl_kernel(scratch, lhs, rhs, outptr, var...));
    };

    return m_queue.submit(kernel_functor);
  }
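
  // The kernel type passed to these launchers is expected to (a) be
  // constructible from the local scratch accessor followed by the remaining
  // launch arguments, and (b) expose an operator() taking the work item
  // associated with `thread_range`. A minimal shape, with all names purely
  // illustrative:
  //
  //   struct MyBinaryKernel {
  //     MyBinaryKernel(LocalAccessor scratch, LhsPtr lhs, RhsPtr rhs, OutPtr out, IndexType size);
  //     void operator()(cl::sycl::nd_item<1> item) const;  // reads lhs/rhs, writes out
  //   };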

  template <typename OutScalar, typename sycl_kernel, typename InPtr, typename OutPtr, typename Range, typename Index,
            typename... T>
  EIGEN_ALWAYS_INLINE cl::sycl::event unary_kernel_launcher(const InPtr &inptr, OutPtr &outptr, Range thread_range,
                                                            Index scratchSize, T... var) const {
    auto kernel_functor = [=](cl::sycl::handler &cgh) {
      typedef cl::sycl::accessor<OutScalar, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local>
          LocalAccessor;

      LocalAccessor scratch(cl::sycl::range<1>(scratchSize), cgh);
      cgh.parallel_for(thread_range, sycl_kernel(scratch, inptr, outptr, var...));
    };
    return m_queue.submit(kernel_functor);
  }

  template <typename OutScalar, typename sycl_kernel, typename InPtr, typename Range, typename Index, typename... T>
  EIGEN_ALWAYS_INLINE cl::sycl::event nullary_kernel_launcher(const InPtr &inptr, Range thread_range, Index scratchSize,
                                                              T... var) const {
    auto kernel_functor = [=](cl::sycl::handler &cgh) {
      typedef cl::sycl::accessor<OutScalar, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local>
          LocalAccessor;

      LocalAccessor scratch(cl::sycl::range<1>(scratchSize), cgh);
      cgh.parallel_for(thread_range, sycl_kernel(scratch, inptr, var...));
    };

    return m_queue.submit(kernel_functor);
  }

  EIGEN_STRONG_INLINE void synchronize() const {
#ifdef EIGEN_EXCEPTIONS
    m_queue.wait_and_throw();
#else
    m_queue.wait();
#endif
  }

  // Computes a 1D launch configuration: a power-of-two tile (work-group) size
  // and a global range GRange rounded up to a multiple of the tile size.
  template <typename Index>
  EIGEN_STRONG_INLINE void parallel_for_setup(Index n, Index &tileSize, Index &rng, Index &GRange) const {
    tileSize = static_cast<Index>(getNearestPowerOfTwoWorkGroupSize());
    tileSize = std::min(static_cast<Index>(EIGEN_SYCL_LOCAL_THREAD_DIM0 * EIGEN_SYCL_LOCAL_THREAD_DIM1),
                        static_cast<Index>(tileSize));
    rng = n;
    if (rng == 0) rng = static_cast<Index>(1);
    GRange = rng;
    if (tileSize > GRange)
      tileSize = GRange;
    else if (GRange > tileSize) {
      Index xMode = static_cast<Index>(GRange % tileSize);
      if (xMode != 0) GRange += static_cast<Index>(tileSize - xMode);
    }
  }
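
  // Worked example (assuming the effective work-group limit is 256): for
  // n = 1000, tileSize becomes 256 and GRange is rounded up from 1000 to 1024,
  // the next multiple of the tile size; kernels must therefore guard against
  // out-of-range item ids.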

  // Computes 2D global and local ranges: the work-group size is first split
  // between the two dimensions as powers of two, then each global range is
  // rounded up to a multiple of the corresponding local range.
  template <typename Index>
  EIGEN_STRONG_INLINE void parallel_for_setup(const std::array<Index, 2> &input_dim, cl::sycl::range<2> &global_range,
                                              cl::sycl::range<2> &local_range) const {
    std::array<Index, 2> input_range = input_dim;
    Index max_workgroup_Size = static_cast<Index>(getNearestPowerOfTwoWorkGroupSize());
    max_workgroup_Size = std::min(static_cast<Index>(EIGEN_SYCL_LOCAL_THREAD_DIM0 * EIGEN_SYCL_LOCAL_THREAD_DIM1),
                                  static_cast<Index>(max_workgroup_Size));
    Index pow_of_2 = static_cast<Index>(std::log2(max_workgroup_Size));
    local_range[1] = static_cast<Index>(std::pow(2, static_cast<Index>(pow_of_2 / 2)));
    input_range[1] = input_dim[1];
    if (input_range[1] == 0) input_range[1] = static_cast<Index>(1);
    global_range[1] = input_range[1];
    if (local_range[1] > global_range[1])
      local_range[1] = global_range[1];
    else if (global_range[1] > local_range[1]) {
      Index xMode = static_cast<Index>(global_range[1] % local_range[1]);
      if (xMode != 0) global_range[1] += static_cast<Index>(local_range[1] - xMode);
    }
    local_range[0] = static_cast<Index>(max_workgroup_Size / local_range[1]);
    input_range[0] = input_dim[0];
    if (input_range[0] == 0) input_range[0] = static_cast<Index>(1);
    global_range[0] = input_range[0];
    if (local_range[0] > global_range[0])
      local_range[0] = global_range[0];
    else if (global_range[0] > local_range[0]) {
      Index xMode = static_cast<Index>(global_range[0] % local_range[0]);
      if (xMode != 0) global_range[0] += static_cast<Index>(local_range[0] - xMode);
    }
  }
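
  // For example, with max_workgroup_Size = 256: pow_of_2 = 8, so
  // local_range[1] = 2^(8/2) = 16 and local_range[0] = 256 / 16 = 16; an input
  // of {100, 50} then yields global_range = {112, 64}, both rounded up to
  // multiples of 16.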

  // The 3D analogue of the 2D setup above: the work-group size is split across
  // three dimensions and each global range is rounded up to a multiple of the
  // corresponding local range.
  template <typename Index>
  EIGEN_STRONG_INLINE void parallel_for_setup(const std::array<Index, 3> &input_dim, cl::sycl::range<3> &global_range,
                                              cl::sycl::range<3> &local_range) const {
    std::array<Index, 3> input_range = input_dim;
    Index max_workgroup_Size = static_cast<Index>(getNearestPowerOfTwoWorkGroupSize());
    max_workgroup_Size = std::min(static_cast<Index>(EIGEN_SYCL_LOCAL_THREAD_DIM0 * EIGEN_SYCL_LOCAL_THREAD_DIM1),
                                  static_cast<Index>(max_workgroup_Size));
    Index pow_of_2 = static_cast<Index>(std::log2(max_workgroup_Size));
    local_range[2] = static_cast<Index>(std::pow(2, static_cast<Index>(pow_of_2 / 3)));
    input_range[2] = input_dim[2];
    if (input_range[2] == 0) input_range[2] = static_cast<Index>(1);
    global_range[2] = input_range[2];
    if (local_range[2] > global_range[2])
      local_range[2] = global_range[2];
    else if (global_range[2] > local_range[2]) {
      Index xMode = static_cast<Index>(global_range[2] % local_range[2]);
      if (xMode != 0) global_range[2] += static_cast<Index>(local_range[2] - xMode);
    }
    pow_of_2 = static_cast<Index>(std::log2(static_cast<Index>(max_workgroup_Size / local_range[2])));
    local_range[1] = static_cast<Index>(std::pow(2, static_cast<Index>(pow_of_2 / 2)));
    input_range[1] = input_dim[1];
    if (input_range[1] == 0) input_range[1] = static_cast<Index>(1);
    global_range[1] = input_range[1];
    if (local_range[1] > global_range[1])
      local_range[1] = global_range[1];
    else if (global_range[1] > local_range[1]) {
      Index xMode = static_cast<Index>(global_range[1] % local_range[1]);
      if (xMode != 0) global_range[1] += static_cast<Index>(local_range[1] - xMode);
    }
    local_range[0] = static_cast<Index>(max_workgroup_Size / (local_range[1] * local_range[2]));
    input_range[0] = input_dim[0];
    if (input_range[0] == 0) input_range[0] = static_cast<Index>(1);
    global_range[0] = input_range[0];
    if (local_range[0] > global_range[0])
      local_range[0] = global_range[0];
    else if (global_range[0] > local_range[0]) {
      Index xMode = static_cast<Index>(global_range[0] % local_range[0]);
      if (xMode != 0) global_range[0] += static_cast<Index>(local_range[0] - xMode);
    }
  }

  EIGEN_STRONG_INLINE bool has_local_memory() const {
#if !defined(EIGEN_SYCL_LOCAL_MEM) && defined(EIGEN_SYCL_NO_LOCAL_MEM)
    return false;
#elif defined(EIGEN_SYCL_LOCAL_MEM) && !defined(EIGEN_SYCL_NO_LOCAL_MEM)
    return true;
#else
    return m_device_info.local_mem_type == cl::sycl::info::local_mem_type::local;
#endif
  }

  EIGEN_STRONG_INLINE unsigned long max_buffer_size() const { return m_device_info.max_mem_alloc_size; }

  EIGEN_STRONG_INLINE unsigned long getNumSyclMultiProcessors() const { return m_device_info.max_compute_units; }

  EIGEN_STRONG_INLINE unsigned long maxSyclThreadsPerBlock() const { return m_device_info.max_work_group_size; }

  EIGEN_STRONG_INLINE cl::sycl::id<3> maxWorkItemSizes() const { return m_device_info.max_work_item_sizes; }

  // SYCL/OpenCL has no device-version concept comparable to CUDA's; a fixed
  // value keeps this interface uniform across Eigen device back-ends.
  EIGEN_STRONG_INLINE int majorDeviceVersion() const { return 1; }

  EIGEN_STRONG_INLINE unsigned long maxSyclThreadsPerMultiProcessor() const {
    // OpenCL does not have such a concept
    return 2;
  }

  EIGEN_STRONG_INLINE size_t sharedMemPerBlock() const { return m_device_info.local_mem_size; }

  // Returns the nearest power-of-two work-group size that is <= the maximum
  // device work-group size.
  EIGEN_STRONG_INLINE size_t getNearestPowerOfTwoWorkGroupSize() const {
    return getPowerOfTwo(m_device_info.max_work_group_size, false);
  }

  EIGEN_STRONG_INLINE std::string getPlatformName() const { return m_device_info.platform_name; }

  EIGEN_STRONG_INLINE std::string getDeviceName() const { return m_device_info.device_name; }

  EIGEN_STRONG_INLINE std::string getDeviceVendor() const { return m_device_info.device_vendor; }

  // Returns the nearest power of two: if roundUp is true the result is
  // >= wGSize, otherwise it is <= wGSize.
  EIGEN_STRONG_INLINE size_t getPowerOfTwo(size_t wGSize, bool roundUp) const {
    if (roundUp) --wGSize;
    wGSize |= (wGSize >> 1);
    wGSize |= (wGSize >> 2);
    wGSize |= (wGSize >> 4);
    wGSize |= (wGSize >> 8);
    wGSize |= (wGSize >> 16);
#if EIGEN_ARCH_x86_64 || EIGEN_ARCH_ARM64 || EIGEN_OS_WIN64
    wGSize |= (wGSize >> 32);
#endif
    return ((!roundUp) ? (wGSize - (wGSize >> 1)) : ++wGSize);
  }
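
  // The OR-cascade above smears the highest set bit into every lower bit
  // position. For example, getPowerOfTwo(33, false) turns 0b100001 into
  // 0b111111 = 63, and 63 - (63 >> 1) = 32; getPowerOfTwo(33, true) first
  // decrements to 32, smears it to 63, then increments to 64.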

  EIGEN_STRONG_INLINE cl::sycl::queue &sycl_queue() const { return m_queue; }

  // This function checks if the runtime recorded an error for the
  // underlying stream device.
  EIGEN_STRONG_INLINE bool ok() const {
    if (!exception_caught_) {
      synchronize();
    }
    return !exception_caught_;
  }

 protected:
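  // Waits for `e` and then invokes `callback`. When a callback is given, the
  // wait is scheduled on the internal thread pool so the caller is not
  // blocked; without one, the whole queue is drained synchronously.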
  void synchronize_and_callback(cl::sycl::event e, const std::function<void()> &callback) const {
    if (callback) {
      auto callback_ = [=]() {
#ifdef EIGEN_EXCEPTIONS
        cl::sycl::event(e).wait_and_throw();
#else
        cl::sycl::event(e).wait();
#endif
        callback();
      };
      m_thread_pool.Schedule(std::move(callback_));
    } else {
#ifdef EIGEN_EXCEPTIONS
      m_queue.wait_and_throw();
#else
      m_queue.wait();
#endif
    }
  }

  bool sycl_async_handler(cl::sycl::exception_list exceptions) const {
    bool exception_caught = false;
    for (const auto &e : exceptions) {
      if (e) {
        exception_caught = true;
        EIGEN_THROW_X(e);
      }
    }
    return exception_caught;
  }

  bool exception_caught_ = false;

  mutable cl::sycl::queue m_queue;

  mutable Eigen::ThreadPool m_thread_pool;

  const TensorSycl::internal::SyclDeviceInfo m_device_info;
};

struct SyclDeviceBase {
  // The QueueInterface is not owned: it must outlive this device.
  const QueueInterface *m_queue_stream;
  explicit SyclDeviceBase(const QueueInterface *queue_stream) : m_queue_stream(queue_stream) {}
  EIGEN_STRONG_INLINE const QueueInterface *queue_stream() const { return m_queue_stream; }
};

// SYCL device structure that wraps a QueueInterface and exposes the Eigen
// device API on top of it.
struct SyclDevice : public SyclDeviceBase {
  explicit SyclDevice(const QueueInterface *queue_stream) : SyclDeviceBase(queue_stream) {}

  // 1D launch configuration helper; see QueueInterface::parallel_for_setup.
  template <typename Index>
  EIGEN_STRONG_INLINE void parallel_for_setup(Index n, Index &tileSize, Index &rng, Index &GRange) const {
    queue_stream()->parallel_for_setup(n, tileSize, rng, GRange);
  }

  // 2D launch configuration helper.
  template <typename Index>
  EIGEN_STRONG_INLINE void parallel_for_setup(const std::array<Index, 2> &input_dim, cl::sycl::range<2> &global_range,
                                              cl::sycl::range<2> &local_range) const {
    queue_stream()->parallel_for_setup(input_dim, global_range, local_range);
  }

  // 3D launch configuration helper.
  template <typename Index>
  EIGEN_STRONG_INLINE void parallel_for_setup(const std::array<Index, 3> &input_dim, cl::sycl::range<3> &global_range,
                                              cl::sycl::range<3> &local_range) const {
    queue_stream()->parallel_for_setup(input_dim, global_range, local_range);
  }

  EIGEN_STRONG_INLINE void *allocate(size_t num_bytes) const { return queue_stream()->allocate(num_bytes); }

  EIGEN_STRONG_INLINE void *allocate_temp(size_t num_bytes) const { return queue_stream()->allocate_temp(num_bytes); }

  EIGEN_STRONG_INLINE void deallocate(void *p) const { queue_stream()->deallocate(p); }

  EIGEN_STRONG_INLINE void deallocate_temp(void *buffer) const { queue_stream()->deallocate_temp(buffer); }

  EIGEN_STRONG_INLINE void deallocate_temp(const void *buffer) const { queue_stream()->deallocate_temp(buffer); }

  template <typename data_t>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE data_t *get(data_t *data) const {
    return data;
  }

  // Runtime suitability checks could be applied here; for now every device
  // obtained through the queue interface is considered suitable.
  EIGEN_STRONG_INLINE bool isDeviceSuitable() const { return true; }

  template <typename Index>
  EIGEN_STRONG_INLINE void memcpyHostToDevice(Index *dst, const Index *src, size_t n,
                                              std::function<void()> callback = {}) const {
    queue_stream()->memcpyHostToDevice(dst, src, n, callback);
  }

  template <typename Index>
  EIGEN_STRONG_INLINE void memcpyDeviceToHost(void *dst, const Index *src, size_t n,
                                              std::function<void()> callback = {}) const {
    queue_stream()->memcpyDeviceToHost(dst, src, n, callback);
  }

  template <typename Index>
  EIGEN_STRONG_INLINE void memcpy(void *dst, const Index *src, size_t n) const {
    queue_stream()->memcpy(dst, src, n);
  }

  EIGEN_STRONG_INLINE void memset(void *data, int c, size_t n) const { queue_stream()->memset(data, c, n); }

  template <typename T>
  EIGEN_STRONG_INLINE void fill(T *begin, T *end, const T &value) const {
    queue_stream()->fill(begin, end, value);
  }

  EIGEN_STRONG_INLINE cl::sycl::queue &sycl_queue() const { return queue_stream()->sycl_queue(); }

  EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const { return 48 * 1024; }

  EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const {
    // We won't try to take advantage of the l2 cache for the time being, and
    // there is no l3 cache on sycl devices.
    return firstLevelCacheSize();
  }
  EIGEN_STRONG_INLINE unsigned long getNumSyclMultiProcessors() const {
    return queue_stream()->getNumSyclMultiProcessors();
  }
  EIGEN_STRONG_INLINE unsigned long maxSyclThreadsPerBlock() const { return queue_stream()->maxSyclThreadsPerBlock(); }
  EIGEN_STRONG_INLINE cl::sycl::id<3> maxWorkItemSizes() const { return queue_stream()->maxWorkItemSizes(); }
  EIGEN_STRONG_INLINE unsigned long maxSyclThreadsPerMultiProcessor() const {
    // OpenCL does not have such a concept
    return queue_stream()->maxSyclThreadsPerMultiProcessor();
  }
  EIGEN_STRONG_INLINE size_t sharedMemPerBlock() const { return queue_stream()->sharedMemPerBlock(); }
  EIGEN_STRONG_INLINE size_t getNearestPowerOfTwoWorkGroupSize() const {
    return queue_stream()->getNearestPowerOfTwoWorkGroupSize();
  }

  EIGEN_STRONG_INLINE size_t getPowerOfTwo(size_t val, bool roundUp) const {
    return queue_stream()->getPowerOfTwo(val, roundUp);
  }
  EIGEN_STRONG_INLINE int majorDeviceVersion() const { return queue_stream()->majorDeviceVersion(); }

  EIGEN_STRONG_INLINE void synchronize() const { queue_stream()->synchronize(); }

  // This function checks if the runtime recorded an error for the
  // underlying stream device.
  EIGEN_STRONG_INLINE bool ok() const { return queue_stream()->ok(); }

  EIGEN_STRONG_INLINE bool has_local_memory() const { return queue_stream()->has_local_memory(); }
  EIGEN_STRONG_INLINE unsigned long max_buffer_size() const { return queue_stream()->max_buffer_size(); }
  EIGEN_STRONG_INLINE std::string getPlatformName() const { return queue_stream()->getPlatformName(); }
  EIGEN_STRONG_INLINE std::string getDeviceName() const { return queue_stream()->getDeviceName(); }
  EIGEN_STRONG_INLINE std::string getDeviceVendor() const { return queue_stream()->getDeviceVendor(); }
  template <typename OutScalar, typename KernelType, typename... T>
  EIGEN_ALWAYS_INLINE cl::sycl::event binary_kernel_launcher(T... var) const {
    return queue_stream()->template binary_kernel_launcher<OutScalar, KernelType>(var...);
  }
  template <typename OutScalar, typename KernelType, typename... T>
  EIGEN_ALWAYS_INLINE cl::sycl::event unary_kernel_launcher(T... var) const {
    return queue_stream()->template unary_kernel_launcher<OutScalar, KernelType>(var...);
  }

  template <typename OutScalar, typename KernelType, typename... T>
  EIGEN_ALWAYS_INLINE cl::sycl::event nullary_kernel_launcher(T... var) const {
    return queue_stream()->template nullary_kernel_launcher<OutScalar, KernelType>(var...);
  }
};
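
// End-to-end usage sketch (illustrative only; assumes a supported SYCL device
// is present at run time):
//
//   Eigen::QueueInterface queue_interface(cl::sycl::default_selector());
//   Eigen::SyclDevice device(&queue_interface);
//   float host[8] = {0, 1, 2, 3, 4, 5, 6, 7};
//   auto *dev_ptr = static_cast<float *>(device.allocate(sizeof(host)));
//   device.memcpyHostToDevice(dev_ptr, host, sizeof(host));  // blocking (no callback)
//   device.memcpyDeviceToHost(host, dev_ptr, sizeof(host));
//   device.deallocate(dev_ptr);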
}  // end namespace Eigen

#endif  // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H