From d8e2a34520df11473e4f63bbd29229cced0e1a00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20=C5=9Alusarczyk?= Date: Wed, 10 Dec 2025 10:38:15 +0100 Subject: [PATCH] [SYCL] queue use perfect forwarding --- sycl/include/sycl/queue.hpp | 311 +++++++++++++++++++++--------------- sycl/source/queue.cpp | 6 +- 2 files changed, 189 insertions(+), 128 deletions(-) diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp index 9aa5fe0197604..59ccd513132a4 100644 --- a/sycl/include/sycl/queue.hpp +++ b/sycl/include/sycl/queue.hpp @@ -804,7 +804,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { const T *Src, T *Dest, size_t Count, event DepEvent, const detail::code_location &CodeLoc = detail::code_location::current()) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return this->memcpy(Dest, Src, Count * sizeof(T), DepEvent); + return this->memcpy(Dest, Src, Count * sizeof(T), std::move(DepEvent)); } /// Copies data from one memory region to another, each is either a host @@ -1342,7 +1342,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { const detail::code_location &CodeLoc = detail::code_location::current()) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); return this->memcpy(Dest, Src, NumBytes, Offset, - std::vector{DepEvent}); + std::vector{std::move(DepEvent)}); } /// Copies data from a device_global to USM memory. @@ -1412,7 +1412,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t), StartIndex * sizeof(std::remove_all_extents_t), - DepEvent); + std::move(DepEvent)); } /// Copies elements of type `std::remove_all_extents_t` from a USM memory @@ -1485,7 +1485,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t), StartIndex * sizeof(std::remove_all_extents_t), - DepEvent); + std::move(DepEvent)); } /// Copies elements of type `std::remove_all_extents_t` from a @@ -2737,11 +2737,12 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { "of type launch_config or a kernel functor with a " "get(sycl::ext::oneapi::experimental::properties_tag) " "member function instead.") - std::enable_if_t< - ext::oneapi::experimental::is_property_list::value, - event> single_task(PropertiesT Properties, const KernelType &KernelFunc, - const detail::code_location &CodeLoc = - detail::code_location::current()) { + std::enable_if_t>::value, + event> single_task(PropertiesT &&Properties, + const KernelType &KernelFunc, + const detail::code_location &CodeLoc = + detail::code_location::current()) { static_assert( (detail::check_fn_signature, void()>::value || @@ -2757,7 +2758,8 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { if constexpr (!(detail::KernelLambdaHasKernelHandlerArgT::value)) { return detail::submit_kernel_direct_single_task( - *this, KernelFunc, {}, Properties, TlsCodeLocCapture.query()); + *this, KernelFunc, {}, std::forward(Properties), + TlsCodeLocCapture.query()); } else { return submit( [&](handler &CGH) { @@ -2792,12 +2794,12 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { "of type launch_config or a kernel functor with a " "get(sycl::ext::oneapi::experimental::properties_tag) " "member function instead.") - std::enable_if_t< - ext::oneapi::experimental::is_property_list::value, - event> single_task(event DepEvent, PropertiesT Properties, - const KernelType &KernelFunc, - const detail::code_location &CodeLoc = - detail::code_location::current()) { + std::enable_if_t>::value, + event> single_task(event DepEvent, PropertiesT &&Properties, + const KernelType &KernelFunc, + const detail::code_location &CodeLoc = + detail::code_location::current()) { static_assert( (detail::check_fn_signature, void()>::value || @@ -2813,14 +2815,14 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { if constexpr (!(detail::KernelLambdaHasKernelHandlerArgT::value)) { return detail::submit_kernel_direct_single_task( - *this, KernelFunc, sycl::span(&DepEvent, 1), Properties, - TlsCodeLocCapture.query()); + *this, KernelFunc, sycl::span(&DepEvent, 1), + std::forward(Properties), TlsCodeLocCapture.query()); } else { return submit( [&](handler &CGH) { CGH.depends_on(DepEvent); CGH.template single_task( - Properties, KernelFunc); + std::forward(Properties), KernelFunc); }, TlsCodeLocCapture.query()); } @@ -2853,12 +2855,13 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { "of type launch_config or a kernel functor with a " "get(sycl::ext::oneapi::experimental::properties_tag) " "member function instead.") - std::enable_if_t< - ext::oneapi::experimental::is_property_list::value, - event> single_task(const std::vector &DepEvents, - PropertiesT Properties, const KernelType &KernelFunc, - const detail::code_location &CodeLoc = - detail::code_location::current()) { + std::enable_if_t>::value, + event> single_task(const std::vector &DepEvents, + PropertiesT &&Properties, + const KernelType &KernelFunc, + const detail::code_location &CodeLoc = + detail::code_location::current()) { static_assert( (detail::check_fn_signature, void()>::value || @@ -2874,13 +2877,14 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { if constexpr (!(detail::KernelLambdaHasKernelHandlerArgT::value)) { return detail::submit_kernel_direct_single_task( - *this, KernelFunc, DepEvents, Properties, TlsCodeLocCapture.query()); + *this, KernelFunc, DepEvents, std::forward(Properties), + TlsCodeLocCapture.query()); } else { return submit( [&](handler &CGH) { CGH.depends_on(DepEvents); CGH.template single_task( - Properties, KernelFunc); + std::forward(Properties), KernelFunc); }, TlsCodeLocCapture.query()); } @@ -2914,11 +2918,13 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { "of type launch_config or a kernel functor with a " "get(sycl::ext::oneapi::experimental::properties_tag) " "member function instead.") - std::enable_if_t< - ext::oneapi::experimental::is_property_list::value, - event> parallel_for(range<1> Range, PropertiesT Properties, - RestT &&...Rest) { - return parallel_for_impl(Range, Properties, Rest...); + std::enable_if_t>::value, + event> parallel_for(range<1> Range, PropertiesT &&Properties, + RestT &&...Rest) { + return parallel_for_impl(Range, + std::forward(Properties), + std::forward(Rest)...); } /// parallel_for version with a kernel represented as a lambda + range that @@ -2929,7 +2935,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { /// const KernelType &KernelFunc". template event parallel_for(range<1> Range, RestT &&...Rest) { - return parallel_for_impl(Range, Rest...); + return parallel_for_impl(Range, std::forward(Rest)...); } /// parallel_for version with a kernel represented as a lambda + range that @@ -2945,11 +2951,13 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { "of type launch_config or a kernel functor with a " "get(sycl::ext::oneapi::experimental::properties_tag) " "member function instead.") - std::enable_if_t< - ext::oneapi::experimental::is_property_list::value, - event> parallel_for(range<2> Range, PropertiesT Properties, - RestT &&...Rest) { - return parallel_for_impl(Range, Properties, Rest...); + std::enable_if_t>::value, + event> parallel_for(range<2> Range, PropertiesT &&Properties, + RestT &&...Rest) { + return parallel_for_impl(Range, + std::forward(Properties), + std::forward(Rest)...); } /// parallel_for version with a kernel represented as a lambda + range that @@ -2960,7 +2968,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { /// const KernelType &KernelFunc". template event parallel_for(range<2> Range, RestT &&...Rest) { - return parallel_for_impl(Range, Rest...); + return parallel_for_impl(Range, std::forward(Rest)...); } /// parallel_for version with a kernel represented as a lambda + range that @@ -2976,11 +2984,13 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { "of type launch_config or a kernel functor with a " "get(sycl::ext::oneapi::experimental::properties_tag) " "member function instead.") - std::enable_if_t< - ext::oneapi::experimental::is_property_list::value, - event> parallel_for(range<3> Range, PropertiesT Properties, - RestT &&...Rest) { - return parallel_for_impl(Range, Properties, Rest...); + std::enable_if_t>::value, + event> parallel_for(range<3> Range, PropertiesT &&Properties, + RestT &&...Rest) { + return parallel_for_impl(Range, + std::forward(Properties), + std::forward(Rest)...); } /// parallel_for version with a kernel represented as a lambda + range that @@ -2991,7 +3001,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { /// const KernelType &KernelFunc". template event parallel_for(range<3> Range, RestT &&...Rest) { - return parallel_for_impl(Range, Rest...); + return parallel_for_impl(Range, std::forward(Rest)...); } /// parallel_for version with a kernel represented as a lambda + range that @@ -3008,11 +3018,14 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { "of type launch_config or a kernel functor with a " "get(sycl::ext::oneapi::experimental::properties_tag) " "member function instead.") - std::enable_if_t< - ext::oneapi::experimental::is_property_list::value, - event> parallel_for(range<1> Range, event DepEvent, - PropertiesT Properties, RestT &&...Rest) { - return parallel_for_impl(Range, DepEvent, Properties, Rest...); + std::enable_if_t>::value, + event> parallel_for(range<1> Range, event DepEvent, + PropertiesT &&Properties, + RestT &&...Rest) { + return parallel_for_impl(Range, std::move(DepEvent), + std::forward(Properties), + std::forward(Rest)...); } /// parallel_for version with a kernel represented as a lambda + range that @@ -3024,7 +3037,8 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { /// const KernelType &KernelFunc". template event parallel_for(range<1> Range, event DepEvent, RestT &&...Rest) { - return parallel_for_impl(Range, DepEvent, Rest...); + return parallel_for_impl(Range, std::move(DepEvent), + std::forward(Rest)...); } /// parallel_for version with a kernel represented as a lambda + range that @@ -3041,11 +3055,14 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { "of type launch_config or a kernel functor with a " "get(sycl::ext::oneapi::experimental::properties_tag) " "member function instead.") - std::enable_if_t< - ext::oneapi::experimental::is_property_list::value, - event> parallel_for(range<2> Range, event DepEvent, - PropertiesT Properties, RestT &&...Rest) { - return parallel_for_impl(Range, DepEvent, Properties, Rest...); + std::enable_if_t>::value, + event> parallel_for(range<2> Range, event DepEvent, + PropertiesT &&Properties, + RestT &&...Rest) { + return parallel_for_impl(Range, std::move(DepEvent), + std::forward(Properties), + std::forward(Rest)...); } /// parallel_for version with a kernel represented as a lambda + range that @@ -3057,7 +3074,8 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { /// const KernelType &KernelFunc". template event parallel_for(range<2> Range, event DepEvent, RestT &&...Rest) { - return parallel_for_impl(Range, DepEvent, Rest...); + return parallel_for_impl(Range, std::move(DepEvent), + std::forward(Rest)...); } /// parallel_for version with a kernel represented as a lambda + range that @@ -3074,11 +3092,14 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { "of type launch_config or a kernel functor with a " "get(sycl::ext::oneapi::experimental::properties_tag) " "member function instead.") - std::enable_if_t< - ext::oneapi::experimental::is_property_list::value, - event> parallel_for(range<3> Range, event DepEvent, - PropertiesT Properties, RestT &&...Rest) { - return parallel_for_impl(Range, DepEvent, Properties, Rest...); + std::enable_if_t>::value, + event> parallel_for(range<3> Range, event DepEvent, + PropertiesT &&Properties, + RestT &&...Rest) { + return parallel_for_impl(Range, std::move(DepEvent), + std::forward(Properties), + std::forward(Rest)...); } /// parallel_for version with a kernel represented as a lambda + range that @@ -3090,7 +3111,8 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { /// const KernelType &KernelFunc". template event parallel_for(range<3> Range, event DepEvent, RestT &&...Rest) { - return parallel_for_impl(Range, DepEvent, Rest...); + return parallel_for_impl(Range, std::move(DepEvent), + std::forward(Rest)...); } /// parallel_for version with a kernel represented as a lambda + range that @@ -3108,11 +3130,15 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { "of type launch_config or a kernel functor with a " "get(sycl::ext::oneapi::experimental::properties_tag) " "member function instead.") - std::enable_if_t< - ext::oneapi::experimental::is_property_list::value, - event> parallel_for(range<1> Range, const std::vector &DepEvents, - PropertiesT Properties, RestT &&...Rest) { - return parallel_for_impl(Range, DepEvents, Properties, Rest...); + std::enable_if_t>::value, + event> parallel_for(range<1> Range, + const std::vector &DepEvents, + PropertiesT &&Properties, + RestT &&...Rest) { + return parallel_for_impl(Range, DepEvents, + std::forward(Properties), + std::forward(Rest)...); } /// parallel_for version with a kernel represented as a lambda + range that @@ -3126,7 +3152,8 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { template event parallel_for(range<1> Range, const std::vector &DepEvents, RestT &&...Rest) { - return parallel_for_impl(Range, DepEvents, Rest...); + return parallel_for_impl(Range, DepEvents, + std::forward(Rest)...); } /// parallel_for version with a kernel represented as a lambda + range that @@ -3144,11 +3171,15 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { "of type launch_config or a kernel functor with a " "get(sycl::ext::oneapi::experimental::properties_tag) " "member function instead.") - std::enable_if_t< - ext::oneapi::experimental::is_property_list::value, - event> parallel_for(range<2> Range, const std::vector &DepEvents, - PropertiesT Properties, RestT &&...Rest) { - return parallel_for_impl(Range, DepEvents, Properties, Rest...); + std::enable_if_t>::value, + event> parallel_for(range<2> Range, + const std::vector &DepEvents, + PropertiesT &&Properties, + RestT &&...Rest) { + return parallel_for_impl(Range, DepEvents, + std::forward(Properties), + std::forward(Rest)...); } /// parallel_for version with a kernel represented as a lambda + range that @@ -3162,7 +3193,8 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { template event parallel_for(range<2> Range, const std::vector &DepEvents, RestT &&...Rest) { - return parallel_for_impl(Range, DepEvents, Rest...); + return parallel_for_impl(Range, DepEvents, + std::forward(Rest)...); } /// parallel_for version with a kernel represented as a lambda + range that @@ -3180,11 +3212,15 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { "of type launch_config or a kernel functor with a " "get(sycl::ext::oneapi::experimental::properties_tag) " "member function instead.") - std::enable_if_t< - ext::oneapi::experimental::is_property_list::value, - event> parallel_for(range<3> Range, const std::vector &DepEvents, - PropertiesT Properties, RestT &&...Rest) { - return parallel_for_impl(Range, DepEvents, Properties, Rest...); + std::enable_if_t>::value, + event> parallel_for(range<3> Range, + const std::vector &DepEvents, + PropertiesT &&Properties, + RestT &&...Rest) { + return parallel_for_impl(Range, DepEvents, + std::forward(Properties), + std::forward(Rest)...); } /// parallel_for version with a kernel represented as a lambda + range that @@ -3198,7 +3234,8 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { template event parallel_for(range<3> Range, const std::vector &DepEvents, RestT &&...Rest) { - return parallel_for_impl(Range, DepEvents, Rest...); + return parallel_for_impl(Range, DepEvents, + std::forward(Rest)...); } // While other shortcuts with offsets are able to go through parallel_for(..., @@ -3306,11 +3343,12 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { "of type launch_config or a kernel functor with a " "get(sycl::ext::oneapi::experimental::properties_tag) " "member function instead.") - std::enable_if_t< - detail::AreAllButLastReductions::value && - ext::oneapi::experimental::is_property_list::value, - event> parallel_for(nd_range Range, PropertiesT Properties, - RestT &&...Rest) { + std::enable_if_t::value && + ext::oneapi::experimental::is_property_list< + std::decay_t>::value, + event> parallel_for(nd_range Range, + PropertiesT &&Properties, + RestT &&...Rest) { constexpr detail::code_location CodeLoc = getCodeLocation(); detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); using KernelType = std::tuple_element_t<0, std::tuple>; @@ -3322,11 +3360,14 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { KernelType, sycl::nd_item>::value)) { return detail::submit_kernel_direct_parallel_for( - *this, Range, Rest..., {}, Properties, TlsCodeLocCapture.query()); + *this, Range, std::forward(Rest)..., {}, + std::forward(Properties), TlsCodeLocCapture.query()); } else return submit( [&](handler &CGH) { - CGH.template parallel_for(Range, Properties, Rest...); + CGH.template parallel_for( + Range, std::forward(Properties), + std::forward(Rest)...); }, TlsCodeLocCapture.query()); } @@ -3351,13 +3392,14 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { !(detail::KernelLambdaHasKernelHandlerArgT< KernelType, sycl::nd_item>::value)) { return detail::submit_kernel_direct_parallel_for( - *this, Range, Rest..., {}, + *this, Range, std::forward(Rest)..., {}, ext::oneapi::experimental::empty_properties_t{}, TlsCodeLocCapture.query()); } else { return submit( [&](handler &CGH) { - CGH.template parallel_for(Range, Rest...); + CGH.template parallel_for(Range, + std::forward(Rest)...); }, TlsCodeLocCapture.query()); } @@ -3377,17 +3419,20 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { "of type launch_config or a kernel functor with a " "get(sycl::ext::oneapi::experimental::properties_tag) " "member function instead.") - std::enable_if_t< - detail::AreAllButLastReductions::value && - ext::oneapi::experimental::is_property_list::value, - event> parallel_for(nd_range Range, event DepEvent, - PropertiesT Properties, RestT &&...Rest) { + std::enable_if_t::value && + ext::oneapi::experimental::is_property_list< + std::decay_t>::value, + event> parallel_for(nd_range Range, event DepEvent, + PropertiesT &&Properties, + RestT &&...Rest) { constexpr detail::code_location CodeLoc = getCodeLocation(); detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); return submit( [&](handler &CGH) { CGH.depends_on(DepEvent); - CGH.template parallel_for(Range, Properties, Rest...); + CGH.template parallel_for( + Range, std::forward(Properties), + std::forward(Rest)...); }, TlsCodeLocCapture.query()); } @@ -3413,14 +3458,16 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { !(detail::KernelLambdaHasKernelHandlerArgT< KernelType, sycl::nd_item>::value)) { return detail::submit_kernel_direct_parallel_for( - *this, Range, Rest..., sycl::span(&DepEvent, 1), + *this, Range, std::forward(Rest)..., + sycl::span(&DepEvent, 1), ext::oneapi::experimental::empty_properties_t{}, TlsCodeLocCapture.query()); } else { return submit( [&](handler &CGH) { CGH.depends_on(DepEvent); - CGH.template parallel_for(Range, Rest...); + CGH.template parallel_for(Range, + std::forward(Rest)...); }, TlsCodeLocCapture.query()); } @@ -3441,18 +3488,21 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { "of type launch_config or a kernel functor with a " "get(sycl::ext::oneapi::experimental::properties_tag) " "member function instead.") - std::enable_if_t< - detail::AreAllButLastReductions::value && - ext::oneapi::experimental::is_property_list::value, - event> parallel_for(nd_range Range, - const std::vector &DepEvents, - PropertiesT Properties, RestT &&...Rest) { + std::enable_if_t::value && + ext::oneapi::experimental::is_property_list< + std::decay_t>::value, + event> parallel_for(nd_range Range, + const std::vector &DepEvents, + PropertiesT &&Properties, + RestT &&...Rest) { constexpr detail::code_location CodeLoc = getCodeLocation(); detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); return submit( [&](handler &CGH) { CGH.depends_on(DepEvents); - CGH.template parallel_for(Range, Properties, Rest...); + CGH.template parallel_for( + Range, std::forward(Properties), + std::forward(Rest)...); }, TlsCodeLocCapture.query()); } @@ -3480,14 +3530,15 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { !(detail::KernelLambdaHasKernelHandlerArgT< KernelType, sycl::nd_item>::value)) { return detail::submit_kernel_direct_parallel_for( - *this, Range, Rest..., DepEvents, + *this, Range, std::forward(Rest)..., DepEvents, ext::oneapi::experimental::empty_properties_t{}, TlsCodeLocCapture.query()); } else { return submit( [&](handler &CGH) { CGH.depends_on(DepEvents); - CGH.template parallel_for(Range, Rest...); + CGH.template parallel_for(Range, + std::forward(Rest)...); }, TlsCodeLocCapture.query()); } @@ -3807,17 +3858,19 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { /// \param KernelFunc is the Kernel functor or lambda template - std::enable_if_t< - detail::AreAllButLastReductions::value && - ext::oneapi::experimental::is_property_list::value, - event> - parallel_for_impl(range Range, PropertiesT Properties, + std::enable_if_t::value && + ext::oneapi::experimental::is_property_list< + std::decay_t>::value, + event> + parallel_for_impl(range Range, PropertiesT &&Properties, RestT &&...Rest) { constexpr detail::code_location CodeLoc = getCodeLocation(); detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); return submit( [&](handler &CGH) { - CGH.template parallel_for(Range, Properties, Rest...); + CGH.template parallel_for( + Range, std::forward(Properties), + std::forward(Rest)...); }, TlsCodeLocCapture.query()); } @@ -3831,7 +3884,8 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { std::enable_if_t::value, event> parallel_for_impl(range Range, RestT &&...Rest) { return parallel_for_impl( - Range, ext::oneapi::experimental::empty_properties_t{}, Rest...); + Range, ext::oneapi::experimental::empty_properties_t{}, + std::forward(Rest)...); } /// parallel_for_impl with a kernel represented as a lambda + range that @@ -3843,16 +3897,19 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { /// \param KernelFunc is the Kernel functor or lambda template - std::enable_if_t< - ext::oneapi::experimental::is_property_list::value, event> - parallel_for_impl(range Range, event DepEvent, PropertiesT Properties, + std::enable_if_t>::value, + event> + parallel_for_impl(range Range, event DepEvent, PropertiesT &&Properties, RestT &&...Rest) { constexpr detail::code_location CodeLoc = getCodeLocation(); detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); return submit( [&](handler &CGH) { CGH.depends_on(DepEvent); - CGH.template parallel_for(Range, Properties, Rest...); + CGH.template parallel_for( + Range, std::forward(Properties), + std::forward(Rest)...); }, TlsCodeLocCapture.query()); } @@ -3866,8 +3923,9 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { template event parallel_for_impl(range Range, event DepEvent, RestT &&...Rest) { return parallel_for_impl( - Range, DepEvent, ext::oneapi::experimental::empty_properties_t{}, - Rest...); + Range, std::move(DepEvent), + ext::oneapi::experimental::empty_properties_t{}, + std::forward(Rest)...); } /// parallel_for_impl version with a kernel represented as a lambda + range @@ -3880,16 +3938,19 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { /// \param KernelFunc is the Kernel functor or lambda template - std::enable_if_t< - ext::oneapi::experimental::is_property_list::value, event> + std::enable_if_t>::value, + event> parallel_for_impl(range Range, const std::vector &DepEvents, - PropertiesT Properties, RestT &&...Rest) { + PropertiesT &&Properties, RestT &&...Rest) { constexpr detail::code_location CodeLoc = getCodeLocation(); detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); return submit( [&](handler &CGH) { CGH.depends_on(DepEvents); - CGH.template parallel_for(Range, Properties, Rest...); + CGH.template parallel_for( + Range, std::forward(Properties), + std::forward(Rest)...); }, TlsCodeLocCapture.query()); } @@ -3907,7 +3968,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { RestT &&...Rest) { return parallel_for_impl( Range, DepEvents, ext::oneapi::experimental::empty_properties_t{}, - Rest...); + std::forward(Rest)...); } event memcpyToDeviceGlobal(void *DeviceGlobalPtr, const void *Src, diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 1efa8a45b5d05..c1a4a957bd5eb 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -107,7 +107,7 @@ event queue::memset(void *Ptr, int Value, size_t Count, event queue::memset(void *Ptr, int Value, size_t Count, event DepEvent, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->memset(Ptr, Value, Count, {DepEvent}, + return impl->memset(Ptr, Value, Count, {std::move(DepEvent)}, /*CallerNeedsEvent=*/true); } @@ -129,7 +129,7 @@ event queue::memcpy(void *Dest, const void *Src, size_t Count, event queue::memcpy(void *Dest, const void *Src, size_t Count, event DepEvent, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->memcpy(Dest, Src, Count, {DepEvent}, + return impl->memcpy(Dest, Src, Count, {std::move(DepEvent)}, /*CallerNeedsEvent=*/true, TlsCodeLocCapture.query()); } @@ -152,7 +152,7 @@ event queue::mem_advise(const void *Ptr, size_t Length, int Advice, event DepEvent, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); return impl->mem_advise(Ptr, Length, ur_usm_advice_flags_t(Advice), - {DepEvent}, + {std::move(DepEvent)}, /*CallerNeedsEvent=*/true); }