/* * SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * * NVIDIA SOFTWARE LICENSE * * This license is a legal agreement between you and NVIDIA Corporation ("NVIDIA") and governs your use of the * NVIDIA/CUDA C++ Library software and materials provided hereunder (“SOFTWARE”). * * This license can be accepted only by an adult of legal age of majority in the country in which the SOFTWARE is used. * If you are under the legal age of majority, you must ask your parent or legal guardian to consent to this license. By * taking delivery of the SOFTWARE, you affirm that you have reached the legal age of majority, you accept the terms of * this license, and you take legal and financial responsibility for the actions of your permitted users. * * You agree to use the SOFTWARE only for purposes that are permitted by (a) this license, and (b) any applicable law, * regulation or generally accepted practices or guidelines in the relevant jurisdictions. * * 1. LICENSE. Subject to the terms of this license, NVIDIA grants you a non-exclusive limited license to: (a) install * and use the SOFTWARE, and (b) distribute the SOFTWARE subject to the distribution requirements described in this * license. NVIDIA reserves all rights, title and interest in and to the SOFTWARE not expressly granted to you under * this license. * * 2. DISTRIBUTION REQUIREMENTS. These are the distribution requirements for you to exercise the distribution grant: * a. The terms under which you distribute the SOFTWARE must be consistent with the terms of this license, * including (without limitation) terms relating to the license grant and license restrictions and protection of * NVIDIA’s intellectual property rights. b. You agree to notify NVIDIA in writing of any known or suspected * distribution or use of the SOFTWARE not in compliance with the requirements of this license, and to enforce the terms * of your agreements with respect to distributed SOFTWARE. * * 3. LIMITATIONS. Your license to use the SOFTWARE is restricted as follows: * a. The SOFTWARE is licensed for you to develop applications only for use in systems with NVIDIA GPUs. * b. You may not reverse engineer, decompile or disassemble, or remove copyright or other proprietary notices from * any portion of the SOFTWARE or copies of the SOFTWARE. c. You may not modify or create derivative works of any * portion of the SOFTWARE. d. You may not bypass, disable, or circumvent any technical measure, encryption, * security, digital rights management or authentication mechanism in the SOFTWARE. e. You may not use the SOFTWARE * in any manner that would cause it to become subject to an open source software license. As examples, licenses that * require as a condition of use, modification, and/or distribution that the SOFTWARE be (i) disclosed or distributed in * source code form; (ii) licensed for the purpose of making derivative works; or (iii) redistributable at no charge. f. * Unless you have an agreement with NVIDIA for this purpose, you may not use the SOFTWARE with any system or * application where the use or failure of the system or application can reasonably be expected to threaten or result in * personal injury, death, or catastrophic loss. Examples include use in avionics, navigation, military, medical, life * support or other life critical applications. NVIDIA does not design, test or manufacture the SOFTWARE for these * critical uses and NVIDIA shall not be liable to you or any third party, in whole or in part, for any claims or * damages arising from such uses. g. You agree to defend, indemnify and hold harmless NVIDIA and its affiliates, * and their respective employees, contractors, agents, officers and directors, from and against any and all claims, * damages, obligations, losses, liabilities, costs or debt, fines, restitutions and expenses (including but not limited * to attorney’s fees and costs incident to establishing the right of indemnification) arising out of or related to use * of the SOFTWARE outside of the scope of this Agreement, or not in compliance with its terms. * * 4. PRE-RELEASE. SOFTWARE versions identified as alpha, beta, preview, early access or otherwise as pre-release may * not be fully functional, may contain errors or design flaws, and may have reduced or different security, privacy, * availability, and reliability standards relative to commercial versions of NVIDIA software and materials. You may use * a pre-release SOFTWARE version at your own risk, understanding that these versions are not intended for use in * production or business-critical systems. * * 5. OWNERSHIP. The SOFTWARE and the related intellectual property rights therein are and will remain the sole and * exclusive property of NVIDIA or its licensors. The SOFTWARE is copyrighted and protected by the laws of the United * States and other countries, and international treaty provisions. NVIDIA may make changes to the SOFTWARE, at any time * without notice, but is not obligated to support or update the SOFTWARE. * * 6. COMPONENTS UNDER OTHER LICENSES. The SOFTWARE may include NVIDIA or third-party components with separate legal * notices or terms as may be described in proprietary notices accompanying the SOFTWARE. If and to the extent there is * a conflict between the terms in this license and the license terms associated with a component, the license terms * associated with the components control only to the extent necessary to resolve the conflict. * * 7. FEEDBACK. You may, but don’t have to, provide to NVIDIA any Feedback. “Feedback” means any suggestions, bug fixes, * enhancements, modifications, feature requests or other feedback regarding the SOFTWARE. For any Feedback that you * voluntarily provide, you hereby grant NVIDIA and its affiliates a perpetual, non-exclusive, worldwide, irrevocable * license to use, reproduce, modify, license, sublicense (through multiple tiers of sublicensees), and distribute * (through multiple tiers of distributors) the Feedback without the payment of any royalties or fees to you. NVIDIA * will use Feedback at its choice. * * 8. NO WARRANTIES. THE SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY EXPRESS OR IMPLIED WARRANTY OF ANY KIND INCLUDING, BUT * NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, NONINFRINGEMENT, OR FITNESS FOR A PARTICULAR PURPOSE. NVIDIA DOES NOT * WARRANT THAT THE SOFTWARE WILL MEET YOUR REQUIREMENTS OR THAT THE OPERATION THEREOF WILL BE UNINTERRUPTED OR * ERROR-FREE, OR THAT ALL ERRORS WILL BE CORRECTED. * * 9. LIMITATIONS OF LIABILITY. TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS AFFILIATES SHALL NOT BE LIABLE * FOR ANY SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, PROJECT DELAYS, LOSS OF USE, * LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION WITH * THIS LICENSE OR THE USE OR PERFORMANCE OF THE SOFTWARE, WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON * BREACH OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE), PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION * OR THEORY OF LIABILITY, EVEN IF NVIDIA HAS PREVIOUSLY BEEN ADVISED OF, OR COULD REASONABLY HAVE FORESEEN, THE * POSSIBILITY OF SUCH DAMAGES. IN NO EVENT WILL NVIDIA’S AND ITS AFFILIATES TOTAL CUMULATIVE LIABILITY UNDER OR ARISING * OUT OF THIS LICENSE EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE * OR EXTEND THIS LIMIT. * * 10. TERMINATION. Your rights under this license will terminate automatically without notice from NVIDIA if you fail * to comply with any term and condition of this license or if you commence or participate in any legal proceeding * against NVIDIA with respect to the SOFTWARE. NVIDIA may terminate this license with advance written notice to you if * NVIDIA decides to no longer provide the SOFTWARE in a country or, in NVIDIA’s sole discretion, the continued use of * it is no longer commercially viable. Upon any termination of this license, you agree to promptly discontinue use of * the SOFTWARE and destroy all copies in your possession or control. Your prior distributions in accordance with this * license are not affected by the termination of this license. All provisions of this license will survive termination, * except for the license granted to you. * * 11. APPLICABLE LAW. This license will be governed in all respects by the laws of the United States and of the State * of Delaware as those laws are applied to contracts entered into and performed entirely within Delaware by Delaware * residents, without regard to the conflicts of laws principles. The United Nations Convention on Contracts for the * International Sale of Goods is specifically disclaimed. You agree to all terms of this Agreement in the English * language. The state or federal courts residing in Santa Clara County, California shall have exclusive jurisdiction * over any dispute or claim arising out of this license. Notwithstanding this, you agree that NVIDIA shall still be * allowed to apply for injunctive remedies or an equivalent type of urgent legal relief in any jurisdiction. * * 12. NO ASSIGNMENT. This license and your rights and obligations thereunder may not be assigned by you by any means or * operation of law without NVIDIA’s permission. Any attempted assignment not approved by NVIDIA in writing shall be * void and of no effect. * * 13. EXPORT. The SOFTWARE is subject to United States export laws and regulations. You agree that you will not ship, * transfer or export the SOFTWARE into any country, or use the SOFTWARE in any manner, prohibited by the United States * Bureau of Industry and Security or economic sanctions regulations administered by the U.S. Department of Treasury’s * Office of Foreign Assets Control (OFAC), or any applicable export laws, restrictions or regulations. These laws * include restrictions on destinations, end users and end use. By accepting this license, you confirm that you are not * a resident or citizen of any country currently embargoed by the U.S. and that you are not otherwise prohibited from * receiving the SOFTWARE. * * 14. GOVERNMENT USE. The SOFTWARE has been developed entirely at private expense and is “commercial items” consisting * of “commercial computer software” and “commercial computer software documentation” provided with RESTRICTED RIGHTS. * Use, duplication or disclosure by the U.S. Government or a U.S. Government subcontractor is subject to the * restrictions in this license pursuant to DFARS 227.7202-3(a) or as set forth in subparagraphs (b)(1) and (2) of the * Commercial Computer Software - Restricted Rights clause at FAR 52.227-19, as applicable. Contractor/manufacturer is * NVIDIA, 2788 San Tomas Expressway, Santa Clara, CA 95051. * * 15. ENTIRE AGREEMENT. This license is the final, complete and exclusive agreement between the parties relating to the * subject matter of this license and supersedes all prior or contemporaneous understandings and agreements relating to * this subject matter, whether oral or written. If any court of competent jurisdiction determines that any provision of * this license is illegal, invalid or unenforceable, the remaining provisions will remain in full force and effect. * This license may only be modified in a writing signed by an authorized representative of each party. * * (v. August 20, 2021) */ #ifndef _CUDA_ANNOTATED_PTR #define _CUDA_ANNOTATED_PTR #include #if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) # pragma GCC system_header #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) # pragma clang system_header #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) # pragma system_header #endif // no system header #include #include #include #include _LIBCUDACXX_BEGIN_NAMESPACE_CUDA class access_property { private: std::uint64_t __descriptor = 0; public: struct shared {}; struct global {}; struct persisting { _CCCL_HOST_DEVICE constexpr operator cudaAccessProperty() const noexcept { return cudaAccessProperty::cudaAccessPropertyPersisting; } }; struct streaming { _CCCL_HOST_DEVICE constexpr operator cudaAccessProperty() const noexcept { return cudaAccessProperty::cudaAccessPropertyStreaming; } }; struct normal { _CCCL_HOST_DEVICE constexpr operator cudaAccessProperty() const noexcept { return cudaAccessProperty::cudaAccessPropertyNormal; } }; _CCCL_HOST_DEVICE constexpr access_property(global) noexcept : __descriptor(__detail_ap::__sm_80::__interleave_normal()) {} _CCCL_HOST_DEVICE constexpr access_property() noexcept : __descriptor(__detail_ap::__sm_80::__interleave_normal()) {} _CCCL_HIDE_FROM_ABI constexpr access_property(access_property const&) noexcept = default; _CCCL_HIDE_FROM_ABI access_property& operator=(const access_property& other) noexcept = default; _CCCL_HOST_DEVICE constexpr access_property(normal, float __fraction) : __descriptor(__detail_ap::__interleave(normal{}, __fraction)) {} _CCCL_HOST_DEVICE constexpr access_property(streaming, float __fraction) : __descriptor(__detail_ap::__interleave(streaming{}, __fraction)) {} _CCCL_HOST_DEVICE constexpr access_property(persisting, float __fraction) : __descriptor(__detail_ap::__interleave(persisting{}, __fraction)) {} _CCCL_HOST_DEVICE constexpr access_property(normal, float __fraction, streaming) : __descriptor(__detail_ap::__interleave(normal{}, __fraction, streaming{})) {} _CCCL_HOST_DEVICE constexpr access_property(persisting, float __fraction, streaming) : __descriptor(__detail_ap::__interleave(persisting{}, __fraction, streaming{})) {} _CCCL_HOST_DEVICE constexpr access_property(normal) noexcept : access_property(normal{}, 1.0) {} _CCCL_HOST_DEVICE constexpr access_property(streaming) noexcept : access_property(streaming{}, 1.0) {} _CCCL_HOST_DEVICE constexpr access_property(persisting) noexcept : access_property(persisting{}, 1.0) {} _CCCL_HOST_DEVICE constexpr access_property(void* __ptr, std::size_t __hit_bytes, std::size_t __total_bytes, normal) : __descriptor(__detail_ap::__block(__ptr, __hit_bytes, __total_bytes, normal{})) {} _CCCL_HOST_DEVICE constexpr access_property(void* __ptr, std::size_t __hit_bytes, std::size_t __total_bytes, streaming) : __descriptor(__detail_ap::__block(__ptr, __hit_bytes, __total_bytes, streaming{})) {} _CCCL_HOST_DEVICE constexpr access_property( void* __ptr, std::size_t __hit_bytes, std::size_t __total_bytes, persisting) : __descriptor(__detail_ap::__block(__ptr, __hit_bytes, __total_bytes, persisting{})) {} _CCCL_HOST_DEVICE constexpr access_property( void* __ptr, std::size_t __hit_bytes, std::size_t __total_bytes, normal, streaming) : __descriptor(__detail_ap::__block(__ptr, __hit_bytes, __total_bytes, normal{}, streaming{})) {} _CCCL_HOST_DEVICE constexpr access_property( void* __ptr, std::size_t __hit_bytes, std::size_t __total_bytes, persisting, streaming) : __descriptor(__detail_ap::__block(__ptr, __hit_bytes, __total_bytes, persisting{}, streaming{})) {} _CCCL_HOST_DEVICE constexpr explicit operator std::uint64_t() const noexcept { return __descriptor; } }; _LIBCUDACXX_END_NAMESPACE_CUDA #include _LIBCUDACXX_BEGIN_NAMESPACE_CUDA template _CCCL_HOST_DEVICE _Tp* associate_access_property(_Tp* __ptr, _Property __prop) { static_assert( std::is_same<_Property, access_property>::value || std::is_same<_Property, access_property::persisting>::value || std::is_same<_Property, access_property::streaming>::value || std::is_same<_Property, access_property::normal>::value || std::is_same<_Property, access_property::global>::value || std::is_same<_Property, access_property::shared>::value, "property is not convertible to cuda::access_property"); return __detail_ap::__associate(__ptr, __prop); } template _CCCL_HOST_DEVICE void apply_access_property(const volatile void* __ptr, const _Shape __shape, access_property::persisting __prop) noexcept { NV_IF_TARGET( NV_PROVIDES_SM_80, (if (!__isGlobal((void*) __ptr)) return; char* __p = reinterpret_cast(const_cast(__ptr)); static constexpr std::size_t _LINE_SIZE = 128; std::size_t __nbytes = static_cast(__shape); std::size_t __end = ((std::uintptr_t)(__p + __nbytes) % _LINE_SIZE) ? __nbytes + _LINE_SIZE : __nbytes; __end /= _LINE_SIZE; // Apply to all 128 bytes aligned cache lines inclusive of __p for (std::size_t __i = 0; __i < __end; __i += _LINE_SIZE) { asm volatile("prefetch.global.L2::evict_last [%0];" ::"l"(__p + (__i * _LINE_SIZE)) :); })) } template _CCCL_HOST_DEVICE void apply_access_property(const volatile void* __ptr, const _Shape __shape, access_property::normal __prop) noexcept { NV_IF_TARGET( NV_PROVIDES_SM_80, (if (!__isGlobal((void*) __ptr)) return; char* __p = reinterpret_cast(const_cast(__ptr)); static constexpr std::size_t _LINE_SIZE = 128; std::size_t __nbytes = static_cast(__shape); std::size_t __end = ((std::uintptr_t)(__p + __nbytes) % _LINE_SIZE) ? __nbytes + _LINE_SIZE : __nbytes; __end /= _LINE_SIZE; // Apply to all 128 bytes aligned cache lines inclusive of __p for (std::size_t __i = 0; __i < __end; __i += _LINE_SIZE) { asm volatile("prefetch.global.L2::evict_normal [%0];" ::"l"(__p + (__i * _LINE_SIZE)) :); })) } template class annotated_ptr : public __detail_ap::__annotated_ptr_base<_Property> { public: using value_type = _Tp; using size_type = std::size_t; using reference = value_type&; using pointer = value_type*; using const_pointer = value_type const*; using difference_type = std::ptrdiff_t; private: using __self = annotated_ptr<_Tp, _Property>; // Converting from a 64-bit to 32-bit shared pointer and maybe back just for storage might or might not be profitable. pointer __repr = (pointer) ((size_type) nullptr); _CCCL_HOST_DEVICE pointer __get(bool __skip_prop = false, difference_type __n = 0) const { NV_IF_TARGET(NV_IS_DEVICE, (if (!__skip_prop) { return static_cast( this->__apply_prop(const_cast(static_cast(__repr + __n)))); })) return __repr + __n; } _CCCL_HOST_DEVICE pointer __offset(difference_type __n, bool __skip_prop = false) const { return __get(__skip_prop, __n); } public: _CCCL_HOST_DEVICE pointer operator->() const { return __get(); } _CCCL_HOST_DEVICE reference operator*() const { return *__get(); } _CCCL_HOST_DEVICE reference operator[](difference_type __n) const { return *__offset(__n); } _CCCL_HOST_DEVICE constexpr difference_type operator-(annotated_ptr o) const { return __repr - o.__repr; } _CCCL_HIDE_FROM_ABI constexpr annotated_ptr() noexcept = default; _CCCL_HIDE_FROM_ABI constexpr annotated_ptr(annotated_ptr const&) noexcept = default; // No constexpr for c11 as the method can't be const _CCCL_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 annotated_ptr& operator=(annotated_ptr const& other) noexcept = default; _CCCL_HOST_DEVICE explicit annotated_ptr(pointer __p) : __repr(__p) { NV_IF_TARGET( NV_IS_DEVICE, (_LIBCUDACXX_DEBUG_ASSERT((std::is_same<_Property, shared>::value && __isShared(__p) || __isGlobal(__p)), "");)) } template _CCCL_HOST_DEVICE annotated_ptr(pointer __p, _RuntimeProperty __prop) : __detail_ap::__annotated_ptr_base<_Property>(static_cast(access_property(__prop))) , __repr(__p) { static_assert(std::is_same<_Property, access_property>::value, "This method requires annotated_ptr"); static_assert( std::is_same<_RuntimeProperty, access_property::global>::value || std::is_same<_RuntimeProperty, access_property::normal>::value || std::is_same<_RuntimeProperty, access_property::streaming>::value || std::is_same<_RuntimeProperty, access_property::persisting>::value || std::is_same<_RuntimeProperty, access_property>::value, "This method requires RuntimeProperty=global|normal|streaming|persisting|access_property"); NV_IF_TARGET(NV_IS_DEVICE, (_LIBCUDACXX_DEBUG_ASSERT((__isGlobal(__p) == true), "");)) } template _CCCL_HOST_DEVICE annotated_ptr(const annotated_ptr<_TTp, _Prop>& __other); _CCCL_HOST_DEVICE constexpr explicit operator bool() const noexcept { return __repr != nullptr; } _CCCL_HOST_DEVICE pointer get() const noexcept { constexpr bool __is_shared = std::is_same<_Property, access_property::shared>::value; return __is_shared ? __repr : &(*annotated_ptr(__repr)); } _CCCL_HOST_DEVICE _Property __property() const noexcept { return this->__get_property(); } }; template template _CCCL_HOST_DEVICE annotated_ptr<_Tp, _Property>::annotated_ptr(const annotated_ptr<_TTp, _Prop>& __other) : __detail_ap::__annotated_ptr_base<_Property>(__other.__property()) , __repr(__other.get()) { static_assert(std::is_assignable::value, "pointer must be assignable from other pointer"); static_assert( (std::is_same<_Property, access_property>::value && !std::is_same<_Prop, access_property::shared>::value) || std::is_same<_Property, _Prop>::value, "Property must be either access_property or other property, and both properties must have same address space"); // note: precondition "__other.__rep must be compatible with _Property" currently always holds } template _CCCL_HOST_DEVICE void memcpy_async(_Dst* __dst, annotated_ptr<_Src, _SrcProperty> __src, _Shape __shape, _Sync& __sync) { memcpy_async(__dst, &(*__src), __shape, __sync); } template _CCCL_HOST_DEVICE void memcpy_async( annotated_ptr<_Dst, _DstProperty> __dst, annotated_ptr<_Src, _SrcProperty> __src, _Shape __shape, _Sync& __sync) { memcpy_async(&(*__dst), &(*__src), __shape, __sync); } template _CCCL_HOST_DEVICE void memcpy_async(const _Group& __group, _Dst* __dst, annotated_ptr<_Src, _SrcProperty> __src, _Shape __shape, _Sync& __sync) { memcpy_async(__group, __dst, &(*__src), __shape, __sync); } template _CCCL_HOST_DEVICE void memcpy_async( const _Group& __group, annotated_ptr<_Dst, _DstProperty> __dst, annotated_ptr<_Src, _SrcProperty> __src, _Shape __shape, _Sync& __sync) { memcpy_async(__group, &(*__dst), &(*__src), __shape, __sync); } _LIBCUDACXX_END_NAMESPACE_CUDA #endif // _CUDA_ANNOTATED_PTR