/*
 * SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 *
 * NVIDIA SOFTWARE LICENSE
 *
 * This license is a legal agreement between you and NVIDIA Corporation ("NVIDIA") and governs your use of the
 * NVIDIA/CUDA C++ Library software and materials provided hereunder (“SOFTWARE”).
 *
 * This license can be accepted only by an adult of legal age of majority in the country in which the SOFTWARE is used.
 * If you are under the legal age of majority, you must ask your parent or legal guardian to consent to this license. By
 * taking delivery of the SOFTWARE, you affirm that you have reached the legal age of majority, you accept the terms of
 * this license, and you take legal and financial responsibility for the actions of your permitted users.
 *
 * You agree to use the SOFTWARE only for purposes that are permitted by (a) this license, and (b) any applicable law,
 * regulation or generally accepted practices or guidelines in the relevant jurisdictions.
 *
 * 1. LICENSE. Subject to the terms of this license, NVIDIA grants you a non-exclusive limited license to: (a) install
 * and use the SOFTWARE, and (b) distribute the SOFTWARE subject to the distribution requirements described in this
 * license. NVIDIA reserves all rights, title and interest in and to the SOFTWARE not expressly granted to you under
 * this license.
 *
 * 2. DISTRIBUTION REQUIREMENTS. These are the distribution requirements for you to exercise the distribution grant:
 * a.      The terms under which you distribute the SOFTWARE must be consistent with the terms of this license,
 * including (without limitation) terms relating to the license grant and license restrictions and protection of
 * NVIDIA’s intellectual property rights. b.      You agree to notify NVIDIA in writing of any known or suspected
 * distribution or use of the SOFTWARE not in compliance with the requirements of this license, and to enforce the terms
 * of your agreements with respect to distributed SOFTWARE.
 *
 * 3. LIMITATIONS. Your license to use the SOFTWARE is restricted as follows:
 * a.      The SOFTWARE is licensed for you to develop applications only for use in systems with NVIDIA GPUs.
 * b.      You may not reverse engineer, decompile or disassemble, or remove copyright or other proprietary notices from
 * any portion of the SOFTWARE or copies of the SOFTWARE. c.      You may not modify or create derivative works of any
 * portion of the SOFTWARE. d.      You may not bypass, disable, or circumvent any technical measure, encryption,
 * security, digital rights management or authentication mechanism in the SOFTWARE. e.      You may not use the SOFTWARE
 * in any manner that would cause it to become subject to an open source software license. As examples, licenses that
 * require as a condition of use, modification, and/or distribution that the SOFTWARE be (i) disclosed or distributed in
 * source code form; (ii) licensed for the purpose of making derivative works; or (iii) redistributable at no charge. f.
 * Unless you have an agreement with NVIDIA for this purpose, you may not use the SOFTWARE with any system or
 * application where the use or failure of the system or application can reasonably be expected to threaten or result in
 * personal injury, death, or catastrophic loss. Examples include use in avionics, navigation, military, medical, life
 * support or other life critical applications. NVIDIA does not design, test or manufacture the SOFTWARE for these
 * critical uses and NVIDIA shall not be liable to you or any third party, in whole or in part, for any claims or
 * damages arising from such uses. g.      You agree to defend, indemnify and hold harmless NVIDIA and its affiliates,
 * and their respective employees, contractors, agents, officers and directors, from and against any and all claims,
 * damages, obligations, losses, liabilities, costs or debt, fines, restitutions and expenses (including but not limited
 * to attorney’s fees and costs incident to establishing the right of indemnification) arising out of or related to use
 * of the SOFTWARE outside of the scope of this Agreement, or not in compliance with its terms.
 *
 * 4. PRE-RELEASE. SOFTWARE versions identified as alpha, beta, preview, early access or otherwise as pre-release may
 * not be fully functional, may contain errors or design flaws, and may have reduced or different security, privacy,
 * availability, and reliability standards relative to commercial versions of NVIDIA software and materials. You may use
 * a pre-release SOFTWARE version at your own risk, understanding that these versions are not intended for use in
 * production or business-critical systems.
 *
 * 5. OWNERSHIP. The SOFTWARE and the related intellectual property rights therein are and will remain the sole and
 * exclusive property of NVIDIA or its licensors. The SOFTWARE is copyrighted and protected by the laws of the United
 * States and other countries, and international treaty provisions. NVIDIA may make changes to the SOFTWARE, at any time
 * without notice, but is not obligated to support or update the SOFTWARE.
 *
 * 6. COMPONENTS UNDER OTHER LICENSES. The SOFTWARE may include NVIDIA or third-party components with separate legal
 * notices or terms as may be described in proprietary notices accompanying the SOFTWARE. If and to the extent there is
 * a conflict between the terms in this license and the license terms associated with a component, the license terms
 * associated with the components control only to the extent necessary to resolve the conflict.
 *
 * 7. FEEDBACK. You may, but don’t have to, provide to NVIDIA any Feedback. “Feedback” means any suggestions, bug fixes,
 * enhancements, modifications, feature requests or other feedback regarding the SOFTWARE. For any Feedback that you
 * voluntarily provide, you hereby grant NVIDIA and its affiliates a perpetual, non-exclusive, worldwide, irrevocable
 * license to use, reproduce, modify, license, sublicense (through multiple tiers of sublicensees), and distribute
 * (through multiple tiers of distributors) the Feedback without the payment of any royalties or fees to you. NVIDIA
 * will use Feedback at its choice.
 *
 * 8. NO WARRANTIES. THE SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY EXPRESS OR IMPLIED WARRANTY OF ANY KIND INCLUDING, BUT
 * NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, NONINFRINGEMENT, OR FITNESS FOR A PARTICULAR PURPOSE. NVIDIA DOES NOT
 * WARRANT THAT THE SOFTWARE WILL MEET YOUR REQUIREMENTS OR THAT THE OPERATION THEREOF WILL BE UNINTERRUPTED OR
 * ERROR-FREE, OR THAT ALL ERRORS WILL BE CORRECTED.
 *
 * 9. LIMITATIONS OF LIABILITY. TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS AFFILIATES SHALL NOT BE LIABLE
 * FOR ANY SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, PROJECT DELAYS, LOSS OF USE,
 * LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION WITH
 * THIS LICENSE OR THE USE OR PERFORMANCE OF THE SOFTWARE, WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON
 * BREACH OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE), PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION
 * OR THEORY OF LIABILITY, EVEN IF NVIDIA HAS PREVIOUSLY BEEN ADVISED OF, OR COULD REASONABLY HAVE FORESEEN, THE
 * POSSIBILITY OF SUCH DAMAGES. IN NO EVENT WILL NVIDIA’S AND ITS AFFILIATES TOTAL CUMULATIVE LIABILITY UNDER OR ARISING
 * OUT OF THIS LICENSE EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE
 * OR EXTEND THIS LIMIT.
 *
 * 10. TERMINATION. Your rights under this license will terminate automatically without notice from NVIDIA if you fail
 * to comply with any term and condition of this license or if you commence or participate in any legal proceeding
 * against NVIDIA with respect to the SOFTWARE. NVIDIA may terminate this license with advance written notice to you if
 * NVIDIA decides to no longer provide the SOFTWARE in a country or, in NVIDIA’s sole discretion, the continued use of
 * it is no longer commercially viable. Upon any termination of this license, you agree to promptly discontinue use of
 * the SOFTWARE and destroy all copies in your possession or control. Your prior distributions in accordance with this
 * license are not affected by the termination of this license. All provisions of this license will survive termination,
 * except for the license granted to you.
 *
 * 11. APPLICABLE LAW. This license will be governed in all respects by the laws of the United States and of the State
 * of Delaware as those laws are applied to contracts entered into and performed entirely within Delaware by Delaware
 * residents, without regard to the conflicts of laws principles. The United Nations Convention on Contracts for the
 * International Sale of Goods is specifically disclaimed. You agree to all terms of this Agreement in the English
 * language. The state or federal courts residing in Santa Clara County, California shall have exclusive jurisdiction
 * over any dispute or claim arising out of this license. Notwithstanding this, you agree that NVIDIA shall still be
 * allowed to apply for injunctive remedies or an equivalent type of urgent legal relief in any jurisdiction.
 *
 * 12. NO ASSIGNMENT. This license and your rights and obligations thereunder may not be assigned by you by any means or
 * operation of law without NVIDIA’s permission. Any attempted assignment not approved by NVIDIA in writing shall be
 * void and of no effect.
 *
 * 13. EXPORT. The SOFTWARE is subject to United States export laws and regulations. You agree that you will not ship,
 * transfer or export the SOFTWARE into any country, or use the SOFTWARE in any manner, prohibited by the United States
 * Bureau of Industry and Security or economic sanctions regulations administered by the U.S. Department of Treasury’s
 * Office of Foreign Assets Control (OFAC), or any applicable export laws, restrictions or regulations. These laws
 * include restrictions on destinations, end users and end use. By accepting this license, you confirm that you are not
 * a resident or citizen of any country currently embargoed by the U.S. and that you are not otherwise prohibited from
 * receiving the SOFTWARE.
 *
 * 14. GOVERNMENT USE. The SOFTWARE has been developed entirely at private expense and is “commercial items” consisting
 * of “commercial computer software” and “commercial computer software documentation” provided with RESTRICTED RIGHTS.
 * Use, duplication or disclosure by the U.S. Government or a U.S. Government subcontractor is subject to the
 * restrictions in this license pursuant to DFARS 227.7202-3(a) or as set forth in subparagraphs (b)(1) and (2) of the
 * Commercial Computer Software - Restricted Rights clause at FAR 52.227-19, as applicable. Contractor/manufacturer is
 * NVIDIA, 2788 San Tomas Expressway, Santa Clara, CA 95051.
 *
 * 15. ENTIRE AGREEMENT. This license is the final, complete and exclusive agreement between the parties relating to the
 * subject matter of this license and supersedes all prior or contemporaneous understandings and agreements relating to
 * this subject matter, whether oral or written. If any court of competent jurisdiction determines that any provision of
 * this license is illegal, invalid or unenforceable, the remaining provisions will remain in full force and effect.
 * This license may only be modified in a writing signed by an authorized representative of each party.
 *
 * (v. August 20, 2021)
 */

#ifndef _CUDA_ANNOTATED_PTR
#define _CUDA_ANNOTATED_PTR

#include <cuda/std/detail/__config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
#  pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
#  pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
#  pragma system_header
#endif // no system header

#include <cuda/barrier>
#include <cuda/discard_memory>
#include <cuda/std/cstdint>
#include <cuda/std/detail/__access_property>

_LIBCUDACXX_BEGIN_NAMESPACE_CUDA

// Value type wrapping a 64-bit access-property descriptor.
//
// The descriptor is produced by helpers in `__detail_ap` (declared in
// <cuda/std/detail/__access_property>) and can be read back through the explicit
// conversion to `std::uint64_t`. The nested empty structs act as tag types used to
// select a constructor overload.
class access_property
{
public:
  // Address-space tags. They carry no conversion to cudaAccessProperty.
  struct shared
  {};
  struct global
  {};

  // Property tags. Each converts to the matching cudaAccessProperty enumerator.
  struct persisting
  {
    _CCCL_HOST_DEVICE constexpr operator cudaAccessProperty() const noexcept
    {
      return cudaAccessProperty::cudaAccessPropertyPersisting;
    }
  };
  struct streaming
  {
    _CCCL_HOST_DEVICE constexpr operator cudaAccessProperty() const noexcept
    {
      return cudaAccessProperty::cudaAccessPropertyStreaming;
    }
  };
  struct normal
  {
    _CCCL_HOST_DEVICE constexpr operator cudaAccessProperty() const noexcept
    {
      return cudaAccessProperty::cudaAccessPropertyNormal;
    }
  };

  // --- Default / global ------------------------------------------------------------
  // Both the `global` tag and default construction yield the descriptor returned by
  // __detail_ap::__sm_80::__interleave_normal().
  _CCCL_HOST_DEVICE constexpr access_property(global) noexcept
      : __descriptor(__detail_ap::__sm_80::__interleave_normal())
  {}
  _CCCL_HOST_DEVICE constexpr access_property() noexcept
      : access_property(global{})
  {}

  // --- Copy ------------------------------------------------------------------------
  _CCCL_HIDE_FROM_ABI constexpr access_property(access_property const&) noexcept        = default;
  _CCCL_HIDE_FROM_ABI access_property& operator=(const access_property& other) noexcept = default;

  // --- Single property, full strength ----------------------------------------------
  // Equivalent to the (tag, fraction) overloads below with a fraction of 1.
  _CCCL_HOST_DEVICE constexpr access_property(normal) noexcept
      : access_property(normal{}, 1.0f)
  {}
  _CCCL_HOST_DEVICE constexpr access_property(streaming) noexcept
      : access_property(streaming{}, 1.0f)
  {}
  _CCCL_HOST_DEVICE constexpr access_property(persisting) noexcept
      : access_property(persisting{}, 1.0f)
  {}

  // --- Interleaved: (tag, fraction[, streaming]) -----------------------------------
  // The descriptor is whatever __detail_ap::__interleave returns for the given tag(s)
  // and __fraction. NOTE(review): the accepted range of __fraction is defined by that
  // helper and is not visible here -- confirm before relying on a particular range.
  _CCCL_HOST_DEVICE constexpr access_property(normal, float __fraction)
      : __descriptor(__detail_ap::__interleave(normal{}, __fraction))
  {}
  _CCCL_HOST_DEVICE constexpr access_property(streaming, float __fraction)
      : __descriptor(__detail_ap::__interleave(streaming{}, __fraction))
  {}
  _CCCL_HOST_DEVICE constexpr access_property(persisting, float __fraction)
      : __descriptor(__detail_ap::__interleave(persisting{}, __fraction))
  {}
  _CCCL_HOST_DEVICE constexpr access_property(normal, float __fraction, streaming)
      : __descriptor(__detail_ap::__interleave(normal{}, __fraction, streaming{}))
  {}
  _CCCL_HOST_DEVICE constexpr access_property(persisting, float __fraction, streaming)
      : __descriptor(__detail_ap::__interleave(persisting{}, __fraction, streaming{}))
  {}

  // --- Block: (pointer, hit bytes, total bytes, tag[, streaming]) ------------------
  // The descriptor is whatever __detail_ap::__block returns for the given address
  // range description and tag(s).
  _CCCL_HOST_DEVICE constexpr access_property(void* __ptr, std::size_t __hit_bytes, std::size_t __total_bytes, normal)
      : __descriptor(__detail_ap::__block(__ptr, __hit_bytes, __total_bytes, normal{}))
  {}
  _CCCL_HOST_DEVICE constexpr access_property(void* __ptr, std::size_t __hit_bytes, std::size_t __total_bytes, streaming)
      : __descriptor(__detail_ap::__block(__ptr, __hit_bytes, __total_bytes, streaming{}))
  {}
  _CCCL_HOST_DEVICE constexpr access_property(
    void* __ptr, std::size_t __hit_bytes, std::size_t __total_bytes, persisting)
      : __descriptor(__detail_ap::__block(__ptr, __hit_bytes, __total_bytes, persisting{}))
  {}
  _CCCL_HOST_DEVICE constexpr access_property(
    void* __ptr, std::size_t __hit_bytes, std::size_t __total_bytes, normal, streaming)
      : __descriptor(__detail_ap::__block(__ptr, __hit_bytes, __total_bytes, normal{}, streaming{}))
  {}
  _CCCL_HOST_DEVICE constexpr access_property(
    void* __ptr, std::size_t __hit_bytes, std::size_t __total_bytes, persisting, streaming)
      : __descriptor(__detail_ap::__block(__ptr, __hit_bytes, __total_bytes, persisting{}, streaming{}))
  {}

  // Returns the raw descriptor. Explicit so a property never silently decays to an integer.
  _CCCL_HOST_DEVICE constexpr explicit operator std::uint64_t() const noexcept
  {
    return __descriptor;
  }

private:
  // Encoded descriptor; every user-provided constructor overwrites this default.
  std::uint64_t __descriptor = 0;
};

_LIBCUDACXX_END_NAMESPACE_CUDA

#include <cuda/std/detail/__annotated_ptr>

_LIBCUDACXX_BEGIN_NAMESPACE_CUDA

// Associates `__prop` with `__ptr` by forwarding both to __detail_ap::__associate
// and returns that helper's result.
//
// `_Property` must be exactly `access_property` or one of its nested tag types
// (persisting, streaming, normal, global, shared); anything else is rejected at
// compile time.
template <class _Tp, class _Property>
_CCCL_HOST_DEVICE _Tp* associate_access_property(_Tp* __ptr, _Property __prop)
{
  // Run-time (dynamic) property object.
  constexpr bool __is_dynamic_property = std::is_same<_Property, access_property>::value;
  // Compile-time cache-behaviour tags.
  constexpr bool __is_cache_tag =
    std::is_same<_Property, access_property::persisting>::value
    || std::is_same<_Property, access_property::streaming>::value
    || std::is_same<_Property, access_property::normal>::value;
  // Compile-time address-space tags.
  constexpr bool __is_space_tag =
    std::is_same<_Property, access_property::global>::value || std::is_same<_Property, access_property::shared>::value;

  static_assert(__is_dynamic_property || __is_cache_tag || __is_space_tag,
                "property is not convertible to cuda::access_property");

  return __detail_ap::__associate(__ptr, __prop);
}

// Issues an L2 `evict_last` prefetch for every 128-byte cache line overlapped by the
// byte range [__ptr, __ptr + __shape).
//
// Parameters:
//   __ptr   - start of the range; nothing happens unless __isGlobal() accepts it.
//   __shape - size of the range in bytes (anything static_cast-able to std::size_t).
//   __prop  - tag selecting this overload; its value is not used.
//
// The body is only compiled for targets satisfying NV_PROVIDES_SM_80; elsewhere the
// function is a no-op. An empty range (size 0) is also a no-op.
template <class _Shape>
_CCCL_HOST_DEVICE void
apply_access_property(const volatile void* __ptr, const _Shape __shape, access_property::persisting __prop) noexcept
{
  NV_IF_TARGET(
    NV_PROVIDES_SM_80,
    (if (!__isGlobal((void*) __ptr)) return;

     static constexpr std::size_t _LINE_SIZE = 128;
     char* __p                               = reinterpret_cast<char*>(const_cast<void*>(__ptr));
     const std::size_t __nbytes              = static_cast<std::size_t>(__shape);
     if (__nbytes == 0) return;

     // Offset of __p inside its cache line. It has to be part of the line count:
     // a range that starts mid-line can spill into one more line than
     // __nbytes / _LINE_SIZE alone would suggest.
     const std::size_t __head =
       static_cast<std::size_t>(reinterpret_cast<std::uintptr_t>(__p) % _LINE_SIZE);
     // Number of cache lines overlapped by [__p, __p + __nbytes), rounded up.
     const std::size_t __lines = (__head + __nbytes + _LINE_SIZE - 1) / _LINE_SIZE;
     // Start of the cache line containing __p.
     char* const __base = __p - __head;

     // __i counts cache lines (step 1); the byte offset is __i * _LINE_SIZE.
     for (std::size_t __i = 0; __i < __lines; ++__i) {
       asm volatile("prefetch.global.L2::evict_last [%0];" ::"l"(__base + (__i * _LINE_SIZE)) :);
     }))
}

// Issues an L2 `evict_normal` prefetch for every 128-byte cache line overlapped by the
// byte range [__ptr, __ptr + __shape).
//
// Parameters:
//   __ptr   - start of the range; nothing happens unless __isGlobal() accepts it.
//   __shape - size of the range in bytes (anything static_cast-able to std::size_t).
//   __prop  - tag selecting this overload; its value is not used.
//
// The body is only compiled for targets satisfying NV_PROVIDES_SM_80; elsewhere the
// function is a no-op. An empty range (size 0) is also a no-op.
template <class _Shape>
_CCCL_HOST_DEVICE void
apply_access_property(const volatile void* __ptr, const _Shape __shape, access_property::normal __prop) noexcept
{
  NV_IF_TARGET(
    NV_PROVIDES_SM_80,
    (if (!__isGlobal((void*) __ptr)) return;

     static constexpr std::size_t _LINE_SIZE = 128;
     char* __p                               = reinterpret_cast<char*>(const_cast<void*>(__ptr));
     const std::size_t __nbytes              = static_cast<std::size_t>(__shape);
     if (__nbytes == 0) return;

     // Offset of __p inside its cache line. It has to be part of the line count:
     // a range that starts mid-line can spill into one more line than
     // __nbytes / _LINE_SIZE alone would suggest.
     const std::size_t __head =
       static_cast<std::size_t>(reinterpret_cast<std::uintptr_t>(__p) % _LINE_SIZE);
     // Number of cache lines overlapped by [__p, __p + __nbytes), rounded up.
     const std::size_t __lines = (__head + __nbytes + _LINE_SIZE - 1) / _LINE_SIZE;
     // Start of the cache line containing __p.
     char* const __base = __p - __head;

     // __i counts cache lines (step 1); the byte offset is __i * _LINE_SIZE.
     for (std::size_t __i = 0; __i < __lines; ++__i) {
       asm volatile("prefetch.global.L2::evict_normal [%0];" ::"l"(__base + (__i * _LINE_SIZE)) :);
     }))
}

// Pointer-like wrapper that stores a raw `_Tp*` together with an access property.
//
// The property handling lives in the base class `__detail_ap::__annotated_ptr_base<_Property>`
// (declared in <cuda/std/detail/__annotated_ptr>); this class adds the pointer storage and
// the pointer-style interface. In device code, every dereference-style access
// (`operator->`, `operator*`, `operator[]`) routes the address through the base class'
// `__apply_prop`; in host code the stored pointer is used unchanged.
template <class _Tp, class _Property>
class annotated_ptr : public __detail_ap::__annotated_ptr_base<_Property>
{
public:
  using value_type      = _Tp;
  using size_type       = std::size_t;
  using reference       = value_type&;
  using pointer         = value_type*;
  using const_pointer   = value_type const*;
  using difference_type = std::ptrdiff_t;

private:
  using __self = annotated_ptr<_Tp, _Property>;

  // Converting from a 64-bit to 32-bit shared pointer and maybe back just for storage might or might not be profitable.
  // Initialised with a plain nullptr: a cast through an integer type is a reinterpret_cast,
  // which would keep the defaulted constexpr default constructor out of constant expressions.
  pointer __repr = nullptr;

  // Returns `__repr + __n`. In device code, unless `__skip_prop` is set, the address is
  // first passed through the base class' `__apply_prop`.
  _CCCL_HOST_DEVICE pointer __get(bool __skip_prop = false, difference_type __n = 0) const
  {
    NV_IF_TARGET(NV_IS_DEVICE, (if (!__skip_prop) {
                   return static_cast<pointer>(
                     this->__apply_prop(const_cast<void*>(static_cast<const volatile void*>(__repr + __n))));
                 }))
    return __repr + __n;
  }
  // Same as `__get`, with the argument order convenient for indexing.
  _CCCL_HOST_DEVICE pointer __offset(difference_type __n, bool __skip_prop = false) const
  {
    return __get(__skip_prop, __n);
  }

public:
  // Member access through the (device: property-annotated) pointer.
  _CCCL_HOST_DEVICE pointer operator->() const
  {
    return __get();
  }

  // Dereferences the (device: property-annotated) pointer.
  _CCCL_HOST_DEVICE reference operator*() const
  {
    return *__get();
  }

  // Dereferences the (device: property-annotated) pointer offset by `__n` elements.
  _CCCL_HOST_DEVICE reference operator[](difference_type __n) const
  {
    return *__offset(__n);
  }

  // Element distance between the two stored raw pointers; properties are not involved.
  _CCCL_HOST_DEVICE constexpr difference_type operator-(annotated_ptr o) const
  {
    return __repr - o.__repr;
  }

  _CCCL_HIDE_FROM_ABI constexpr annotated_ptr() noexcept                     = default;
  _CCCL_HIDE_FROM_ABI constexpr annotated_ptr(annotated_ptr const&) noexcept = default;
  // No constexpr for C++11 as the method can't be const
  _CCCL_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 annotated_ptr& operator=(annotated_ptr const& other) noexcept = default;

  // Wraps `__p`, leaving the base class default-constructed.
  // Device-side debug check: `__p` must be a global-memory address, or -- when `_Property`
  // is `access_property::shared` -- a shared-memory address.
  _CCCL_HOST_DEVICE explicit annotated_ptr(pointer __p)
      : __repr(__p)
  {
    // `shared` must be qualified: it is a member of access_property, not of this class.
    NV_IF_TARGET(
      NV_IS_DEVICE,
      (_LIBCUDACXX_DEBUG_ASSERT(
         ((std::is_same<_Property, access_property::shared>::value && __isShared(__p)) || __isGlobal(__p)), "");))
  }

  // Wraps `__p` with a property supplied at run time. Only available for
  // `annotated_ptr<T, access_property>`; `__prop` is converted to an `access_property`
  // and its 64-bit descriptor is handed to the base class.
  // Device-side debug check: `__p` must be a global-memory address.
  template <typename _RuntimeProperty>
  _CCCL_HOST_DEVICE annotated_ptr(pointer __p, _RuntimeProperty __prop)
      : __detail_ap::__annotated_ptr_base<_Property>(static_cast<std::uint64_t>(access_property(__prop)))
      , __repr(__p)
  {
    static_assert(std::is_same<_Property, access_property>::value,
                  "This method requires annotated_ptr<T, cuda::access_property>");
    static_assert(
      std::is_same<_RuntimeProperty, access_property::global>::value
        || std::is_same<_RuntimeProperty, access_property::normal>::value
        || std::is_same<_RuntimeProperty, access_property::streaming>::value
        || std::is_same<_RuntimeProperty, access_property::persisting>::value
        || std::is_same<_RuntimeProperty, access_property>::value,
      "This method requires RuntimeProperty=global|normal|streaming|persisting|access_property");
    NV_IF_TARGET(NV_IS_DEVICE, (_LIBCUDACXX_DEBUG_ASSERT((__isGlobal(__p) == true), "");))
  }

  // Converting constructor from an annotated_ptr with a different element type and/or
  // property; defined out of line.
  template <class _TTp, class _Prop>
  _CCCL_HOST_DEVICE annotated_ptr(const annotated_ptr<_TTp, _Prop>& __other);

  // True when the stored pointer is non-null.
  _CCCL_HOST_DEVICE constexpr explicit operator bool() const noexcept
  {
    return __repr != nullptr;
  }

  // Returns a raw pointer without this object's own property applied: the stored pointer
  // itself for `access_property::shared`, otherwise the result of dereferencing a temporary
  // `annotated_ptr<value_type, access_property::global>` built from it.
  _CCCL_HOST_DEVICE pointer get() const noexcept
  {
    constexpr bool __is_shared = std::is_same<_Property, access_property::shared>::value;
    return __is_shared ? __repr : &(*annotated_ptr<value_type, access_property::global>(__repr));
  }

  // Returns the property as reported by the base class' `__get_property`.
  _CCCL_HOST_DEVICE _Property __property() const noexcept
  {
    return this->__get_property();
  }
};

// Converting constructor: builds an annotated_ptr<_Tp, _Property> from an
// annotated_ptr<_TTp, _Prop>.
//
// The base class is initialised from `__other.__property()` and the stored pointer from
// `__other.get()`. Two compile-time requirements are enforced:
//   * a `_TTp*` must be assignable to this class' `pointer`;
//   * either both properties are the same type, or the target property is the run-time
//     `access_property` and the source is not `access_property::shared`.
template <class _Tp, class _Property>
template <class _TTp, class _Prop>
_CCCL_HOST_DEVICE annotated_ptr<_Tp, _Property>::annotated_ptr(const annotated_ptr<_TTp, _Prop>& __other)
    : __detail_ap::__annotated_ptr_base<_Property>(__other.__property())
    , __repr(__other.get())
{
  constexpr bool __pointer_compatible = std::is_assignable<pointer&, _TTp*>::value;
  static_assert(__pointer_compatible, "pointer must be assignable from other pointer");

  constexpr bool __identical_property = std::is_same<_Property, _Prop>::value;
  constexpr bool __to_dynamic_property =
    std::is_same<_Property, access_property>::value && !std::is_same<_Prop, access_property::shared>::value;
  static_assert(
    __to_dynamic_property || __identical_property,
    "Property must be either access_property or other property, and both properties must have same address space");
  // note: precondition "__other.__repr must be compatible with _Property" currently always holds
}

// memcpy_async with an annotated source: dereferences `__src` (which, in device code,
// applies its access property) and forwards the resulting raw pointer, together with the
// remaining arguments, to the raw-pointer memcpy_async found by unqualified lookup.
template <class _Dst, class _Src, class _SrcProperty, class _Shape, class _Sync>
_CCCL_HOST_DEVICE void memcpy_async(_Dst* __dst, annotated_ptr<_Src, _SrcProperty> __src, _Shape __shape, _Sync& __sync)
{
  _Src* const __raw_src = &(*__src);
  memcpy_async(__dst, __raw_src, __shape, __sync);
}

// memcpy_async with annotated source and destination: dereferences both annotated
// pointers (which, in device code, applies their access properties) and forwards the
// resulting raw pointers to the raw-pointer memcpy_async found by unqualified lookup.
template <class _Dst, class _DstProperty, class _Src, class _SrcProperty, class _Shape, class _Sync>
_CCCL_HOST_DEVICE void memcpy_async(
  annotated_ptr<_Dst, _DstProperty> __dst, annotated_ptr<_Src, _SrcProperty> __src, _Shape __shape, _Sync& __sync)
{
  _Dst* const __raw_dst = &(*__dst);
  _Src* const __raw_src = &(*__src);
  memcpy_async(__raw_dst, __raw_src, __shape, __sync);
}

// Group-cooperative memcpy_async with an annotated source: dereferences `__src` (which,
// in device code, applies its access property) and forwards the resulting raw pointer,
// together with `__group` and the remaining arguments, to the raw-pointer memcpy_async
// found by unqualified lookup.
template <class _Group, class _Dst, class _Src, class _SrcProperty, class _Shape, class _Sync>
_CCCL_HOST_DEVICE void
memcpy_async(const _Group& __group, _Dst* __dst, annotated_ptr<_Src, _SrcProperty> __src, _Shape __shape, _Sync& __sync)
{
  _Src* const __raw_src = &(*__src);
  memcpy_async(__group, __dst, __raw_src, __shape, __sync);
}

// Group-cooperative memcpy_async with annotated source and destination: dereferences both
// annotated pointers (which, in device code, applies their access properties) and forwards
// the resulting raw pointers, together with `__group`, to the raw-pointer memcpy_async
// found by unqualified lookup.
template <class _Group, class _Dst, class _DstProperty, class _Src, class _SrcProperty, class _Shape, class _Sync>
_CCCL_HOST_DEVICE void memcpy_async(
  const _Group& __group,
  annotated_ptr<_Dst, _DstProperty> __dst,
  annotated_ptr<_Src, _SrcProperty> __src,
  _Shape __shape,
  _Sync& __sync)
{
  _Dst* const __raw_dst = &(*__dst);
  _Src* const __raw_src = &(*__src);
  memcpy_async(__group, __raw_dst, __raw_src, __shape, __sync);
}

_LIBCUDACXX_END_NAMESPACE_CUDA

#endif // _CUDA_ANNOTATED_PTR