/*
 * Copyright 2007-2024 NVIDIA Corporation.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */


/*--------------------------------- Includes --------------------------------*/

#ifndef CUDADEBUGGER_H
#define CUDADEBUGGER_H



#include <stdlib.h>
#include "cuda_stdint.h"

#if defined(__STDC__)
#include <inttypes.h>
#endif

/*
 * Boolean support.  MSVC releases with _MSC_VER below 1800 get a local
 * `bool` typedef plus `true`/`false` macros; every other compiler takes
 * them from <stdbool.h>.
 * NOTE(review): `bool` is a keyword in C++, so the typedef branch looks
 * usable only when this header is compiled as C -- confirm that no C++
 * consumer still builds with MSVC < 1800.
 */
#if defined(_MSC_VER) && _MSC_VER < 1800
// old MSVC does not support stdbool.h
typedef unsigned char bool;
/* Remove any earlier definitions before defining the macros below. */
#undef false
#undef true
#define false 0
#define true  1
#else
#include <stdbool.h>
#endif

#ifdef __cplusplus
extern "C" {
#endif

/*
 * OS-agnostic _CUDBG_INLINE: expands to `__inline` when _WIN32 is defined
 * and to the plain `inline` keyword everywhere else.  Used for the inline
 * helper functions defined in this header.
 */
#if defined(_WIN32)
#define _CUDBG_INLINE __inline
#else
#define _CUDBG_INLINE inline
#endif


/*--------------------------------- API Version ------------------------------*/

/* Version triple (major.minor.revision) of the debugger API described by this header. */
#define CUDBG_API_VERSION_MAJOR      12 /* Major release version number */
#define CUDBG_API_VERSION_MINOR       8 /* Minor release version number */
#define CUDBG_API_VERSION_REVISION  152 /* Revision (build) number */

/*---------------------------------- Constants -------------------------------*/

/* Upper bounds on the device / SM / warp / lane coordinates. */
#define CUDBG_MAX_DEVICES 64  /* Maximum number of supported devices */
#define CUDBG_MAX_SMS    256  /* Maximum number of SMs per device */
#define CUDBG_MAX_WARPS   64  /* Maximum number of warps per SM */
#define CUDBG_MAX_LANES   32  /* Maximum number of lanes per warp */

/*----------------------- Thread/Block Coordinates Types ---------------------*/

/* Unsigned 32-bit coordinate tuples.  CuDim3 is also the type of the grid, block and cluster dimension fields further down in this header. */
typedef struct { uint32_t x, y; }    CuDim2;   /* DEPRECATED 2-dimensional coordinates */
typedef struct { uint32_t x, y, z; } CuDim3;   /* 3-dimensional coordinates for threads,... */

/*--------------------- Memory Segments (as used in DWARF) -------------------*/

/*
 * Storage-kind identifiers for PTX memory segments (as used in DWARF).
 * Every enumerator value is spelled out; the numbers are the same sequential
 * values, starting at 0, that implicit numbering produces.  ptxMAXStorage is
 * the final enumerator and therefore equals the number of kinds listed
 * before it.
 */
typedef enum {
    ptxUNSPECIFIEDStorage =  0,
    ptxCodeStorage        =  1,
    ptxRegStorage         =  2,
    ptxSregStorage        =  3,
    ptxConstStorage       =  4,
    ptxGlobalStorage      =  5,
    ptxLocalStorage       =  6,
    ptxParamStorage       =  7,
    ptxSharedStorage      =  8,
    ptxSurfStorage        =  9,
    ptxTexStorage         = 10,
    ptxTexSamplerStorage  = 11,
    ptxGenericStorage     = 12,
    ptxIParamStorage      = 13,
    ptxOParamStorage      = 14,
    ptxFrameStorage       = 15,
    ptxURegStorage        = 16,
    ptxMAXStorage         = 17
} ptxStorageKind;

/*--------------------------- Debugger System Calls --------------------------*/

/*
 * Each macro below expands to a bare identifier (not a value).
 * NOTE(review): presumably these are the names of symbols shared between the
 * debugged application and the debugger -- confirm against the
 * implementation; this header only provides the name mapping.
 */
#define CUDBG_IPC_FLAG_NAME                 cudbgIpcFlag
#define CUDBG_RPC_ENABLED                   cudbgRpcEnabled
#define CUDBG_APICLIENT_PID                 cudbgApiClientPid
#define CUDBG_DEBUGGER_INITIALIZED          cudbgDebuggerInitialized
#define CUDBG_APICLIENT_REVISION            cudbgApiClientRevision
#define CUDBG_SESSION_ID                    cudbgSessionId
#define CUDBG_ATTACH_HANDLER_AVAILABLE      cudbgAttachHandlerAvailable
#define CUDBG_DETACH_SUSPENDED_DEVICES_MASK cudbgDetachSuspendedDevicesMask
#define CUDBG_ENABLE_LAUNCH_BLOCKING        cudbgEnableLaunchBlocking
#define CUDBG_ENABLE_INTEGRATED_MEMCHECK    cudbgEnableIntegratedMemcheck
#define CUDBG_ENABLE_PREEMPTION_DEBUGGING   cudbgEnablePreemptionDebugging
#define CUDBG_RESUME_FOR_ATTACH_DETACH      cudbgResumeForAttachDetach

/*
 * Bitmask of the capabilities supported by the debugger front-end
 * (bit values are the CUDBGCapabilityFlags enumerators).
 */
#define CUDBG_DEBUGGER_CAPABILITIES         cudbgDebuggerCapabilities

/*
 * Can be read to detect whether the external debugger implementation
 * (libcudadebugger.so) is used or not.
 */
#define CUDBG_USE_EXTERNAL_DEBUGGER          cudbgUseExternalDebugger

/*
 * Capability bits advertised by the debugger front-end.  Each enumerator is
 * a distinct single bit so that several can be OR-ed into one mask.
 */
typedef enum {
    CUDBG_DEBUGGER_CAPABILITY_NONE                  = 0,        /* No capability bit set */
    CUDBG_DEBUGGER_CAPABILITY_LAZY_FUNCTION_LOADING = (1 << 0), /* Required for CUDBG_EVENT_FUNCTIONS_LOADED to be sent */
    CUDBG_DEBUGGER_CAPABILITY_SUSPEND_EVENTS        = (1 << 1), /* Required for CUDBG_EVENT_ALL_DEVICES_SUSPENDED to be sent */
    CUDBG_DEBUGGER_CAPABILITY_REPORT_EXCEPTIONS_IN_EXITED_WARPS = (1 << 2),
    CUDBG_DEBUGGER_CAPABILITY_NO_CONTEXT_PUSH_POP_EVENTS        = (1 << 3),
} CUDBGCapabilityFlags;

/*---------------- Internal Breakpoint Entries for Error Reporting ------------*/

/*
 * Identifier mappings used for driver error reporting.  As with the macros
 * in the previous section, each one expands to a bare identifier.
 * NOTE(review): the *_SIZE / *_ADDR pairs presumably describe a string
 * buffer (length and location) -- confirm against the implementation.
 */
#define CUDBG_REPORT_DRIVER_API_ERROR                   cudbgReportDriverApiError
#define CUDBG_REPORT_DRIVER_API_ERROR_FLAGS             cudbgReportDriverApiErrorFlags
#define CUDBG_REPORTED_DRIVER_API_ERROR_CODE            cudbgReportedDriverApiErrorCode
#define CUDBG_REPORTED_DRIVER_API_ERROR_FUNC_NAME_SIZE  cudbgReportedDriverApiErrorFuncNameSize
#define CUDBG_REPORTED_DRIVER_API_ERROR_FUNC_NAME_ADDR  cudbgReportedDriverApiErrorFuncNameAddr
#define CUDBG_REPORTED_DRIVER_API_ERROR_SOURCE          cudbgReportedDriverApiErrorSource
#define CUDBG_REPORTED_DRIVER_API_ERROR_NAME_SIZE       cudbgReportedDriverApiErrorNameSize
#define CUDBG_REPORTED_DRIVER_API_ERROR_NAME_ADDR       cudbgReportedDriverApiErrorNameAddr
#define CUDBG_REPORTED_DRIVER_API_ERROR_STRING_SIZE     cudbgReportedDriverApiErrorStringSize
#define CUDBG_REPORTED_DRIVER_API_ERROR_STRING_ADDR     cudbgReportedDriverApiErrorStringAddr
#define CUDBG_REPORT_DRIVER_INTERNAL_ERROR              cudbgReportDriverInternalError
#define CUDBG_REPORTED_DRIVER_INTERNAL_ERROR_CODE       cudbgReportedDriverInternalErrorCode

/*----------------------------- API Return Types -----------------------------*/

/*
 * Result codes of the debugger API.  The values are contiguous, from 0x0000
 * up to 0x002e.  CUDBGResultNames[] is indexed directly by these values, so
 * any code added here needs a matching entry, at the same position, in that
 * table.
 */
typedef enum {
    CUDBG_SUCCESS                           = 0x0000,  /* Successful execution */
    CUDBG_ERROR_UNKNOWN                     = 0x0001,  /* Error type not listed below */
    CUDBG_ERROR_BUFFER_TOO_SMALL            = 0x0002,  /* Cannot copy all the queried data into the buffer argument */
    CUDBG_ERROR_UNKNOWN_FUNCTION            = 0x0003,  /* Function cannot be found in the CUDA kernel */
    CUDBG_ERROR_INVALID_ARGS                = 0x0004,  /* Wrong use of arguments (NULL pointer, illegal value,...) */
    CUDBG_ERROR_UNINITIALIZED               = 0x0005,  /* Debugger API has not yet been properly initialized */
    CUDBG_ERROR_INVALID_COORDINATES         = 0x0006,  /* Invalid block or thread coordinates were provided */
    CUDBG_ERROR_INVALID_MEMORY_SEGMENT      = 0x0007,  /* Invalid memory segment requested (read/write) */
    CUDBG_ERROR_INVALID_MEMORY_ACCESS       = 0x0008,  /* Requested address (+size) is not within proper segment boundaries */
    CUDBG_ERROR_MEMORY_MAPPING_FAILED       = 0x0009,  /* Memory is not mapped and cannot be mapped */
    CUDBG_ERROR_INTERNAL                    = 0x000a,  /* A debugger internal error occurred */
    CUDBG_ERROR_INVALID_DEVICE              = 0x000b,  /* Specified device cannot be found */
    CUDBG_ERROR_INVALID_SM                  = 0x000c,  /* Specified sm cannot be found */
    CUDBG_ERROR_INVALID_WARP                = 0x000d,  /* Specified warp cannot be found */
    CUDBG_ERROR_INVALID_LANE                = 0x000e,  /* Specified lane cannot be found */
    CUDBG_ERROR_SUSPENDED_DEVICE            = 0x000f,  /* device is suspended */
    CUDBG_ERROR_RUNNING_DEVICE              = 0x0010,  /* device is running and not suspended */
    CUDBG_ERROR_RESERVED_0                  = 0x0011,  /* Reserved error code */
    CUDBG_ERROR_INVALID_ADDRESS             = 0x0012,  /* address is out-of-range */
    CUDBG_ERROR_INCOMPATIBLE_API            = 0x0013,  /* API version does not match */
    CUDBG_ERROR_INITIALIZATION_FAILURE      = 0x0014,  /* The CUDA Driver failed to initialize */
    CUDBG_ERROR_INVALID_GRID                = 0x0015,  /* Specified grid cannot be found */
    CUDBG_ERROR_NO_EVENT_AVAILABLE          = 0x0016,  /* No event left to be processed */
    CUDBG_ERROR_SOME_DEVICES_WATCHDOGGED    = 0x0017,  /* One or more devices have an associated watchdog (eg. X) */
    CUDBG_ERROR_ALL_DEVICES_WATCHDOGGED     = 0x0018,  /* All devices have an associated watchdog (eg. X) */
    CUDBG_ERROR_INVALID_ATTRIBUTE           = 0x0019,  /* Specified attribute does not exist or is incorrect */
    CUDBG_ERROR_ZERO_CALL_DEPTH             = 0x001a,  /* No function calls have been made on the device */
    CUDBG_ERROR_INVALID_CALL_LEVEL          = 0x001b,  /* Specified call level is invalid */
    CUDBG_ERROR_COMMUNICATION_FAILURE       = 0x001c,  /* Communication error between the debugger and the application. */
    CUDBG_ERROR_INVALID_CONTEXT             = 0x001d,  /* Specified context cannot be found */
    CUDBG_ERROR_ADDRESS_NOT_IN_DEVICE_MEM   = 0x001e,  /* Requested address was not originally allocated from device memory (most likely visible in system memory) */
    CUDBG_ERROR_MEMORY_UNMAPPING_FAILED     = 0x001f,  /* Memory is not unmapped and cannot be unmapped */
    CUDBG_ERROR_INCOMPATIBLE_DISPLAY_DRIVER = 0x0020,  /* The display driver is incompatible with the API */
    CUDBG_ERROR_INVALID_MODULE              = 0x0021,  /* The specified module is not valid */
    CUDBG_ERROR_LANE_NOT_IN_SYSCALL         = 0x0022,  /* The specified lane is not inside a device syscall */
    CUDBG_ERROR_MEMCHECK_NOT_ENABLED        = 0x0023,  /* Memcheck has not been enabled */
    CUDBG_ERROR_INVALID_ENVVAR_ARGS         = 0x0024,  /* Some environment variable's value is invalid */
    CUDBG_ERROR_OS_RESOURCES                = 0x0025,  /* Error while allocating resources from the OS */
    CUDBG_ERROR_FORK_FAILED                 = 0x0026,  /* Error while forking the debugger process */
    CUDBG_ERROR_NO_DEVICE_AVAILABLE         = 0x0027,  /* No CUDA capable device was found */
    CUDBG_ERROR_ATTACH_NOT_POSSIBLE         = 0x0028,  /* Attaching to the CUDA program is not possible */
    CUDBG_ERROR_WARP_RESUME_NOT_POSSIBLE    = 0x0029,  /* The resumeWarpsUntilPC() API is not possible, use resumeDevice() or singleStepWarp() instead */
    CUDBG_ERROR_INVALID_WARP_MASK           = 0x002a,  /* Specified warp mask is zero, or contains invalid warps */
    CUDBG_ERROR_AMBIGUOUS_MEMORY_ADDRESS    = 0x002b,  /* Address cannot be resolved to a GPU unambiguously */
    CUDBG_ERROR_RECURSIVE_API_CALL          = 0x002c,  /* Debug API entry point called from within a debug API callback */
    CUDBG_ERROR_MISSING_DATA                = 0x002d,  /* The requested data is missing */
    CUDBG_ERROR_NOT_SUPPORTED               = 0x002e,  /* Attempted operation is not supported */
} CUDBGResult;

/*
 * Printable names of the CUDBGResult codes.  The entry at index i is the
 * name of the code whose numeric value is i, so the order here must follow
 * the enum exactly.  Being `static` in a header, every translation unit that
 * includes this file gets its own copy of the table.
 */
static const char *CUDBGResultNames[] = {
    "CUDBG_SUCCESS",
    "CUDBG_ERROR_UNKNOWN",
    "CUDBG_ERROR_BUFFER_TOO_SMALL",
    "CUDBG_ERROR_UNKNOWN_FUNCTION",
    "CUDBG_ERROR_INVALID_ARGS",
    "CUDBG_ERROR_UNINITIALIZED",
    "CUDBG_ERROR_INVALID_COORDINATES",
    "CUDBG_ERROR_INVALID_MEMORY_SEGMENT",
    "CUDBG_ERROR_INVALID_MEMORY_ACCESS",
    "CUDBG_ERROR_MEMORY_MAPPING_FAILED",
    "CUDBG_ERROR_INTERNAL",
    "CUDBG_ERROR_INVALID_DEVICE",
    "CUDBG_ERROR_INVALID_SM",
    "CUDBG_ERROR_INVALID_WARP",
    "CUDBG_ERROR_INVALID_LANE",
    "CUDBG_ERROR_SUSPENDED_DEVICE",
    "CUDBG_ERROR_RUNNING_DEVICE",
    "CUDBG_ERROR_RESERVED_0",
    "CUDBG_ERROR_INVALID_ADDRESS",
    "CUDBG_ERROR_INCOMPATIBLE_API",
    "CUDBG_ERROR_INITIALIZATION_FAILURE",
    "CUDBG_ERROR_INVALID_GRID",
    "CUDBG_ERROR_NO_EVENT_AVAILABLE",
    "CUDBG_ERROR_SOME_DEVICES_WATCHDOGGED",
    "CUDBG_ERROR_ALL_DEVICES_WATCHDOGGED",
    "CUDBG_ERROR_INVALID_ATTRIBUTE",
    "CUDBG_ERROR_ZERO_CALL_DEPTH",
    "CUDBG_ERROR_INVALID_CALL_LEVEL",
    "CUDBG_ERROR_COMMUNICATION_FAILURE",
    "CUDBG_ERROR_INVALID_CONTEXT",
    "CUDBG_ERROR_ADDRESS_NOT_IN_DEVICE_MEM",
    "CUDBG_ERROR_MEMORY_UNMAPPING_FAILED",
    "CUDBG_ERROR_INCOMPATIBLE_DISPLAY_DRIVER",
    "CUDBG_ERROR_INVALID_MODULE",
    "CUDBG_ERROR_LANE_NOT_IN_SYSCALL",
    "CUDBG_ERROR_MEMCHECK_NOT_ENABLED",
    "CUDBG_ERROR_INVALID_ENVVAR_ARGS",
    "CUDBG_ERROR_OS_RESOURCES",
    "CUDBG_ERROR_FORK_FAILED",
    "CUDBG_ERROR_NO_DEVICE_AVAILABLE",
    "CUDBG_ERROR_ATTACH_NOT_POSSIBLE",
    "CUDBG_ERROR_WARP_RESUME_NOT_POSSIBLE",
    "CUDBG_ERROR_INVALID_WARP_MASK",
    "CUDBG_ERROR_AMBIGUOUS_MEMORY_ADDRESS",
    "CUDBG_ERROR_RECURSIVE_API_CALL",
    "CUDBG_ERROR_MISSING_DATA",
    "CUDBG_ERROR_NOT_SUPPORTED",
};

/*
 * Return the printable name of a CUDBGResult code.
 *
 *   error  - result code to translate; any value is accepted, including
 *            values that are not valid enumerators.
 *
 * Returns the matching entry of CUDBGResultNames, or the literal
 * "*UNDEFINED*" when the code has no entry in the table.  The returned
 * pointer refers to static storage and must not be freed.
 */
static _CUDBG_INLINE const char *cudbgGetErrorString (CUDBGResult error)
{
    /*
     * Compare element indices, not byte offsets.  Scaling the code by
     * sizeof(char *) is done in size_t arithmetic, which wraps around when
     * size_t is 32 bits wide (e.g. 0x40000000 * 4 == 0); such a code would
     * slip past the range check and index far outside the table.
     */
    const unsigned count = (unsigned)(sizeof(CUDBGResultNames) / sizeof(CUDBGResultNames[0]));
    const unsigned index = (unsigned)error;

    if (index >= count)
        return "*UNDEFINED*";
    return CUDBGResultNames[index];
}


/*------------------------- API Error Reporting Flags -------------------------*/
/* Bit flags that tune which driver API errors get reported. */
typedef enum {
    CUDBG_REPORT_DRIVER_API_ERROR_FLAGS_NONE = 0x0000, /* Default is that there is no flag */
    CUDBG_REPORT_DRIVER_API_ERROR_FLAGS_SUPPRESS_NOT_READY = ( 1U << 0 ), /* When set, cudaErrorNotReady/cuErrorNotReady will not be reported */
} CUDBGReportDriverApiErrorFlags;

/* Identifies which API layer a reported error originates from. */
typedef enum {
    CUDBG_REPORTED_DRIVER_API_ERROR_SOURCE_NONE     = 0x000,   /* Default is that there is no error and no source */
    CUDBG_REPORTED_DRIVER_API_ERROR_SOURCE_DRIVER   = 0x001,   /* The error originates from the CUDA Driver API */
    CUDBG_REPORTED_DRIVER_API_ERROR_SOURCE_RUNTIME  = 0x002,   /* The error originates from the CUDA Runtime API */
} CUDBGReportedDriverApiErrorSource;

/*------------------------------ Grid Attributes -----------------------------*/

/* Keys of the per-grid attributes; used as the `attribute` member of CUDBGAttributeValuePair. */
typedef enum {
    CUDBG_ATTR_GRID_LAUNCH_BLOCKING    = 0x000,   /* Whether the grid launch is blocking or not. */
    CUDBG_ATTR_GRID_TID                = 0x001,   /* Id of the host thread that launched the grid. */
} CUDBGAttribute;

/* One grid attribute together with its value. */
typedef struct {
    CUDBGAttribute attribute;   /* Which attribute this entry describes */
    uint64_t       value;       /* Attribute value; its interpretation depends on `attribute` */
} CUDBGAttributeValuePair;

/*
 * Execution status of a grid.  The numeric values are written out
 * explicitly; they are the sequential values, starting at 0, that implicit
 * numbering yields.
 */
typedef enum {
    CUDBG_GRID_STATUS_INVALID      = 0,  /* An invalid grid ID was passed, or an error occurred during status lookup */
    CUDBG_GRID_STATUS_PENDING      = 1,  /* The grid was launched but is not running on the HW yet */
    CUDBG_GRID_STATUS_ACTIVE       = 2,  /* The grid is currently running on the HW */
    CUDBG_GRID_STATUS_SLEEPING     = 3,  /* The grid is on the device, doing a join */
    CUDBG_GRID_STATUS_TERMINATED   = 4,  /* The grid has finished executing */
    CUDBG_GRID_STATUS_UNDETERMINED = 5,  /* The grid is either PENDING or TERMINATED */
} CUDBGGridStatus;

/*------------------------------- Kernel Types -------------------------------*/

/* Classification of a kernel; stored in the `type` member of the kernelReady event payloads and of the grid info structures. */
typedef enum {
    CUDBG_KNL_TYPE_UNKNOWN             = 0x000,   /* Any type not listed below. */
    CUDBG_KNL_TYPE_SYSTEM              = 0x001,   /* System kernel, such as MemCpy. */
    CUDBG_KNL_TYPE_APPLICATION         = 0x002,   /* Application kernel, user-defined or libraries. */
} CUDBGKernelType;

/*--------------------------- Elf Image Properties ---------------------------*/

/*
 * Properties of an ELF image.
 * NOTE(review): presumably a bit mask carried in the `properties` member of
 * the elfImageLoaded event payload -- confirm.
 */
typedef enum {
    CUDBG_ELF_IMAGE_PROPERTIES_SYSTEM  = 0x001,   /* ELF image contains system kernels. */
} CUDBGElfImageProperties;

/*-------------------------- Physical Register Types -------------------------*/

/*
 * Physical register classes.  Note that the numbering is not contiguous:
 * the value 0x008 is not assigned to any enumerator.
 */
typedef enum {
    REG_CLASS_INVALID                  = 0x000,   /* invalid register */
    REG_CLASS_REG_CC                   = 0x001,   /* Condition register */
    REG_CLASS_REG_PRED                 = 0x002,   /* Predicate register */
    REG_CLASS_REG_ADDR                 = 0x003,   /* Address register */
    REG_CLASS_REG_HALF                 = 0x004,   /* 16-bit register (Currently unused) */
    REG_CLASS_REG_FULL                 = 0x005,   /* 32-bit register */
    REG_CLASS_MEM_LOCAL                = 0x006,   /* register spilled in memory */
    REG_CLASS_LMEM_REG_OFFSET          = 0x007,   /* register at stack offset (ABI only) */
    REG_CLASS_UREG_PRED                = 0x009,   /* uniform predicate register */
    REG_CLASS_UREG_HALF                = 0x00a,   /* 16-bit uniform register */
    REG_CLASS_UREG_FULL                = 0x00b,   /* 32-bit uniform register */
} CUDBGRegClass;

/*---------------------------- Application Events ----------------------------*/

/* Kinds of events reported to the debugger; stored in the `kind` member of the CUDBGEvent* structures. */
typedef enum {
    CUDBG_EVENT_INVALID                = 0x000,   /* Invalid event */
    CUDBG_EVENT_ELF_IMAGE_LOADED       = 0x001,   /* ELF image for CUDA kernel(s) is ready */
    CUDBG_EVENT_KERNEL_READY           = 0x002,   /* A CUDA kernel is ready to be launched */
    CUDBG_EVENT_KERNEL_FINISHED        = 0x003,   /* A CUDA kernel has terminated */
    CUDBG_EVENT_INTERNAL_ERROR         = 0x004,   /* Unexpected error. The API may be unstable. */
    CUDBG_EVENT_CTX_PUSH               = 0x005,   /* A CUDA context has been pushed. */
    CUDBG_EVENT_CTX_POP                = 0x006,   /* A CUDA context has been popped. */
    CUDBG_EVENT_CTX_CREATE             = 0x007,   /* A CUDA context has been created and pushed. */
    CUDBG_EVENT_CTX_DESTROY            = 0x008,   /* A CUDA context has been popped (if it was pushed) and then destroyed. */
    CUDBG_EVENT_TIMEOUT                = 0x009,   /* Nothing happened for a while. This is a heartbeat event.
                                                   *   NOTE: Only sent by the classic backend.
                                                   */
    CUDBG_EVENT_ATTACH_COMPLETE        = 0x00a,   /* Attach complete. */
    CUDBG_EVENT_DETACH_COMPLETE        = 0x00b,   /* Detach complete. */
    CUDBG_EVENT_ELF_IMAGE_UNLOADED     = 0x00c,   /* ELF image for CUDA kernel(s) no longer available */
    CUDBG_EVENT_FUNCTIONS_LOADED       = 0x00d,   /* A group of functions/kernels have been loaded
                                                   *   NOTE: Will only be sent if the debugger capability
                                                   *   CUDBG_DEBUGGER_CAPABILITY_LAZY_FUNCTION_LOADING is set.
                                                   */
    CUDBG_EVENT_ALL_DEVICES_SUSPENDED  = 0x00e,   /* All CUDA devices have been suspended due to a breakpoint hit
                                                   *   or an exception. Does not get sent for GPU events that
                                                   *   result in synchronous API method calls, such as
                                                   *   singleStepWarp or resumeWarpsUntilPC.
                                                   *   NOTE: Will only be sent if the debugger capability
                                                   *   CUDBG_DEBUGGER_CAPABILITY_SUSPEND_EVENTS is set.
                                                   */
} CUDBGEventKind;

/*------------------------------- Kernel Origin ------------------------------*/

/* Where a kernel launch came from; stored in the `origin` member of kernelReady payloads and grid info structures. */
typedef enum {
    CUDBG_KNL_ORIGIN_CPU               = 0x000,   /* The kernel was launched from the CPU. */
    CUDBG_KNL_ORIGIN_GPU               = 0x001,   /* The kernel was launched from the GPU. */
} CUDBGKernelOrigin;

/*------------------------ Kernel Launch Notify Mode --------------------------*/

/* Selects how kernel launch notifications are delivered. */
typedef enum {
    CUDBG_KNL_LAUNCH_NOTIFY_EVENT      = 0x000,   /* The kernel notifications generate events */
    CUDBG_KNL_LAUNCH_NOTIFY_DEFER      = 0x001,   /* The kernel notifications are deferred */
} CUDBGKernelLaunchNotifyMode;

/*---------------------- Application Event Queue Type ------------------------*/

/* Identifies one of the two application event queues. */
typedef enum {
    CUDBG_EVENT_QUEUE_TYPE_SYNC      = 0,   /* Synchronous event queue */
    CUDBG_EVENT_QUEUE_TYPE_ASYNC     = 1,   /* Asynchronous event queue */
} CUDBGEventQueueType;

/*------------------------------ Elf Image Type ------------------------------*/

/* Selects between the two flavours of an ELF image. */
typedef enum {
    CUDBG_ELF_IMAGE_TYPE_NONRELOCATED      = 0,   /* Non-relocated ELF image */
    CUDBG_ELF_IMAGE_TYPE_RELOCATED         = 1,   /* Relocated ELF image */
} CUDBGElfImageType;

/*------------------------------ Code Address --------------------------------*/

/* Selects which adjusted code address is requested relative to a given address. */
typedef enum {
    CUDBG_ADJ_PREVIOUS_ADDRESS         = 0x000,   /* Get the adjusted previous code address. */
    CUDBG_ADJ_CURRENT_ADDRESS          = 0x001,   /* Get the adjusted current code address. */
    CUDBG_ADJ_NEXT_ADDRESS             = 0x002,   /* Get the adjusted next code address. */
} CUDBGAdjAddrAction;

/*------------------------------ Single Step Flags --------------------------------*/

/* Bit flags that modify the behavior of a singleStepWarp() call. */
typedef enum {
    /* Default behavior */
    CUDBG_SINGLE_STEP_FLAGS_NONE                        = 0,
    /* Do not step over warp-wide barriers using a breakpoint and resume;
     * instead perform a single step and return. Passing this flag means
     * that the API client plans to repeat the singleStepWarp() call until
     * the warp barrier is stepped over. This gives more precise exception
     * information if an exception is encountered by the diverged threads
     * while stepping. */
    CUDBG_SINGLE_STEP_FLAGS_NO_STEP_OVER_WARP_BARRIERS  = (1U << 0),
} CUDBGSingleStepFlags;

/*
 * Deprecated.
 * Oldest event record in this header: an event kind plus a union holding
 * one payload per supported kind (ELF image loaded, kernel ready, kernel
 * finished).
 * NOTE(review): `kind` presumably tells which union member is valid, and the
 * "30" suffix presumably names the API version that introduced this layout
 * -- confirm both.
 */
typedef struct {
    CUDBGEventKind kind;
    union cases30_st {
        struct elfImageLoaded30_st {
            char     *relocatedElfImage;
            char     *nonRelocatedElfImage;
            uint32_t  size;
        } elfImageLoaded;
        struct kernelReady30_st {
            uint32_t dev;
            uint32_t gridId;
            uint32_t tid;
        } kernelReady;
        struct kernelFinished30_st {
            uint32_t dev;
            uint32_t gridId;
            uint32_t tid;
        } kernelFinished;
    } cases;
} CUDBGEvent30;

/*
 * Deprecated.
 * Event record with the "32" layout.  Compared with CUDBGEvent30 the ELF and
 * kernel payloads additionally carry device/context/module (and, for
 * kernels, function/functionEntry) identifiers, and four context payloads
 * (push, pop, create, destroy) are added.
 * NOTE(review): `kind` presumably selects the valid union member -- confirm.
 */
typedef struct {
    CUDBGEventKind kind;
    union cases32_st {
        struct elfImageLoaded32_st {
            char     *relocatedElfImage;
            char     *nonRelocatedElfImage;
            uint32_t  size;
            uint32_t  dev;
            uint64_t  context;
            uint64_t  module;
        } elfImageLoaded;
        struct kernelReady32_st {
            uint32_t dev;
            uint32_t gridId;
            uint32_t tid;
            uint64_t context;
            uint64_t module;
            uint64_t function;
            uint64_t functionEntry;
        } kernelReady;
        struct kernelFinished32_st {
            uint32_t dev;
            uint32_t gridId;
            uint32_t tid;
            uint64_t context;
            uint64_t module;
            uint64_t function;
            uint64_t functionEntry;
        } kernelFinished;
        struct contextPush32_st {
            uint32_t dev;
            uint32_t tid;
            uint64_t context;
        } contextPush;
        struct contextPop32_st {
            uint32_t dev;
            uint32_t tid;
            uint64_t context;
        } contextPop;
        struct contextCreate32_st {
            uint32_t dev;
            uint32_t tid;
            uint64_t context;
        } contextCreate;
        struct contextDestroy32_st {
            uint32_t dev;
            uint32_t tid;
            uint64_t context;
        } contextDestroy;
    } cases;
} CUDBGEvent32;

/*
 * Deprecated.
 * Event record with the "42" layout.  Compared with CUDBGEvent32 the
 * elfImageLoaded payload renames the 32-bit size to `size32` and appends a
 * 64-bit `size`, and the kernelReady payload gains gridDim, blockDim and the
 * kernel type.
 * NOTE(review): `kind` presumably selects the valid union member -- confirm.
 */
typedef struct {
    CUDBGEventKind kind;
    union cases42_st {
        struct elfImageLoaded42_st {
            char     *relocatedElfImage;
            char     *nonRelocatedElfImage;
            uint32_t  size32;
            uint32_t  dev;
            uint64_t  context;
            uint64_t  module;
            uint64_t  size;
        } elfImageLoaded;
        struct kernelReady42_st {
            uint32_t dev;
            uint32_t gridId;
            uint32_t tid;
            uint64_t context;
            uint64_t module;
            uint64_t function;
            uint64_t functionEntry;
            CuDim3   gridDim;
            CuDim3   blockDim;
            CUDBGKernelType type;
        } kernelReady;
        struct kernelFinished42_st {
            uint32_t dev;
            uint32_t gridId;
            uint32_t tid;
            uint64_t context;
            uint64_t module;
            uint64_t function;
            uint64_t functionEntry;
        } kernelFinished;
        struct contextPush42_st {
            uint32_t dev;
            uint32_t tid;
            uint64_t context;
        } contextPush;
        struct contextPop42_st {
            uint32_t dev;
            uint32_t tid;
            uint64_t context;
        } contextPop;
        struct contextCreate42_st {
            uint32_t dev;
            uint32_t tid;
            uint64_t context;
        } contextCreate;
        struct contextDestroy42_st {
            uint32_t dev;
            uint32_t tid;
            uint64_t context;
        } contextDestroy;
    } cases;
} CUDBGEvent42;

/*
 * Event record with the "50" layout.  Same payloads as CUDBGEvent42 plus an
 * internalError payload that carries a CUDBGResult.
 * NOTE(review): unlike the 30/32/42 layouts this one carries no "Deprecated"
 * marker even though newer layouts follow -- confirm whether that is
 * intentional.  `kind` presumably selects the valid union member.
 */
typedef struct {
    CUDBGEventKind kind;
    union cases50_st {
        struct elfImageLoaded50_st {
            char     *relocatedElfImage;
            char     *nonRelocatedElfImage;
            uint32_t  size32;
            uint32_t  dev;
            uint64_t  context;
            uint64_t  module;
            uint64_t  size;
        } elfImageLoaded;
        struct kernelReady50_st{
            uint32_t dev;
            uint32_t gridId;
            uint32_t tid;
            uint64_t context;
            uint64_t module;
            uint64_t function;
            uint64_t functionEntry;
            CuDim3   gridDim;
            CuDim3   blockDim;
            CUDBGKernelType type;
        } kernelReady;
        struct kernelFinished50_st {
            uint32_t dev;
            uint32_t gridId;
            uint32_t tid;
            uint64_t context;
            uint64_t module;
            uint64_t function;
            uint64_t functionEntry;
        } kernelFinished;
        struct contextPush50_st {
            uint32_t dev;
            uint32_t tid;
            uint64_t context;
        } contextPush;
        struct contextPop50_st {
            uint32_t dev;
            uint32_t tid;
            uint64_t context;
        } contextPop;
        struct contextCreate50_st {
            uint32_t dev;
            uint32_t tid;
            uint64_t context;
        } contextCreate;
        struct contextDestroy50_st {
            uint32_t dev;
            uint32_t tid;
            uint64_t context;
        } contextDestroy;
        struct internalError50_st {
            CUDBGResult errorType;
        } internalError;
    } cases;
} CUDBGEvent50;

/*
 * Event record with the "55" layout.  Compared with CUDBGEvent50 the
 * kernelReady payload gains parentGridId, a 64-bit gridId64 and the kernel
 * origin, and the kernelFinished payload gains gridId64.  The 32-bit
 * `gridId` members are still present next to the 64-bit ones.
 * NOTE(review): `kind` presumably selects the valid union member -- confirm.
 */
typedef struct {
    CUDBGEventKind kind;
    union cases55_st {
        struct elfImageLoaded55_st {
            char     *relocatedElfImage;
            char     *nonRelocatedElfImage;
            uint32_t  size32;
            uint32_t  dev;
            uint64_t  context;
            uint64_t  module;
            uint64_t  size;
        } elfImageLoaded;
        struct kernelReady55_st{
            uint32_t dev;
            uint32_t gridId;
            uint32_t tid;
            uint64_t context;
            uint64_t module;
            uint64_t function;
            uint64_t functionEntry;
            CuDim3   gridDim;
            CuDim3   blockDim;
            CUDBGKernelType type;
            uint64_t parentGridId;
            uint64_t gridId64;
            CUDBGKernelOrigin origin;
        } kernelReady;
        struct kernelFinished55_st {
            uint32_t dev;
            uint32_t gridId;
            uint32_t tid;
            uint64_t context;
            uint64_t module;
            uint64_t function;
            uint64_t functionEntry;
            uint64_t gridId64;
        } kernelFinished;
        struct contextPush55_st {
            uint32_t dev;
            uint32_t tid;
            uint64_t context;
        } contextPush;
        struct contextPop55_st {
            uint32_t dev;
            uint32_t tid;
            uint64_t context;
        } contextPop;
        struct contextCreate55_st {
            uint32_t dev;
            uint32_t tid;
            uint64_t context;
        } contextCreate;
        struct contextDestroy55_st {
            uint32_t dev;
            uint32_t tid;
            uint64_t context;
        } contextDestroy;
        struct internalError55_st {
            CUDBGResult errorType;
        } internalError;
    } cases;
} CUDBGEvent55;

/*
 * Current (unsuffixed) event record.  Declared under #pragma pack(push,1),
 * so members are laid out back to back without padding.
 * Differences from CUDBGEvent55 visible in the declarations:
 *   - elfImageLoaded no longer holds image pointers; it carries a 64-bit
 *     `handle` and a `properties` word instead;
 *   - new payloads: elfImageUnloaded, functionsLoaded, allDevicesSuspended;
 *   - kernel payloads keep only a 64-bit `gridId`.
 * NOTE(review): `kind` presumably selects the valid union member, and
 * `properties` presumably holds CUDBGElfImageProperties bits -- confirm.
 */
#pragma pack(push,1)
typedef struct {
    CUDBGEventKind kind;
    union cases_st {
        struct elfImageLoaded_st {
            uint32_t dev;
            uint64_t context;
            uint64_t module;
            uint64_t size;
            uint64_t handle;
            uint32_t properties;
        } elfImageLoaded;
        struct elfImageUnloaded_st {
            uint32_t dev;
            uint64_t context;
            uint64_t module;
            uint64_t size;
            uint64_t handle;
        } elfImageUnloaded;
        struct kernelReady_st{
            uint32_t dev;
            uint32_t tid;
            uint64_t gridId;
            uint64_t context;
            uint64_t module;
            uint64_t function;
            uint64_t functionEntry;
            CuDim3   gridDim;
            CuDim3   blockDim;
            CUDBGKernelType type;
            uint64_t parentGridId;
            CUDBGKernelOrigin origin;
        } kernelReady;
        struct kernelFinished_st {
            uint32_t dev;
            uint32_t tid;
            uint64_t context;
            uint64_t module;
            uint64_t function;
            uint64_t functionEntry;
            uint64_t gridId;
        } kernelFinished;
        struct contextPush_st {
            uint32_t dev;
            uint32_t tid;
            uint64_t context;
        } contextPush;
        struct contextPop_st {
            uint32_t dev;
            uint32_t tid;
            uint64_t context;
        } contextPop;
        struct contextCreate_st {
            uint32_t dev;
            uint32_t tid;
            uint64_t context;
        } contextCreate;
        struct contextDestroy_st {
            uint32_t dev;
            uint32_t tid;
            uint64_t context;
        } contextDestroy;
        struct internalError_st {
            CUDBGResult errorType;
        } internalError;
        struct functionsLoaded_st {
            uint32_t dev;
            uint32_t count;
            uint64_t context;
            uint64_t module;
        } functionsLoaded;
        struct allDevicesSuspended_st {
            /* This mask has bits set for devices with any warps that hit a breakpoint */
            uint64_t brokenDevicesMask;
            /* This mask has bits set for devices with any warps that hit an exception */
            uint64_t faultedDevicesMask;
        } allDevicesSuspended;
    } cases;
} CUDBGEvent;
#pragma pack(pop)

/* Argument of CUDBGNotifyNewEventCallback40; older layout that carries only a thread id. */
typedef struct {
    uint32_t tid;
} CUDBGEventCallbackData40;

/*
 * Argument of CUDBGNotifyNewEventCallback; extends the "40" layout with a
 * `timeout` member.
 * NOTE(review): units and meaning of `timeout` are not visible here -- confirm.
 */
typedef struct {
    uint32_t tid;
    uint32_t timeout;
} CUDBGEventCallbackData;

/*
 * Grid description records, all declared under #pragma pack(push,1) so that
 * members are laid out without padding (for example, gridId64 directly
 * follows the 4-byte `dev`).  Three layouts share a common prefix:
 *   CUDBGGridInfo55  - base layout;
 *   CUDBGGridInfo120 - base layout + clusterDim;
 *   CUDBGGridInfo    - CUDBGGridInfo120 layout + preferredClusterDim.
 * Because each newer layout only appends members, the older layouts are
 * prefixes of the newer ones.
 */
#pragma pack(push,1)
typedef struct {
    uint32_t dev;
    uint64_t gridId64;
    uint32_t tid;
    uint64_t context;
    uint64_t module;
    uint64_t function;
    uint64_t functionEntry;
    CuDim3   gridDim;
    CuDim3   blockDim;
    CUDBGKernelType type;
    uint64_t parentGridId;
    CUDBGKernelOrigin origin;
} CUDBGGridInfo55;

typedef struct {
    uint32_t dev;
    uint64_t gridId64;
    uint32_t tid;
    uint64_t context;
    uint64_t module;
    uint64_t function;
    uint64_t functionEntry;
    CuDim3   gridDim;
    CuDim3   blockDim;
    CUDBGKernelType type;
    uint64_t parentGridId;
    CUDBGKernelOrigin origin;
    CuDim3   clusterDim;            /* Appended relative to CUDBGGridInfo55 */
} CUDBGGridInfo120;

typedef struct {
    uint32_t dev;
    uint64_t gridId64;
    uint32_t tid;
    uint64_t context;
    uint64_t module;
    uint64_t function;
    uint64_t functionEntry;
    CuDim3   gridDim;
    CuDim3   blockDim;
    CUDBGKernelType type;
    uint64_t parentGridId;
    CUDBGKernelOrigin origin;
    CuDim3   clusterDim;
    CuDim3   preferredClusterDim;   /* Appended relative to CUDBGGridInfo120 */
} CUDBGGridInfo;
#pragma pack(pop)

#pragma pack(push,1)
/* Element type of the `info` array passed to
   CUDBGAPI_st::getLoadedFunctionInfo and getLoadedFunctionInfo118.
   Packed: two consecutive 64-bit values. */
typedef struct {
    uint64_t sectionIndex; /* NOTE(review): presumably an ELF section index - confirm */
    uint64_t address;
} CUDBGLoadedFunctionInfo;
#pragma pack(pop)

/* Event-notification callback signatures. Each one is the parameter type of
   the matching registration entry in CUDBGAPI_st:
     CUDBGNotifyNewEventCallback31 -> setNotifyNewEventCallback31 (opaque void *data)
     CUDBGNotifyNewEventCallback40 -> setNotifyNewEventCallback40 (CUDBGEventCallbackData40 *)
     CUDBGNotifyNewEventCallback   -> setNotifyNewEventCallback   (CUDBGEventCallbackData *) */
typedef void (*CUDBGNotifyNewEventCallback31)(void *data);
typedef void (*CUDBGNotifyNewEventCallback40)(CUDBGEventCallbackData40 *data);
typedef void (*CUDBGNotifyNewEventCallback)(CUDBGEventCallbackData *data);

/*-------------------------------- Exceptions ------------------------------*/

/* Exception codes. This is the type written by
   CUDBGAPI_st::readLaneException and CUDBGAPI_st::readSmException, and the
   type of CUDBGLaneState::exception.
   Every enumerator has an explicit value, so the order of declaration does
   not influence the numbering. */
typedef enum {
    CUDBG_EXCEPTION_NONE                           = 0,
    CUDBG_EXCEPTION_LANE_ILLEGAL_ADDRESS           = 1,
    CUDBG_EXCEPTION_LANE_USER_STACK_OVERFLOW       = 2,
    CUDBG_EXCEPTION_DEVICE_HARDWARE_STACK_OVERFLOW = 3,
    CUDBG_EXCEPTION_WARP_ILLEGAL_INSTRUCTION       = 4,
    CUDBG_EXCEPTION_WARP_OUT_OF_RANGE_ADDRESS      = 5,
    CUDBG_EXCEPTION_WARP_MISALIGNED_ADDRESS        = 6,
    CUDBG_EXCEPTION_WARP_INVALID_ADDRESS_SPACE     = 7,
    CUDBG_EXCEPTION_WARP_INVALID_PC                = 8,
    CUDBG_EXCEPTION_WARP_HARDWARE_STACK_OVERFLOW   = 9,
    CUDBG_EXCEPTION_DEVICE_ILLEGAL_ADDRESS         = 10,
    CUDBG_EXCEPTION_LANE_MISALIGNED_ADDRESS        = 11,
    CUDBG_EXCEPTION_WARP_ASSERT                    = 12,
    CUDBG_EXCEPTION_LANE_SYSCALL_ERROR             = 13,
    CUDBG_EXCEPTION_WARP_ILLEGAL_ADDRESS           = 14,
    CUDBG_EXCEPTION_LANE_NONMIGRATABLE_ATOMSYS     = 15,
    CUDBG_EXCEPTION_LANE_INVALID_ATOMSYS           = 16,
    CUDBG_EXCEPTION_CLUSTER_OUT_OF_RANGE_ADDRESS   = 17,
    CUDBG_EXCEPTION_CLUSTER_BLOCK_NOT_PRESENT      = 18,
    CUDBG_EXCEPTION_WARP_STACK_CANARY              = 19,

    /* Force sizeof(CUDBGException_t)==4 */
    CUDBG_EXCEPTION_UNKNOWN                        = 0xFFFFFFFFU,
} CUDBGException_t;

/* UVM memory access type codes.
   All values are explicit; the all-ones UNKNOWN sentinel is listed last. */
typedef enum {
    CUDBG_UVM_MEMORY_ACCESS_TYPE_INVALID  = 0,
    CUDBG_UVM_MEMORY_ACCESS_TYPE_READ     = 1,
    CUDBG_UVM_MEMORY_ACCESS_TYPE_WRITE    = 2,
    CUDBG_UVM_MEMORY_ACCESS_TYPE_ATOMIC   = 3,
    CUDBG_UVM_MEMORY_ACCESS_TYPE_PREFETCH = 4,

    CUDBG_UVM_MEMORY_ACCESS_TYPE_UNKNOWN  = 0xFFFFFFFFU,
} CUDBGUvmMemoryAccessType_t;

/* UVM fault type codes.
   All values are explicit; the all-ones UNKNOWN sentinel is listed last. */
typedef enum {
    CUDBG_UVM_FAULT_TYPE_INVALID              = 0,
    CUDBG_UVM_FAULT_TYPE_INVALID_PDE          = 1,
    CUDBG_UVM_FAULT_TYPE_INVALID_PTE          = 2,
    CUDBG_UVM_FAULT_TYPE_WRITE                = 3,
    CUDBG_UVM_FAULT_TYPE_ATOMIC               = 4,
    CUDBG_UVM_FAULT_TYPE_INVALID_PDE_SIZE     = 5,
    CUDBG_UVM_FAULT_TYPE_LIMIT_VIOLATION      = 6,
    CUDBG_UVM_FAULT_TYPE_UNBOUND_INST_BLOCK   = 7,
    CUDBG_UVM_FAULT_TYPE_PRIV_VIOLATION       = 8,
    CUDBG_UVM_FAULT_TYPE_PITCH_MASK_VIOLATION = 9,
    CUDBG_UVM_FAULT_TYPE_WORK_CREATION        = 10,
    CUDBG_UVM_FAULT_TYPE_UNSUPPORTED_APERTURE = 11,
    CUDBG_UVM_FAULT_TYPE_COMPRESSION_FAILURE  = 12,
    CUDBG_UVM_FAULT_TYPE_UNSUPPORTED_KIND     = 13,
    CUDBG_UVM_FAULT_TYPE_REGION_VIOLATION     = 14,
    CUDBG_UVM_FAULT_TYPE_POISON               = 15,

    CUDBG_UVM_FAULT_TYPE_UNKNOWN              = 0xFFFFFFFFU,
} CUDBGUvmFaultType_t;

/* UVM fatal reason codes.
   All values are explicit; the all-ones UNKNOWN sentinel is listed last. */
typedef enum {
    CUDBG_UVM_FATAL_REASON_INVALID             = 0,
    CUDBG_UVM_FATAL_REASON_INVALID_ADDRESS     = 1,
    CUDBG_UVM_FATAL_REASON_INVALID_PERMISSIONS = 2,
    CUDBG_UVM_FATAL_REASON_INVALID_FAULT_TYPE  = 3,
    CUDBG_UVM_FATAL_REASON_OUT_OF_MEMORY       = 4,
    CUDBG_UVM_FATAL_REASON_INTERNAL_ERROR      = 5,
    CUDBG_UVM_FATAL_REASON_INVALID_OPERATION   = 6,

    CUDBG_UVM_FATAL_REASON_UNKNOWN             = 0xFFFFFFFFU,
} CUDBGUvmFatalReason_t;

/*------------------------------ Warp State --------------------------------*/
#pragma pack(push,1)
/* State of a single lane. The CUDBGWarpState* structs below embed an array
   of 32 of these (member `lane`). Packed: no padding between members. */
typedef struct {
    uint64_t virtualPC;
    CuDim3 threadIdx;
    CUDBGException_t exception;
} CUDBGLaneState;

/* Warp state written by CUDBGAPI_st::readWarpState60.
   Declared inside the surrounding pack(1) region; member order is the layout. */
typedef struct {
    uint64_t gridId;
    uint64_t errorPC;
    CuDim3 blockIdx;
    uint32_t validLanes;   /* NOTE(review): presumably a per-lane bitmask, as in readValidLanes - confirm */
    uint32_t activeLanes;  /* NOTE(review): presumably a per-lane bitmask, as in readActiveLanes - confirm */
    uint32_t errorPCValid; /* NOTE(review): presumably nonzero when errorPC is meaningful - confirm */
    CUDBGLaneState lane[32];
} CUDBGWarpState60;

/* Warp state written by CUDBGAPI_st::readWarpState120.
   Same members, in the same order, as CUDBGWarpState60, followed by
   clusterIdx. Declared inside the surrounding pack(1) region. */
typedef struct {
    uint64_t gridId;
    uint64_t errorPC;
    CuDim3 blockIdx;
    uint32_t validLanes;
    uint32_t activeLanes;
    uint32_t errorPCValid;
    CUDBGLaneState lane[32];
    CuDim3 clusterIdx; /* appended relative to CUDBGWarpState60 */
} CUDBGWarpState120;

/* Warp state written by CUDBGAPI_st::readWarpState.
   Same members, in the same order, as CUDBGWarpState120, followed by
   clusterDim and the cluster-exception target block fields.
   Declared inside the surrounding pack(1) region. */
typedef struct {
    uint64_t gridId;
    uint64_t errorPC;
    CuDim3 blockIdx;
    uint32_t validLanes;
    uint32_t activeLanes;
    uint32_t errorPCValid;
    CUDBGLaneState lane[32];
    CuDim3 clusterIdx;
    /* Members below are appended relative to CUDBGWarpState120 */
    CuDim3 clusterDim;
    /* NOTE(review): presumably nonzero when clusterExceptionTargetBlockIdx
       is meaningful - confirm */
    uint32_t clusterExceptionTargetBlockIdxValid;
    CuDim3 clusterExceptionTargetBlockIdx;
} CUDBGWarpState;

/* Resource figures written by CUDBGAPI_st::readWarpResources.
   Last struct of the pack(1) region closed below. */
typedef struct {
    uint32_t sharedMemSize; /* NOTE(review): units (bytes?) are not stated in this header */
    uint32_t numRegisters;
} CUDBGWarpResources;
#pragma pack(pop)

#pragma pack(push,1)
/* Element type of the `memoryInfo` array passed to
   CUDBGAPI_st::getManagedMemoryRegionInfo. Packed: two consecutive
   64-bit values. */
typedef struct {
    uint64_t startAddress;
    uint64_t size;
} CUDBGMemoryInfo;
#pragma pack(pop)

/*----------------------- Batched device info support ----------------------*/

/* uint32_t sized enum.
   Type of the `type` argument of CUDBGAPI_st::getDeviceInfo and of
   CUDBGDeviceInfo::responseType. */
typedef enum {
    /* Request state information for all valid SMs/Warps/Lanes */
    CUDBG_RESPONSE_TYPE_FULL    = 0,

    /* Request state information for all changed SMs/Warps/Lanes since the last call */
    CUDBG_RESPONSE_TYPE_UPDATE  = 1,

    /* Force sizeof(CUDBGDeviceInfoQueryType_t)==4 */
    CUDBG_RESPONSE_TYPE_UNKNOWN = 0xFFFFFFFFU,
} CUDBGDeviceInfoQueryType_t;

/* uint32_t sized enum.
   Device-level attributes; CUDBGDeviceInfo::deviceAttributeFlags is
   documented as a bitmask of these.
   NOTE(review): the values look like bit positions within that mask rather
   than mask values - confirm. */
typedef enum {
    /* Mask of updated SMs reported by this response
       Optional: Yes, assume all 1's if absent
       Size: Number of SMs-sized bitmask, rounded up to be divisible by 8 */
    CUDBG_DEVICE_ATTRIBUTE_SM_UPDATE_MASK       = 0,
    /* Mask of SMs with any valid warp
       Optional: No, always returned by the API
       Size: Number of SMs-sized bitmask, rounded up to be divisible by 8 */
    CUDBG_DEVICE_ATTRIBUTE_SM_ACTIVE_MASK       = 1,
    /* Mask of SMs with any warps with exceptions
       Optional: Yes, assume all 0's if absent
       Size: Number of SMs-sized bitmask, rounded up to be divisible by 8 */
    CUDBG_DEVICE_ATTRIBUTE_SM_EXCEPTION_MASK    = 2,

    /* Number of device attributes defined above */
    CUDBG_DEVICE_ATTRIBUTE_COUNT                = 3,
} CUDBGDeviceInfoAttribute_t;

/* uint32_t sized enum.
   SM-level attributes; CUDBGSMInfo::smAttributeFlags is documented as a
   bitmask of these.
   NOTE(review): the values look like bit positions within that mask rather
   than mask values - confirm. */
typedef enum {
    /* Mask of updated warps reported by this response
       Optional: Yes, assume all 1's if absent
       Size: uint64_t */
    CUDBG_SM_ATTRIBUTE_WARP_UPDATE_MASK  = 0,

    /* Number of SM attributes defined above */
    CUDBG_SM_ATTRIBUTE_COUNT             = 1,
} CUDBGSMInfoAttribute_t;

/* uint32_t sized enum.
   Warp-level attributes; CUDBGWarpInfo::warpAttributeFlags is documented as
   a bitmask of these, and the optional fields following a CUDBGWarpInfo
   depend on that bitmask.
   NOTE(review): the values look like bit positions within that mask rather
   than mask values - confirm. */
typedef enum {
    /* Mask of updated lanes reported by this response
       Optional: Yes, assume all 1's if absent
       Size: uint32_t */
    CUDBG_WARP_ATTRIBUTE_LANE_UPDATE_MASK                   = 0,
    /* Signals whether the attribute flags field is present on the lane level for this warp
       Optional: Yes, assume no lane attributes for this warp if absent
       Size: 0 (doesn't have an associated warp-level field) */
    CUDBG_WARP_ATTRIBUTE_LANE_ATTRIBUTES                    = 1,
    /* CUDBGException_t for this warp
       Optional: Yes, assume CUDBG_EXCEPTION_NONE if absent
       Size: uint32_t */
    CUDBG_WARP_ATTRIBUTE_EXCEPTION                          = 2,
    /* Error PC for this warp
       Optional: Yes, assume no error PC is available if absent
       Size: uint64_t */
    CUDBG_WARP_ATTRIBUTE_ERRORPC                            = 3,
    /* Cluster index for this warp
       Optional: Yes if warp is not in a cluster
       Size: CuDim3 */
    CUDBG_WARP_ATTRIBUTE_CLUSTERIDX                         = 4,
    /* Cluster dimensions for this warp
       Optional: Yes if warp is not in a cluster
       Size: CuDim3 */
    CUDBG_WARP_ATTRIBUTE_CLUSTERDIM                         = 5,
    /* For cluster exceptions, this represents the target block index handling
       cluster requests.
       Optional: Yes, assume no block index is available if absent
       Size: CuDim3 */
    CUDBG_WARP_ATTRIBUTE_CLUSTER_EXCEPTION_TARGET_BLOCK_IDX = 6,

    /* Number of warp attributes defined above */
    CUDBG_WARP_ATTRIBUTE_COUNT                              = 7,
} CUDBGWarpInfoAttribute_t;

/* uint32_t sized enum.
   Lane-level attributes. No attribute is defined here yet: the only
   enumerator is the count, which is 0. */
typedef enum {
    CUDBG_LANE_ATTRIBUTE_COUNT           = 0,
} CUDBGLaneInfoAttribute_t;

/* Sizes of the various structs returned by the batched device update APIs.
   No explicit version field - implied by debugAPI major.minor.revision.
   Written by CUDBGAPI_st::getDeviceInfoSizes. Packed: members are stored
   back to back in the order listed.
   NOTE(review): each *AttributeSizes array presumably is indexed by the
   matching CUDBG*InfoAttribute_t enumerator value - confirm.
*/
#pragma pack(push,1)
typedef struct {
    uint32_t requiredBufferSize;
    
    /* Device level */
    uint32_t deviceInfoSize;
    uint32_t deviceInfoAttributeSizes[32];

    /* SM level */
    uint32_t smInfoSize;
    uint32_t smInfoAttributeSizes[32];

    /* Warp level */
    uint32_t warpInfoSize;
    uint32_t warpInfoAttributeSizes[32];
 
    /* Lane level */
    uint32_t laneInfoSize;
    uint32_t laneInfoAttributeSizes[32];
} CUDBGDeviceInfoSizes;
#pragma pack(pop)

/* This is the first element in the deviceInfoBuffer, and is always present.
   getDeviceInfo() takes a deviceId as input, so there is no need to
   explicitly pass it back here.
   Packed: an enum forced to 4 bytes followed by a 32-bit flags word.
*/
#pragma pack(push,1)
typedef struct {
    /* One of the CUDBG_RESPONSE_TYPE_* values */
    CUDBGDeviceInfoQueryType_t responseType;

    /* Bitmask of CUDBGDeviceInfoAttribute_t enums for a Device */
    uint32_t deviceAttributeFlags;
} CUDBGDeviceInfo;
#pragma pack(pop)

/*
  Only "valid & updated" SMs/Warps/Lanes are included in the buffer, which allows us to determine
  indexes without having to encode an explicit ID field in the following buffer data structures.
*/

/* Represents an SM.
   Packed fixed-size record: two 64-bit warp masks followed by a 32-bit
   attribute flags word. */
#pragma pack(push,1)
typedef struct {
    uint64_t warpValidMask;
    uint64_t warpBrokenMask;

    /* Bitmask of CUDBGSMInfoAttribute_t enums for a SM */
    uint32_t smAttributeFlags;

    /* New elements are appended (but not added to the struct) */
} CUDBGSMInfo;
#pragma pack(pop)

/* Represents a Warp.
   Packed fixed-size record; the optional per-warp fields selected by
   warpAttributeFlags are not members of this struct. */
#pragma pack(push,1)
typedef struct {
    uint64_t gridId;

    CuDim3   blockIdx;
    CuDim3   baseThreadIdx;

    uint32_t validLanes;
    uint32_t activeLanes;

    /* Bitmask of CUDBGWarpInfoAttribute_t enums for warps and their lanes */
    uint32_t warpAttributeFlags;

    /* Optional fields based on the "warpAttributeFlags" bitmask */
} CUDBGWarpInfo;
#pragma pack(pop)
 
/* Represents a Lane.
   Packed record whose only declared member is the 64-bit virtual PC. */
#pragma pack(push,1)
typedef struct {
    uint64_t virtualPC;

    /* Optional: present only if CUDBG_WARP_ATTRIBUTE_LANE_ATTRIBUTES bit
       is set in CUDBGWarpInfo::warpAttributeFlags. Any additional data is
       appended here after this.
       The field is intentionally left commented out, so it does not
       contribute to sizeof(CUDBGLaneInfo).

       uint32_t laneAttributeFlags;
     */
} CUDBGLaneInfo;
#pragma pack(pop)

/*----------------------- Coredump/snapshot support ------------------------*/

/* Flags accepted by CUDBGAPI_st::generateCoredump. Each SKIP_* flag occupies
   its own bit, so flags can be OR-ed together. */
typedef enum {
    CUDBG_COREDUMP_DEFAULT_FLAGS                = 0x00,
    CUDBG_COREDUMP_SKIP_NONRELOCATED_ELF_IMAGES = 0x01,
    CUDBG_COREDUMP_SKIP_GLOBAL_MEMORY           = 0x02,
    CUDBG_COREDUMP_SKIP_SHARED_MEMORY           = 0x04,
    CUDBG_COREDUMP_SKIP_LOCAL_MEMORY            = 0x08,

    /* 0x10 used to be SKIP_ABORT, but it's impossible to change this behavior. */
    /* DEPRECATED_VALUE_DO_NOT_USE              = 0x10, */

    CUDBG_COREDUMP_SKIP_CONSTBANK_MEMORY        = 0x20,

    /* Union of every SKIP_* flag above */
    CUDBG_COREDUMP_LIGHTWEIGHT_FLAGS =
        CUDBG_COREDUMP_SKIP_NONRELOCATED_ELF_IMAGES |
        CUDBG_COREDUMP_SKIP_GLOBAL_MEMORY |
        CUDBG_COREDUMP_SKIP_SHARED_MEMORY |
        CUDBG_COREDUMP_SKIP_LOCAL_MEMORY |
        CUDBG_COREDUMP_SKIP_CONSTBANK_MEMORY
} CUDBGCoredumpGenerationFlags;

/*--------------------------------- Exports --------------------------------*/

/* Handle to the API function table: a pointer to a const struct CUDBGAPI_st
   (the struct itself is defined further below). */
typedef const struct CUDBGAPI_st *CUDBGAPI;

/* Exported entry points; only declared here, defined elsewhere.
   NOTE(review): judging by the signatures, cudbgGetAPI hands back a CUDBGAPI
   through `api` for the requested major/minor/rev, and cudbgGetAPIVersion
   reports a version through its three out-pointers - confirm against the
   implementation. */
CUDBGResult cudbgGetAPI(uint32_t major, uint32_t minor, uint32_t rev, CUDBGAPI *api);
CUDBGResult cudbgGetAPIVersion(uint32_t *major, uint32_t *minor, uint32_t *rev);
CUDBGResult cudbgMain(int apiClientPid, uint32_t apiClientRevision, int sessionId, int attachState,
                      int attachEventInitialized, int writeFd, int detachFd, int attachStubInUse,
                      int enablePreemptionDebugging);
void cudbgApiInit(uint32_t arg);
void cudbgApiAttach(void);
void cudbgApiDetach(void);
/* Despite the macro-style upper-case names, the next two are ordinary
   function declarations. */
void CUDBG_REPORT_DRIVER_API_ERROR(void);
void CUDBG_REPORT_DRIVER_INTERNAL_ERROR(void);

/* Exported global variables; only declared here, defined elsewhere.
   Despite the macro-style upper-case names these are objects, not
   preprocessor constants. Note the mixed widths: most are uint32_t, while
   the *_CODE, *_SIZE and *_ADDR entries are uint64_t (the exception being
   CUDBG_REPORTED_DRIVER_API_ERROR_SOURCE, which is uint32_t). */
extern uint32_t CUDBG_IPC_FLAG_NAME;
extern uint32_t CUDBG_RPC_ENABLED;
extern uint32_t CUDBG_APICLIENT_PID;
extern uint32_t CUDBG_I_AM_DEBUGGER;
extern uint32_t CUDBG_DEBUGGER_INITIALIZED;
extern uint32_t CUDBG_APICLIENT_REVISION;
extern uint32_t CUDBG_SESSION_ID;
extern uint64_t CUDBG_REPORTED_DRIVER_API_ERROR_CODE;
extern uint64_t CUDBG_REPORTED_DRIVER_API_ERROR_FUNC_NAME_SIZE;
extern uint64_t CUDBG_REPORTED_DRIVER_API_ERROR_FUNC_NAME_ADDR;
extern uint32_t CUDBG_REPORTED_DRIVER_API_ERROR_SOURCE;
extern uint64_t CUDBG_REPORTED_DRIVER_API_ERROR_NAME_SIZE;
extern uint64_t CUDBG_REPORTED_DRIVER_API_ERROR_NAME_ADDR;
extern uint64_t CUDBG_REPORTED_DRIVER_API_ERROR_STRING_SIZE;
extern uint64_t CUDBG_REPORTED_DRIVER_API_ERROR_STRING_ADDR;
extern uint64_t CUDBG_REPORTED_DRIVER_INTERNAL_ERROR_CODE;
extern uint32_t CUDBG_ATTACH_HANDLER_AVAILABLE;
extern uint32_t CUDBG_ENABLE_LAUNCH_BLOCKING;
extern uint32_t CUDBG_ENABLE_PREEMPTION_DEBUGGING;
extern uint32_t CUDBG_RESUME_FOR_ATTACH_DETACH;
extern uint32_t CUDBG_REPORT_DRIVER_API_ERROR_FLAGS;
extern uint32_t CUDBG_DEBUGGER_CAPABILITIES;

/* Deprecated */
extern uint32_t CUDBG_DETACH_SUSPENDED_DEVICES_MASK;

/* Note this has no effect on virtual GPUs (such as NVIDIA GRID) */
extern uint32_t CUDBG_ENABLE_INTEGRATED_MEMCHECK;

/* Table of API entry points. Clients hold it through the CUDBGAPI handle
   (pointer to a const instance of this struct).

   Every member is a function pointer returning CUDBGResult, and members are
   grouped by the API version whose comment precedes them. Because this is a
   plain struct of pointers, the position of each member fixes its offset:
   members must never be reordered or removed, only appended at the end.

   Several entries exist in more than one variant, distinguished by a numeric
   suffix (e.g. getGridInfo55 / getGridInfo120 / getGridInfo); the variants
   differ in their parameter lists or in the struct version they take.
   NOTE(review): the suffix presumably names the API version associated with
   that older signature - confirm.

   Common parameter names: dev/devId = device, sm = SM, wp = warp, ln = lane,
   as used consistently across the signatures below. */
struct CUDBGAPI_st {
    /* Initialization */
    CUDBGResult (*initialize)(void);
    CUDBGResult (*finalize)(void);

    /* Device Execution Control */
    CUDBGResult (*suspendDevice)(uint32_t dev);
    CUDBGResult (*resumeDevice)(uint32_t dev);
    CUDBGResult (*singleStepWarp40)(uint32_t dev, uint32_t sm, uint32_t wp);

    /* Breakpoints */
    CUDBGResult (*setBreakpoint31)(uint64_t addr);
    CUDBGResult (*unsetBreakpoint31)(uint64_t addr);

    /* Device State Inspection */
    CUDBGResult (*readGridId50)(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t *gridId);
    CUDBGResult (*readBlockIdx32)(uint32_t dev, uint32_t sm, uint32_t wp, CuDim2 *blockIdx);
    CUDBGResult (*readThreadIdx)(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, CuDim3 *threadIdx);
    CUDBGResult (*readBrokenWarps)(uint32_t dev, uint32_t sm, uint64_t *brokenWarpsMask);
    CUDBGResult (*readValidWarps)(uint32_t dev, uint32_t sm, uint64_t *validWarpsMask);
    CUDBGResult (*readValidLanes)(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t *validLanesMask);
    CUDBGResult (*readActiveLanes)(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t *activeLanesMask);
    CUDBGResult (*readCodeMemory)(uint32_t dev, uint64_t addr, void *buf, uint32_t sz);
    CUDBGResult (*readConstMemory)(uint32_t dev, uint64_t addr, void *buf, uint32_t sz);
    CUDBGResult (*readGlobalMemory31)(uint32_t dev, uint64_t addr, void *buf, uint32_t sz);
    CUDBGResult (*readParamMemory)(uint32_t dev, uint32_t sm, uint32_t wp, uint64_t addr, void *buf, uint32_t sz);
    CUDBGResult (*readSharedMemory)(uint32_t dev, uint32_t sm, uint32_t wp, uint64_t addr, void *buf, uint32_t sz);
    CUDBGResult (*readLocalMemory)(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint64_t addr, void *buf, uint32_t sz);
    CUDBGResult (*readRegister)(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t regno, uint32_t *val);
    CUDBGResult (*readPC)(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint64_t *pc);
    CUDBGResult (*readVirtualPC)(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint64_t *pc);
    CUDBGResult (*readLaneStatus)(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, bool *error);

    /* Device State Alteration */
    CUDBGResult (*writeGlobalMemory31)(uint32_t dev, uint64_t addr, const void *buf, uint32_t sz);
    CUDBGResult (*writeParamMemory)(uint32_t dev, uint32_t sm, uint32_t wp, uint64_t addr, const void *buf, uint32_t sz);
    CUDBGResult (*writeSharedMemory)(uint32_t dev, uint32_t sm, uint32_t wp, uint64_t addr, const void *buf, uint32_t sz);
    CUDBGResult (*writeLocalMemory)(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint64_t addr, const void *buf, uint32_t sz);
    CUDBGResult (*writeRegister)(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t regno, uint32_t val);

    /* Grid Properties */
    CUDBGResult (*getGridDim32)(uint32_t dev, uint32_t sm, uint32_t wp, CuDim2 *gridDim);
    CUDBGResult (*getBlockDim)(uint32_t dev, uint32_t sm, uint32_t wp, CuDim3 *blockDim);
    CUDBGResult (*getTID)(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t *tid);
    CUDBGResult (*getElfImage32)(uint32_t dev, uint32_t sm, uint32_t wp, bool relocated, void **elfImage, uint32_t *size);

    /* Device Properties */
    CUDBGResult (*getDeviceType)(uint32_t dev, char *buf, uint32_t sz);
    CUDBGResult (*getSmType)(uint32_t dev, char *buf, uint32_t sz);
    CUDBGResult (*getNumDevices)(uint32_t *numDev);
    CUDBGResult (*getNumSMs)(uint32_t dev, uint32_t *numSMs);
    CUDBGResult (*getNumWarps)(uint32_t dev, uint32_t *numWarps);
    CUDBGResult (*getNumLanes)(uint32_t dev, uint32_t *numLanes);
    CUDBGResult (*getNumRegisters)(uint32_t dev, uint32_t *numRegs);

    /* DWARF-related routines */
    CUDBGResult (*getPhysicalRegister30)(uint64_t pc, char *reg, uint32_t *buf, uint32_t sz, uint32_t *numPhysRegs, CUDBGRegClass *regClass);
    CUDBGResult (*disassemble)(uint32_t dev, uint64_t addr, uint32_t *instSize, char *buf, uint32_t sz);
    CUDBGResult (*isDeviceCodeAddress55)(uintptr_t addr, bool *isDeviceAddress);
    CUDBGResult (*lookupDeviceCodeSymbol)(char *symName, bool *symFound, uintptr_t *symAddr);

    /* Events */
    CUDBGResult (*setNotifyNewEventCallback31)(CUDBGNotifyNewEventCallback31 callback, void *data);
    CUDBGResult (*getNextEvent30)(CUDBGEvent30 *event);
    CUDBGResult (*acknowledgeEvent30)(CUDBGEvent30 *event);

    /* 3.1 Extensions */
    CUDBGResult (*getGridAttribute)(uint32_t dev, uint32_t sm, uint32_t wp, CUDBGAttribute attr, uint64_t *value);
    CUDBGResult (*getGridAttributes)(uint32_t dev, uint32_t sm, uint32_t wp, CUDBGAttributeValuePair *pairs, uint32_t numPairs);
    CUDBGResult (*getPhysicalRegister40)(uint32_t dev, uint32_t sm, uint32_t wp, uint64_t pc, char *reg, uint32_t *buf, uint32_t sz, uint32_t *numPhysRegs, CUDBGRegClass *regClass);
    CUDBGResult (*readLaneException)(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, CUDBGException_t *exception);
    CUDBGResult (*getNextEvent32)(CUDBGEvent32 *event);
    CUDBGResult (*acknowledgeEvents42)(void);

    /* 3.1 - ABI */
    CUDBGResult (*readCallDepth32)(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t *depth);
    CUDBGResult (*readReturnAddress32)(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t level, uint64_t *ra);
    CUDBGResult (*readVirtualReturnAddress32)(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t level, uint64_t *ra);

    /* 3.2 Extensions */
    CUDBGResult (*readGlobalMemory55)(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint64_t addr, void *buf, uint32_t sz);
    CUDBGResult (*writeGlobalMemory55)(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint64_t addr, const void *buf, uint32_t sz);
    CUDBGResult (*readPinnedMemory)(uint64_t addr, void *buf, uint32_t sz);
    CUDBGResult (*writePinnedMemory)(uint64_t addr, const void *buf, uint32_t sz);
    CUDBGResult (*setBreakpoint)(uint32_t dev, uint64_t addr);
    CUDBGResult (*unsetBreakpoint)(uint32_t dev, uint64_t addr);
    CUDBGResult (*setNotifyNewEventCallback40)(CUDBGNotifyNewEventCallback40 callback);

    /* 4.0 Extensions */
    CUDBGResult (*getNextEvent42)(CUDBGEvent42 *event);
    CUDBGResult (*readTextureMemory)(uint32_t devId, uint32_t vsm, uint32_t wp, uint32_t id, uint32_t dim, uint32_t *coords, void *buf, uint32_t sz);
    CUDBGResult (*readBlockIdx)(uint32_t dev, uint32_t sm, uint32_t wp, CuDim3 *blockIdx);
    CUDBGResult (*getGridDim)(uint32_t dev, uint32_t sm, uint32_t wp, CuDim3 *gridDim);
    CUDBGResult (*readCallDepth)(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t *depth);
    CUDBGResult (*readReturnAddress)(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t level, uint64_t *ra);
    CUDBGResult (*readVirtualReturnAddress)(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t level, uint64_t *ra);
    CUDBGResult (*getElfImage)(uint32_t dev, uint32_t sm, uint32_t wp, bool relocated, void **elfImage, uint64_t *size);

    /* 4.1 Extensions */
    CUDBGResult (*getHostAddrFromDeviceAddr)(uint32_t dev, uint64_t device_addr, uint64_t *host_addr);
    CUDBGResult (*singleStepWarp41)(uint32_t dev, uint32_t sm, uint32_t wp, uint64_t *warpMask);
    CUDBGResult (*setNotifyNewEventCallback)(CUDBGNotifyNewEventCallback callback);
    CUDBGResult (*readSyscallCallDepth)(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t *depth);

    /* 4.2 Extensions */
    CUDBGResult (*readTextureMemoryBindless)(uint32_t devId, uint32_t vsm, uint32_t wp, uint32_t texSymtabIndex, uint32_t dim, uint32_t *coords, void *buf, uint32_t sz);

    /* 5.0 Extensions */
    CUDBGResult (*clearAttachState)(void);
    CUDBGResult (*getNextSyncEvent50)(CUDBGEvent50 *event);
    CUDBGResult (*memcheckReadErrorAddress)(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint64_t *address, ptxStorageKind *storage);
    CUDBGResult (*acknowledgeSyncEvents)(void);
    CUDBGResult (*getNextAsyncEvent50)(CUDBGEvent50 *event);
    CUDBGResult (*requestCleanupOnDetach55)(void);
    CUDBGResult (*initializeAttachStub)(void);
    CUDBGResult (*getGridStatus50)(uint32_t dev, uint32_t gridId, CUDBGGridStatus *status);

    /* 5.5 Extensions */
    CUDBGResult (*getNextSyncEvent55)(CUDBGEvent55 *event);
    CUDBGResult (*getNextAsyncEvent55)(CUDBGEvent55 *event);
    CUDBGResult (*getGridInfo55)(uint32_t dev, uint64_t gridId64, CUDBGGridInfo55 *gridInfo);
    CUDBGResult (*readGridId)(uint32_t dev, uint32_t sm, uint32_t wp, uint64_t *gridId64);
    CUDBGResult (*getGridStatus)(uint32_t dev, uint64_t gridId64, CUDBGGridStatus *status);
    CUDBGResult (*setKernelLaunchNotificationMode)(CUDBGKernelLaunchNotifyMode mode);
    CUDBGResult (*getDevicePCIBusInfo)(uint32_t devId, uint32_t *pciBusId, uint32_t *pciDevId);
    CUDBGResult (*readDeviceExceptionState80)(uint32_t devId, uint64_t *exceptionSMMask);

    /* 6.0 Extensions */
    CUDBGResult (*getAdjustedCodeAddress)(uint32_t devId, uint64_t address, uint64_t *adjustedAddress, CUDBGAdjAddrAction adjAction);
    CUDBGResult (*readErrorPC)(uint32_t devId, uint32_t sm, uint32_t wp, uint64_t *errorPC, bool *errorPCValid);
    CUDBGResult (*getNextEvent)(CUDBGEventQueueType type, CUDBGEvent  *event);
    CUDBGResult (*getElfImageByHandle)(uint32_t devId, uint64_t handle, CUDBGElfImageType type, void *elfImage, uint64_t size);
    CUDBGResult (*resumeWarpsUntilPC)(uint32_t devId, uint32_t sm, uint64_t warpMask, uint64_t virtPC);
    CUDBGResult (*readWarpState60)(uint32_t devId, uint32_t sm, uint32_t wp, CUDBGWarpState60 *state);
    CUDBGResult (*readRegisterRange)(uint32_t devId, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t index, uint32_t registers_size, uint32_t *registers);
    CUDBGResult (*readGenericMemory)(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint64_t addr, void *buf, uint32_t sz);
    CUDBGResult (*writeGenericMemory)(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint64_t addr, const void *buf, uint32_t sz);
    CUDBGResult (*readGlobalMemory)(uint64_t addr, void *buf, uint32_t sz);
    CUDBGResult (*writeGlobalMemory)(uint64_t addr, const void *buf, uint32_t sz);
    CUDBGResult (*getManagedMemoryRegionInfo)(uint64_t startAddress, CUDBGMemoryInfo *memoryInfo, uint32_t memoryInfo_size, uint32_t *numEntries);
    CUDBGResult (*isDeviceCodeAddress)(uintptr_t addr, bool *isDeviceAddress);
    CUDBGResult (*requestCleanupOnDetach)(uint32_t appResumeFlag);

    /* 6.5 Extensions */
    CUDBGResult (*readPredicates)(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t predicates_size, uint32_t *predicates);
    CUDBGResult (*writePredicates)(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t predicates_size, const uint32_t *predicates);
    CUDBGResult (*getNumPredicates)(uint32_t dev, uint32_t *numPredicates);
    CUDBGResult (*readCCRegister)(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t *val);
    CUDBGResult (*writeCCRegister)(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t val);

    CUDBGResult (*getDeviceName)(uint32_t dev, char *buf, uint32_t sz);
    CUDBGResult (*singleStepWarp65)(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t nsteps, uint64_t *warpMask);

    /* 9.0 Extensions */
    /* Unlike readDeviceExceptionState80, takes the mask as an array of
       numWords 64-bit words */
    CUDBGResult (*readDeviceExceptionState)(uint32_t devId, uint64_t *mask, uint32_t numWords);

    /* 10.0 Extensions */
    CUDBGResult (*getNumUniformRegisters)(uint32_t dev, uint32_t *numRegs);
    CUDBGResult (*readUniformRegisterRange)(uint32_t devId, uint32_t sm, uint32_t wp, uint32_t regno, uint32_t registers_size, uint32_t *registers);
    CUDBGResult (*writeUniformRegister)(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t regno, uint32_t val);
    CUDBGResult (*getNumUniformPredicates)(uint32_t dev, uint32_t *numPredicates);
    CUDBGResult (*readUniformPredicates)(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t predicates_size, uint32_t *predicates);
    CUDBGResult (*writeUniformPredicates)(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t predicates_size, const uint32_t *predicates);

    /* 11.8 Extensions */
    CUDBGResult (*getLoadedFunctionInfo118)(uint32_t devId, uint64_t handle, CUDBGLoadedFunctionInfo *info, uint32_t numEntries);

    /* 12.0 Extensions */
    CUDBGResult (*getGridInfo120)(uint32_t dev, uint64_t gridId64, CUDBGGridInfo120 *gridInfo);
    /* Identifies the grid by (dev, gridId64); the later getClusterDim takes
       (dev, sm, wp) instead */
    CUDBGResult (*getClusterDim120)(uint32_t dev, uint64_t gridId64, CuDim3 *clusterDim);
    CUDBGResult (*readWarpState120)(uint32_t dev, uint32_t sm, uint32_t wp, CUDBGWarpState120 *state);
    CUDBGResult (*readClusterIdx)(uint32_t dev, uint32_t sm, uint32_t wp, CuDim3 *clusterIdx);

    /* 12.2 Extensions */
    CUDBGResult (*getErrorStringEx)(char *buf, uint32_t bufSz, uint32_t *msgSz);

    /* 12.3 Extensions */
    /* Adds a startIndex parameter relative to getLoadedFunctionInfo118 */
    CUDBGResult (*getLoadedFunctionInfo)(uint32_t devId, uint64_t handle, CUDBGLoadedFunctionInfo *info, uint32_t startIndex, uint32_t numEntries);
    CUDBGResult (*generateCoredump)(const char* filename, CUDBGCoredumpGenerationFlags flags);
    CUDBGResult (*getConstBankAddress123)(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t bank, uint32_t offset, uint64_t* address);

    /* 12.4 Extensions */
    CUDBGResult (*getDeviceInfoSizes)(uint32_t dev, CUDBGDeviceInfoSizes* sizes);
    CUDBGResult (*getDeviceInfo)(uint32_t dev, CUDBGDeviceInfoQueryType_t type, void *buffer, uint32_t length, uint32_t *dataLength);
    /* Identifies the grid by (dev, gridId64) and also reports a size;
       getConstBankAddress123 takes (dev, sm, wp, bank, offset) */
    CUDBGResult (*getConstBankAddress)(uint32_t dev, uint64_t gridId64, uint32_t bank, uint64_t* address, uint32_t* size);
    CUDBGResult (*singleStepWarp)(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t laneHint, uint32_t nsteps, uint32_t flags, uint64_t *warpMask);

    /* 12.5 Extensions */
    CUDBGResult (*readAllVirtualReturnAddresses)(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint64_t *addrs, uint32_t numAddrs, uint32_t* callDepth, uint32_t* syscallCallDepth);
    CUDBGResult (*getSupportedDebuggerCapabilities)(CUDBGCapabilityFlags* capabilities);
    CUDBGResult (*readSmException)(uint32_t dev, uint32_t sm, CUDBGException_t *exception, uint64_t *errorPC, bool *errorPCValid);

    /* 12.6 Extensions */
    CUDBGResult (*executeInternalCommand)(const char* command, char* resultBuffer, uint32_t sizeInBytes);

    /* 12.7 Extensions */
    CUDBGResult (*getGridInfo)(uint32_t dev, uint64_t gridId64, CUDBGGridInfo *gridInfo);
    CUDBGResult (*getClusterDim)(uint32_t dev, uint32_t sm, uint32_t wp, CuDim3 *clusterDim);
    CUDBGResult (*readWarpState)(uint32_t dev, uint32_t sm, uint32_t wp, CUDBGWarpState *state);
    CUDBGResult (*getClusterExceptionTargetBlock)(uint32_t dev, uint32_t sm, uint32_t wp, CuDim3 *blockIdx, bool *blockIdxValid);

    /* 12.8 Extensions */
    CUDBGResult (*readWarpResources)(uint32_t dev, uint32_t sm, uint32_t wp, CUDBGWarpResources *resources);
};

#ifdef __cplusplus
}
#endif

#endif
