# Section definition "LaunchStats" (display name "Launch Statistics").
# Layout: identity fields, set membership, a section-wide collection filter,
# the Header metric list, an additional Metrics list, and a Body containing
# one SuffixTable item.

Identifier: "LaunchStats"
DisplayName: "Launch Statistics"
Description: "Summary of the configuration used to launch the kernel. The launch configuration defines the size of the kernel grid, the division of the grid into blocks, and the GPU resources needed to execute the kernel. Choosing an efficient launch configuration maximizes device utilization."
Order: 70

# Sets that include this section.
Sets {
  Identifier: "basic"
}
Sets {
  Identifier: "detailed"
}
Sets {
  Identifier: "full"
}

# Section-wide filter: lists the Launch, Range and Cmdlist collection scopes.
Filter {
  CollectionFilter {
    CollectionScopes: CollectionScope_Launch
    CollectionScopes: CollectionScope_Range
    CollectionScopes: CollectionScope_Cmdlist
  }
}

# Header metrics. Entry order is kept exactly as in the original definition;
# some entries carry their own Filter (collection scopes and/or MinArch).
Header {
  Metrics {
    Label: "Grid Size"
    Name: "launch__grid_size"
  }
  Metrics {
    Label: "Function Cache Configuration"
    Name: "launch__func_cache_config"
    Filter {
      CollectionFilter {
        CollectionScopes: CollectionScope_Launch
        CollectionScopes: CollectionScope_Range
      }
    }
  }
  Metrics {
    Label: "Cluster Size"
    Name: "launch__cluster_size"
    Filter {
      MinArch: CC_90
      CollectionFilter {
        CollectionScopes: CollectionScope_Launch
        CollectionScopes: CollectionScope_Range
      }
    }
  }
  Metrics {
    Label: "Registers Per Thread"
    Name: "launch__registers_per_thread"
  }
  Metrics {
    Label: "Cluster Scheduling Policy"
    Name: "launch__cluster_scheduling_policy"
    Filter {
      MinArch: CC_90
      CollectionFilter {
        CollectionScopes: CollectionScope_Launch
        CollectionScopes: CollectionScope_Range
      }
    }
  }
  Metrics {
    Label: "Static Shared Memory Per Block"
    Name: "launch__shared_mem_per_block_static"
    Filter {
      CollectionFilter {
        CollectionScopes: CollectionScope_Launch
      }
    }
  }
  Metrics {
    Label: "Block Size"
    Name: "launch__block_size"
  }
  Metrics {
    Label: "Dynamic Shared Memory Per Block"
    Name: "launch__shared_mem_per_block_dynamic"
    Filter {
      CollectionFilter {
        CollectionScopes: CollectionScope_Launch
      }
    }
  }
  Metrics {
    Label: "Threads"
    Name: "launch__thread_count"
  }
  Metrics {
    Label: "Driver Shared Memory Per Block"
    Name: "launch__shared_mem_per_block_driver"
    Filter {
      CollectionFilter {
        CollectionScopes: CollectionScope_Launch
      }
    }
  }
  Metrics {
    Label: "Waves Per SM"
    Name: "launch__waves_per_multiprocessor"
    Filter {
      CollectionFilter {
        CollectionScopes: CollectionScope_Launch
      }
    }
  }
  Metrics {
    Label: "Shared Memory Configuration Size"
    Name: "launch__shared_mem_config_size"
  }
  Metrics {
    Label: "Uses Green Context"
    Name: "launch__uses_green_context"
  }
  Metrics {
    Label: "Stack Size"
    Name: "launch__stack_size"
  }
  Metrics {
    Label: "# SMs"
    Name: "launch__sm_count"
  }
  Metrics {
    Label: "# TPCs"
    Name: "launch__tpc_count"
  }
  Metrics {
    Label: "Enabled TPC IDs"
    Name: "launch__tpc_enabled"
  }
}

# Additional metrics declared outside the Header: the two occupancy values.
Metrics {
  Metrics {
    Label: "Achieved Occupancy"
    Name: "sm__warps_active.avg.pct_of_peak_sustained_active"
  }
  Metrics {
    Label: "Theoretical Occupancy"
    Name: "sm__maximum_warps_per_active_cycle_pct"
    Filter {
      CollectionFilter {
        CollectionScopes: CollectionScope_Launch
      }
    }
  }
}

# Body: a single item, filtered with MinArch CC_100, holding a suffix table
# that pairs each base name with the .avg/.min/.max/.sum suffixes.
Body {
  Items {
    Filter {
      MinArch: CC_100
    }
    SuffixTable {
      Label: "WorkID Info"
      Suffixes {
        Suffix {
          Label: "Average"
          Name: ".avg"
        }
        Suffix {
          Label: "Min"
          Name: ".min"
        }
        Suffix {
          Label: "Max"
          Name: ".max"
        }
        Suffix {
          Label: "Sum"
          Name: ".sum"
        }
      }
      BaseNames {
        BaseName {
          Label: "WorkId Requests"
          Name: "gr__workids_requested"
        }
        BaseName {
          Label: "WorkId Requests Granted"
          Name: "gr__workids_granted"
        }
        BaseName {
          Label: "WorkId Requests Granted as CTAs"
          Name: "gr__workids_granted_as_ctas"
        }
      }
    }
  }
}
# NOTE(review): the trailing "#endif" below has no matching "#if" in this view;
# it is presumably template residue read as a comment - confirm before removing.
#endif