NVRrj Linux3#86~20.04.2-Ubuntu SMP Mon Jul 17 23:27:17 UTC 2023"AMD Ryzen 7 3700X 8-Core Processor"x86_640"(2 127.0.0.1 "(t2w 127.0.0.1e/usr/local/nsight-compute-2023.3.1/extras/samples/uncoalescedGlobalAccesses/uncoalescedGlobalAccesses :"=ELF3uVV@@.shstrtab.strtab.symtab.symtab_shndx.nv.info.text._Z14addConstDoubleiPddS_.nv.info._Z14addConstDoubleiPddS_.nv.shared._Z14addConstDoubleiPddS_.nv.constant0._Z14addConstDoubleiPddS_.text._Z15addConstDouble3iP7double3dS0_.nv.info._Z15addConstDouble3iP7double3dS0_.nv.shared._Z15addConstDouble3iP7double3dS0_.nv.constant0._Z15addConstDouble3iP7double3dS0_.debug_frame.debug_line.rel.debug_line.nv_debug_line_sass.rel.nv_debug_line_sass.nv_debug_ptx_txt.rel.debug_frame.rela.debug_frame.nv.rel.action.shstrtab.strtab.symtab.symtab_shndx.nv.info_Z14addConstDoubleiPddS_.text._Z14addConstDoubleiPddS_.nv.info._Z14addConstDoubleiPddS_.nv.shared._Z14addConstDoubleiPddS_.nv.constant0._Z14addConstDoubleiPddS__param_Z15addConstDouble3iP7double3dS0_.text._Z15addConstDouble3iP7double3dS0_.nv.info._Z15addConstDouble3iP7double3dS0_.nv.shared._Z15addConstDouble3iP7double3dS0_.nv.constant0._Z15addConstDouble3iP7double3dS0_.debug_frame.debug_line.rel.debug_line.nv_debug_line_sass.rel.nv_debug_line_sass.nv_debug_ptx_txt.rel.debug_frame.rela.debug_frame.nv.rel.actionK: 2(| (((0p ((| (((0p (4 /usr/local/nsight-compute-2023.3.1/extras/samples/uncoalescedGlobalAccessesuncoalescedGlobalAccesses.cuҸާ5 >0  10     g gt .version 7.7.target sm_86.address_size 64.visible .entry _Z15addConstDouble3iP7double3dS0_(.param .u32 _Z15addConstDouble3iP7double3dS0__param_0,.param .u64 _Z15addConstDouble3iP7double3dS0__param_1,.param .f64 _Z15addConstDouble3iP7double3dS0__param_2,.param .u64 _Z15addConstDouble3iP7double3dS0__param_3){.reg .pred %p<2>;.reg .b32 %r<6>;.reg .f64 %fd<8>;.reg .b64 %rd<8>;ld.param.u32 %r2, [_Z15addConstDouble3iP7double3dS0__param_0];ld.param.u64 %rd1, [_Z15addConstDouble3iP7double3dS0__param_1];ld.param.f64 %fd1, [_Z15addConstDouble3iP7double3dS0__param_2];ld.param.u64 %rd2, [_Z15addConstDouble3iP7double3dS0__param_3];mov.u32 %r3, %ntid.x;mov.u32 %r4, %ctaid.x;mov.u32 %r5, %tid.x;mad.lo.s32 %r1, %r4, %r3, %r5;setp.ge.s32 %p1, %r1, %r2;@%p1 bra $L__BB0_2;cvta.to.global.u64 %rd3, %rd1;mul.wide.s32 %rd4, %r1, 24;add.s64 %rd5, %rd3, %rd4;ld.global.f64 %fd2, [%rd5];add.f64 %fd3, %fd2, %fd1;ld.global.f64 %fd4, [%rd5+8];add.f64 %fd5, %fd4, %fd1;ld.global.f64 %fd6, [%rd5+16];add.f64 %fd7, %fd6, %fd1;cvta.to.global.u64 %rd6, %rd2;add.s64 %rd7, %rd6, %rd4;st.global.f64 [%rd7], %fd3;st.global.f64 [%rd7+8], %fd5;st.global.f64 [%rd7+16], %fd7;$L__BB0_2:ret;}.visible .entry _Z14addConstDoubleiPddS_(.param .u32 _Z14addConstDoubleiPddS__param_0,.param .u64 _Z14addConstDoubleiPddS__param_1,.param .f64 _Z14addConstDoubleiPddS__param_2,.param .u64 _Z14addConstDoubleiPddS__param_3){.reg .pred %p<2>;.reg .b32 %r<6>;.reg .f64 %fd<4>;.reg .b64 %rd<8>;ld.param.u32 %r2, [_Z14addConstDoubleiPddS__param_0];ld.param.u64 %rd1, [_Z14addConstDoubleiPddS__param_1];ld.param.f64 %fd1, [_Z14addConstDoubleiPddS__param_2];ld.param.u64 %rd2, [_Z14addConstDoubleiPddS__param_3];mov.u32 %r3, %ctaid.x;mov.u32 %r4, %ntid.x;mov.u32 %r5, %tid.x;mad.lo.s32 %r1, %r3, %r4, %r5;setp.ge.s32 %p1, %r1, %r2;@%p1 bra $L__BB1_2;cvta.to.global.u64 %rd3, %rd1;mul.wide.s32 %rd4, %r1, 8;add.s64 %rd5, %rd3, %rd4;ld.global.f64 %fd2, [%rd5];add.f64 %fd3, %fd2, %fd1;cvta.to.global.u64 %rd6, %rd2;add.s64 %rd7, %rd6, %rd4;st.global.f64 [%rd7], %fd3;$L__BB1_2:ret;}/ #   / #   7u5 `   ! ! ! P7u5 `   ! ! ! P0K /" (08 (08 (08,   M   H z y%(y!$$z zXpbM xzF %vZy %v^)v\Ny MyGyyyyyyyyyyz y %(y!$$z  z XpbM x zF %v Z y y y "%v ^ )v\N)v\Fy  )v\y  /y  MyGyyyyyyyyyyyy@ GI  n{s)p`Qplp`l p      >2  (0 8B6 6 h/usr/local/nsight-compute-2023.3.1/extras/samples/uncoalescedGlobalAccesses/uncoalescedGlobalAccesses.cu5/* * Copyright (c) 2022-2023, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * Sample CUDA application for uncoalesced global memory accesses. * Adds a floating point constant to an input array of double3 elements in * global memory and generates an output array of double3 in global memory. */ #include #include #define BLOCK_SIZE 256 #define RUNTIME_API_CALL(apiFuncCall) \ do { \ cudaError_t _status = apiFuncCall; \ if (_status != cudaSuccess) { \ fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n", \ __FILE__, __LINE__, #apiFuncCall, cudaGetErrorString(_status));\ exit(EXIT_FAILURE); \ } \ } while (0) __global__ void addConstDouble3(int numElements, double3 *d_in, double k, double3 *d_out) { int index = blockIdx.x * blockDim.x + threadIdx.x; if (index < numElements) { double3 a = d_in[index]; a.x += k; a.y += k; a.z += k; d_out[index] = a; } } __global__ void addConstDouble(int numElements, double *d_in, double k, double *d_out) { int index = blockIdx.x * blockDim.x + threadIdx.x; if (index < numElements) { d_out[index] = d_in[index] + k; } } int main (int argc, char *argv[]) { // Error code to check return values for CUDA calls cudaError_t err = cudaSuccess; double constK = 10.0; int kernelOption = 0; if (argc > 1) { kernelOption = atoi(argv[1]); } int numElements = 1024*1024; if (argc > 2) { numElements = atoi(argv[2]); if (numElements <= 0) { fprintf(stderr, "Invalid number of elements(%s), should be a positive number\n", argv[2]); exit(EXIT_FAILURE); } } printf("double3 constant addition of %d elements\n", numElements); printf("kernelOption=%d\n", kernelOption); size_t size = numElements * sizeof(double3); // Allocate the host input array double3 *h_A = (double3 *)malloc(size); // Allocate the host output array double3 *h_B = (double3 *)malloc(size); // Verify that allocations succeeded if (h_A == NULL || h_B == NULL) { fprintf(stderr, "Failed to allocate host arrays!\n"); exit(EXIT_FAILURE); } // Initialize the host input vectors for (int i = 0; i < numElements; ++i) { h_A[i].x = rand()/(double)RAND_MAX; h_A[i].y = rand()/(double)RAND_MAX; h_A[i].z = rand()/(double)RAND_MAX; } // Allocate the device input array A double3 *d_A = NULL; RUNTIME_API_CALL(cudaMalloc((void **)&d_A, size)); // Allocate the device output array B double3 *d_B = NULL; RUNTIME_API_CALL(cudaMalloc((void **)&d_B, size)); // Copy the host input array A in host memory to the device input array in device memory RUNTIME_API_CALL(cudaMemcpy(d_A, h_A, size, cudaMemcpyHostToDevice)); // Launch the CUDA Kernel int threadsPerBlock = BLOCK_SIZE; if (kernelOption == 0) { int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; printf("CUDA kernel addConstDouble3 launch with %d blocks of %d threads\n", blocksPerGrid, threadsPerBlock); addConstDouble3<<>>(numElements, d_A, constK, d_B); } else if (kernelOption == 1) { int blocksPerGrid =(numElements*3 + threadsPerBlock - 1) / threadsPerBlock; printf("CUDA kernel addConstDouble launch with %d blocks of %d threads\n", blocksPerGrid, threadsPerBlock); addConstDouble<<>>(numElements*3, (double *)d_A, constK, (double *)d_B); } else { fprintf(stderr, "** Invalid kernel option %d\n", kernelOption); exit(EXIT_FAILURE); } err = cudaGetLastError(); if (err != cudaSuccess) { fprintf(stderr, "Failed to launch kernel (error code %s)!\n", cudaGetErrorString(err)); exit(EXIT_FAILURE); } // Copy the device result array in device memory to the host result vector // in host memory. RUNTIME_API_CALL(cudaMemcpy(h_B, d_B, size, cudaMemcpyDeviceToHost)); // Verify that the result vector is correct for (int i = 0; i < numElements; ++i) { if ((fabs(h_A[i].x + constK - h_B[i].x) > 1e-5) || (fabs(h_A[i].y + constK - h_B[i].y) > 1e-5) || (fabs(h_A[i].z + constK - h_B[i].z) > 1e-5)) { fprintf(stderr, "Result verification failed at element %d!\n", i); exit(EXIT_FAILURE); } } // Free device global memory RUNTIME_API_CALL(cudaFree(d_A)); RUNTIME_API_CALL(cudaFree(d_B)); // Free host memory free(h_A); free(h_B); printf("Done\n"); return 0; } J/ - !_Z15addConstDouble3iP7double3dS0_ %" launch__grid_size launch__grid_dim_x launch__grid_dim_y launch__grid_dim_z launch__block_size launch__block_dim_x launch__block_dim_y launch__block_dim_z launch__thread_count #launch__shared_mem_per_block_static $launch__shared_mem_per_block_dynamic #launch__shared_mem_per_block_driver launch__shared_mem_per_block launch__shared_mem_config_size launch__registers_per_thread launch__function_pcs launch__uses_cdp launch__device_id launch__context_id launch__func_cache_config launch__stream_id &sm__maximum_warps_per_active_cycle_pct &sm__maximum_warps_avg_per_active_cycle launch__waves_per_multiprocessor &launch__registers_per_thread_allocated &launch__shared_mem_per_block_allocated !launch__occupancy_limit_registers "launch__occupancy_limit_shared_mem launch__occupancy_limit_warps launch__occupancy_limit_blocks $launch__occupancy_per_register_count launch__occupancy_per_block_size %launch__occupancy_per_shared_mem_size device__attribute_architecture device__attribute_chip device__attribute_device_index 'device__attribute_limits_max_cta_per_sm 'device__attribute_max_gpu_frequency_khz 'device__attribute_max_mem_frequency_khz device__attribute_fb_bus_width device__attribute_fbp_count 3device__attribute_num_schedulers_per_multiprocessor ,device__attribute_num_tex_per_multiprocessor !device__attribute_num_l2s_per_fbp *device__attribute_max_registers_per_thread )device__attribute_max_warps_per_scheduler .device__attribute_max_warps_per_multiprocessor 'device__attribute_max_ipc_per_scheduler ,device__attribute_max_ipc_per_multiprocessor device__attribute_l2s_count device__attribute_implementation device__attribute_sass_level -device__attribute_confidential_computing_mode 'device__attribute_max_threads_per_block !device__attribute_max_block_dim_x !device__attribute_max_block_dim_y !device__attribute_max_block_dim_z device__attribute_max_grid_dim_x device__attribute_max_grid_dim_y device__attribute_max_grid_dim_z -device__attribute_max_shared_memory_per_block 'device__attribute_total_constant_memory device__attribute_warp_size device__attribute_max_pitch )device__attribute_max_registers_per_block device__attribute_clock_rate #device__attribute_texture_alignment device__attribute_gpu_overlap &device__attribute_multiprocessor_count %device__attribute_kernel_exec_timeout device__attribute_integrated %device__attribute_can_map_host_memory device__attribute_compute_mode )device__attribute_maximum_texture1d_width )device__attribute_maximum_texture2d_width *device__attribute_maximum_texture2d_height )device__attribute_maximum_texture3d_width *device__attribute_maximum_texture3d_height )device__attribute_maximum_texture3d_depth 1device__attribute_maximum_texture2d_layered_width 2device__attribute_maximum_texture2d_layered_height 2device__attribute_maximum_texture2d_layered_layers #device__attribute_surface_alignment $device__attribute_concurrent_kernels device__attribute_ecc_enabled device__attribute_pci_bus_id device__attribute_pci_device_id device__attribute_tcc_driver #device__attribute_memory_clock_rate )device__attribute_global_memory_bus_width device__attribute_l2_cache_size 0device__attribute_max_threads_per_multiprocessor $device__attribute_async_engine_count $device__attribute_unified_addressing 1device__attribute_maximum_texture1d_layered_width 2device__attribute_maximum_texture1d_layered_layers "device__attribute_can_tex2d_gather 0device__attribute_maximum_texture2d_gather_width 1device__attribute_maximum_texture2d_gather_height 3device__attribute_maximum_texture3d_width_alternate 4device__attribute_maximum_texture3d_height_alternate 3device__attribute_maximum_texture3d_depth_alternate device__attribute_pci_domain_id )device__attribute_texture_pitch_alignment .device__attribute_maximum_texturecubemap_width 6device__attribute_maximum_texturecubemap_layered_width 7device__attribute_maximum_texturecubemap_layered_layers )device__attribute_maximum_surface1d_width )device__attribute_maximum_surface2d_width *device__attribute_maximum_surface2d_height )device__attribute_maximum_surface3d_width *device__attribute_maximum_surface3d_height )device__attribute_maximum_surface3d_depth 1device__attribute_maximum_surface1d_layered_width 2device__attribute_maximum_surface1d_layered_layers 1device__attribute_maximum_surface2d_layered_width 2device__attribute_maximum_surface2d_layered_height 2device__attribute_maximum_surface2d_layered_layers .device__attribute_maximum_surfacecubemap_width 6device__attribute_maximum_surfacecubemap_layered_width 7device__attribute_maximum_surfacecubemap_layered_layers 0device__attribute_maximum_texture1d_linear_width 0device__attribute_maximum_texture2d_linear_width 1device__attribute_maximum_texture2d_linear_height 0device__attribute_maximum_texture2d_linear_pitch 3device__attribute_maximum_texture2d_mipmapped_width 4device__attribute_maximum_texture2d_mipmapped_height 3device__attribute_maximum_texture1d_mipmapped_width -device__attribute_stream_priorities_supported +device__attribute_global_l1_cache_supported *device__attribute_local_l1_cache_supported 6device__attribute_max_shared_memory_per_multiprocessor 2device__attribute_max_registers_per_multiprocessor device__attribute_managed_memory !device__attribute_multi_gpu_board *device__attribute_multi_gpu_board_group_id .device__attribute_host_native_atomic_supported 7device__attribute_single_to_double_precision_perf_ratio (device__attribute_pageable_memory_access +device__attribute_concurrent_managed_access .device__attribute_compute_preemption_supported 9device__attribute_can_use_host_pointer_for_registered_mem $device__attribute_cooperative_launch 1device__attribute_cooperative_multi_device_launch 3device__attribute_max_shared_memory_per_block_optin )device__attribute_can_flush_remote_writes )device__attribute_host_register_supported >device__attribute_pageable_memory_access_uses_host_page_tables 5device__attribute_direct_managed_mem_access_from_host 6device__attribute_virtual_address_management_supported =device__attribute_handle_type_posix_file_descriptor_supported 4device__attribute_handle_type_win32_handle_supported 8device__attribute_handle_type_win32_kmt_handle_supported /device__attribute_max_blocks_per_multiprocessor /device__attribute_generic_compression_supported .device__attribute_max_persisting_l2_cache_size /device__attribute_max_access_policy_window_size 9device__attribute_gpu_direct_rdma_with_cuda_vmm_supported 2device__attribute_reserved_shared_memory_per_block -device__attribute_sparse_cuda_array_supported (device__attribute_memory_pools_supported +device__attribute_gpu_direct_rdma_supported 6device__attribute_gpu_direct_rdma_flush_writes_options 1device__attribute_gpu_direct_rdma_writes_ordering 0device__attribute_mempool_supported_handle_types device__attribute_cluster_launch 7device__attribute_deferred_mapping_cuda_array_supported %device__attribute_ipc_event_supported +device__attribute_can_use_stream_mem_ops_v1 2device__attribute_can_use_64_bit_stream_mem_ops_v1 2device__attribute_can_use_stream_wait_value_nor_v1 /device__attribute_can_use_64_bit_stream_mem_ops /device__attribute_can_use_stream_wait_value_nor #device__attribute_dma_buf_supported 'device__attribute_mem_sync_domain_count -device__attribute_tensor_map_access_supported +device__attribute_unified_function_pointers device__attribute_numa_config %device__attribute_multicast_supported device__attribute_mps_enabled device__attribute_host_numa_id device__attribute_display_name *device__attribute_compute_capability_major *device__attribute_compute_capability_minor device__attribute_total_memory device__attribute_ram_type device__attribute_ram_location #device__attribute_gpu_pci_device_id 'device__attribute_gpu_pci_sub_system_id %device__attribute_gpu_pci_revision_id 'device__attribute_gpu_pci_ext_device_id !device__attribute_gpu_pci_ext_gen %device__attribute_gpu_pci_ext_gpu_gen +device__attribute_gpu_pci_ext_gpu_link_rate ,device__attribute_gpu_pci_ext_gpu_link_width 2device__attribute_gpu_pci_ext_downstream_link_rate 3device__attribute_gpu_pci_ext_downstream_link_width (smsp__maximum_warps_avg_per_active_cycle sass__inst_executed_per_opcode 6sass__inst_executed_per_opcode_with_modifier_selective 0sass__inst_executed_per_opcode_with_modifier_all *sass__thread_inst_executed_true_per_opcode Bsass__thread_inst_executed_true_per_opcode_with_modifier_selective smsp__pcsamp_warps_issue_stalled_math_pipe_throttle_not_issued 'smsp__pcsamp_warps_issue_stalled_membar 2smsp__pcsamp_warps_issue_stalled_membar_not_issued -smsp__pcsamp_warps_issue_stalled_mio_throttle 8smsp__pcsamp_warps_issue_stalled_mio_throttle_not_issued %smsp__pcsamp_warps_issue_stalled_misc 0smsp__pcsamp_warps_issue_stalled_misc_not_issued 0smsp__pcsamp_warps_issue_stalled_no_instructions ;smsp__pcsamp_warps_issue_stalled_no_instructions_not_issued -smsp__pcsamp_warps_issue_stalled_not_selected 8smsp__pcsamp_warps_issue_stalled_not_selected_not_issued )smsp__pcsamp_warps_issue_stalled_selected 4smsp__pcsamp_warps_issue_stalled_selected_not_issued 1smsp__pcsamp_warps_issue_stalled_short_scoreboard smsp__sass_inst_executed_memdesc_explicit_hitprop_evict_normal Esmsp__sass_inst_executed_memdesc_explicit_hitprop_evict_normal_demote >smsp__sass_inst_executed_memdesc_explicit_missprop_evict_first ?smsp__sass_inst_executed_memdesc_explicit_missprop_evict_normal Jbreakdown:gpu__compute_memory_throughput.avg.pct_of_peak_sustained_elapsed :breakdown:sm__throughput.avg.pct_of_peak_sustained_elapsed group:memory__chart group:memory__dram_table %group:memory__first_level_cache_table )group:memory__l2_cache_evict_policy_table group:memory__l2_cache_table group:memory__shared_table %group:smsp__pcsamp_warp_stall_reasons 0group:smsp__pcsamp_warp_stall_reasons_not_issued !profiler__perfworks_session_reuse smsp__pcsamp_dropped_bytes smsp__pcsamp_buffer_size_bytes smsp__pcsamp_interval smsp__pcsamp_interval_cycles smsp__pcsamp_aggregated_passes #profiler__pmsampler_dropped_samples "profiler__pmsampler_merged_samples %profiler__pmsampler_buffer_size_bytes !profiler__pmsampler_interval_time profiler__pmsampler_pass_groups profiler__pmsampler_ctxsw_0 profiler__pmsampler_ctxsw_1 profiler__pmsampler_ctxsw_2 profiler__pmsampler_ctxsw_3 profiler__pmsampler_ctxsw_4 profiler__pmsampler_ctxsw_5 profiler__pmsampler_ctxsw_6 profiler__replayer_passes %profiler__replayer_passes_type_warmup +profiler__replayer_bytes_mem_accessible.min +profiler__replayer_bytes_mem_accessible.max +profiler__replayer_bytes_mem_accessible.sum +profiler__replayer_bytes_mem_accessible.avg *profiler__replayer_bytes_mem_backed_up.min *profiler__replayer_bytes_mem_backed_up.max *profiler__replayer_bytes_mem_backed_up.sum *profiler__replayer_bytes_mem_backed_up.avg dram__bytes.sum.peak_sustained dram__bytes.sum.per_second dram__bytes_read.sum 2dram__bytes_read.sum.pct_of_peak_sustained_elapsed dram__bytes_read.sum.per_second dram__bytes_write.sum 3dram__bytes_write.sum.pct_of_peak_sustained_elapsed dram__bytes_write.sum.per_second 5dram__cycles_active.avg.pct_of_peak_sustained_elapsed #dram__cycles_elapsed.avg.per_second dram__sectors_read.sum dram__sectors_write.sum 4fbpa__dram_sectors.avg.pct_of_peak_sustained_elapsed "gpc__cycles_elapsed.avg.per_second gpc__cycles_elapsed.max Ggpu__compute_memory_access_throughput.avg.pct_of_peak_sustained_elapsed Hgpu__compute_memory_request_throughput.avg.pct_of_peak_sustained_elapsed @gpu__compute_memory_throughput.avg.pct_of_peak_sustained_elapsed 6gpu__dram_throughput.avg.pct_of_peak_sustained_elapsed gpu__time_duration.sum lts__t_sectors_srcunit_tex_evict_normal_demote_lookup_miss.sum 6lts__t_sectors_srcunit_tex_evict_normal_lookup_hit.sum 7lts__t_sectors_srcunit_tex_evict_normal_lookup_miss.sum )lts__t_sectors_srcunit_tex_lookup_hit.sum *lts__t_sectors_srcunit_tex_lookup_miss.sum Llts__t_sectors_srcunit_tex_op_atom_dot_alu.avg.pct_of_peak_sustained_elapsed =lts__t_sectors_srcunit_tex_op_atom_dot_alu.avg.peak_sustained @lts__t_sectors_srcunit_tex_op_atom_dot_alu.avg.per_cycle_elapsed .lts__t_sectors_srcunit_tex_op_atom_dot_alu.sum 9lts__t_sectors_srcunit_tex_op_atom_dot_alu.sum.per_second 9lts__t_sectors_srcunit_tex_op_atom_dot_alu_lookup_hit.sum :lts__t_sectors_srcunit_tex_op_atom_dot_alu_lookup_miss.sum Llts__t_sectors_srcunit_tex_op_atom_dot_cas.avg.pct_of_peak_sustained_elapsed =lts__t_sectors_srcunit_tex_op_atom_dot_cas.avg.peak_sustained @lts__t_sectors_srcunit_tex_op_atom_dot_cas.avg.per_cycle_elapsed .lts__t_sectors_srcunit_tex_op_atom_dot_cas.sum 9lts__t_sectors_srcunit_tex_op_atom_dot_cas.sum.per_second 9lts__t_sectors_srcunit_tex_op_atom_dot_cas_lookup_hit.sum :lts__t_sectors_srcunit_tex_op_atom_dot_cas_lookup_miss.sum =lts__t_sectors_srcunit_tex_op_atom_evict_first_lookup_hit.sum >lts__t_sectors_srcunit_tex_op_atom_evict_first_lookup_miss.sum lts__t_sectors_srcunit_tex_op_atom_evict_normal_lookup_hit.sum ?lts__t_sectors_srcunit_tex_op_atom_evict_normal_lookup_miss.sum Dlts__t_sectors_srcunit_tex_op_read.avg.pct_of_peak_sustained_elapsed 5lts__t_sectors_srcunit_tex_op_read.avg.peak_sustained 8lts__t_sectors_srcunit_tex_op_read.avg.per_cycle_elapsed <s__t_sectors_srcunit_tex_op_read.sum 1lts__t_sectors_srcunit_tex_op_read.sum.per_second =lts__t_sectors_srcunit_tex_op_read_evict_first_lookup_hit.sum >lts__t_sectors_srcunit_tex_op_read_evict_first_lookup_miss.sum lts__t_sectors_srcunit_tex_op_read_evict_normal_lookup_hit.sum ?lts__t_sectors_srcunit_tex_op_read_evict_normal_lookup_miss.sum 1lts__t_sectors_srcunit_tex_op_read_lookup_hit.sum 2lts__t_sectors_srcunit_tex_op_read_lookup_miss.sum Clts__t_sectors_srcunit_tex_op_red.avg.pct_of_peak_sustained_elapsed 4lts__t_sectors_srcunit_tex_op_red.avg.peak_sustained 7lts__t_sectors_srcunit_tex_op_red.avg.per_cycle_elapsed %lts__t_sectors_srcunit_tex_op_red.sum 0lts__t_sectors_srcunit_tex_op_red.sum.per_second 0lts__t_sectors_srcunit_tex_op_red_lookup_hit.sum 1lts__t_sectors_srcunit_tex_op_red_lookup_miss.sum Elts__t_sectors_srcunit_tex_op_write.avg.pct_of_peak_sustained_elapsed 6lts__t_sectors_srcunit_tex_op_write.avg.peak_sustained 9lts__t_sectors_srcunit_tex_op_write.avg.per_cycle_elapsed 'lts__t_sectors_srcunit_tex_op_write.sum 2lts__t_sectors_srcunit_tex_op_write.sum.per_second >lts__t_sectors_srcunit_tex_op_write_evict_first_lookup_hit.sum ?lts__t_sectors_srcunit_tex_op_write_evict_first_lookup_miss.sum =lts__t_sectors_srcunit_tex_op_write_evict_last_lookup_hit.sum >lts__t_sectors_srcunit_tex_op_write_evict_last_lookup_miss.sum Flts__t_sectors_srcunit_tex_op_write_evict_normal_demote_lookup_hit.sum Glts__t_sectors_srcunit_tex_op_write_evict_normal_demote_lookup_miss.sum ?lts__t_sectors_srcunit_tex_op_write_evict_normal_lookup_hit.sum @lts__t_sectors_srcunit_tex_op_write_evict_normal_lookup_miss.sum 2lts__t_sectors_srcunit_tex_op_write_lookup_hit.sum 3lts__t_sectors_srcunit_tex_op_write_lookup_miss.sum 5lts__t_tag_requests.avg.pct_of_peak_sustained_elapsed 1lts__throughput.avg.pct_of_peak_sustained_elapsed =lts__xbar2lts_cycles_active.avg.pct_of_peak_sustained_elapsed pcie__read_bytes.sum.per_second pcie__write_bytes.sum.per_second sm__cycles_active.avg !sm__cycles_elapsed.avg.per_second 3sm__inst_executed.avg.pct_of_peak_sustained_elapsed &sm__inst_executed.avg.per_cycle_active 'sm__inst_executed.avg.per_cycle_elapsed ;sm__inst_executed_pipe_adu.avg.pct_of_peak_sustained_active sm__pipe_tensor_cycles_active.avg.pct_of_peak_sustained_active ?sm__pipe_tensor_cycles_active.avg.pct_of_peak_sustained_elapsed Fsm__pipe_tensor_op_hmma_cycles_active.avg.pct_of_peak_sustained_active Fsm__pipe_tensor_op_imma_cycles_active.avg.pct_of_peak_sustained_active 1sm__sass_inst_executed_op_ldgsts_cache_access.sum 1sm__sass_inst_executed_op_ldgsts_cache_bypass.sum Nsm__sass_l1tex_data_bytes_write_pipe_lsu_mem_shared_op_ldgsts_cache_access.sum lsm__sass_l1tex_data_bytes_write_pipe_lsu_mem_shared_op_ldgsts_cache_access.sum.pct_of_peak_sustained_elapsed Ysm__sass_l1tex_data_bytes_write_pipe_lsu_mem_shared_op_ldgsts_cache_access.sum.per_second Lsm__sass_l1tex_m_xbar2l1tex_read_bytes_mem_global_op_ldgsts_cache_bypass.sum jsm__sass_l1tex_m_xbar2l1tex_read_bytes_mem_global_op_ldgsts_cache_bypass.sum.pct_of_peak_sustained_elapsed Wsm__sass_l1tex_m_xbar2l1tex_read_bytes_mem_global_op_ldgsts_cache_bypass.sum.per_second ;sm__sass_l1tex_t_requests_pipe_lsu_mem_global_op_ldgsts.sum Ysm__sass_l1tex_t_requests_pipe_lsu_mem_global_op_ldgsts.sum.pct_of_peak_sustained_elapsed Hsm__sass_l1tex_t_requests_pipe_lsu_mem_global_op_ldgsts_cache_access.sum Hsm__sass_l1tex_t_requests_pipe_lsu_mem_global_op_ldgsts_cache_bypass.sum Gsm__sass_l1tex_t_sectors_pipe_lsu_mem_global_op_ldgsts_cache_access.sum esm__sass_l1tex_t_sectors_pipe_lsu_mem_global_op_ldgsts_cache_access.sum.pct_of_peak_sustained_elapsed Gsm__sass_l1tex_t_sectors_pipe_lsu_mem_global_op_ldgsts_cache_bypass.sum esm__sass_l1tex_t_sectors_pipe_lsu_mem_global_op_ldgsts_cache_bypass.sum.pct_of_peak_sustained_elapsed @sm__sass_thread_inst_executed_op_dfma_pred_on.sum.peak_sustained @sm__sass_thread_inst_executed_op_ffma_pred_on.sum.peak_sustained 0sm__throughput.avg.pct_of_peak_sustained_elapsed 1sm__warps_active.avg.pct_of_peak_sustained_active %sm__warps_active.avg.per_cycle_active 0smsp__average_warp_latency_per_inst_issued.ratio 2smsp__average_warps_active_per_inst_executed.ratio @smsp__average_warps_issue_stalled_barrier_per_issue_active.ratio Ismsp__average_warps_issue_stalled_branch_resolving_per_issue_active.ratio Gsmsp__average_warps_issue_stalled_dispatch_stall_per_issue_active.ratio >smsp__average_warps_issue_stalled_drain_per_issue_active.ratio Asmsp__average_warps_issue_stalled_imc_miss_per_issue_active.ratio Dsmsp__average_warps_issue_stalled_lg_throttle_per_issue_active.ratio Hsmsp__average_warps_issue_stalled_long_scoreboard_per_issue_active.ratio Ksmsp__average_warps_issue_stalled_math_pipe_throttle_per_issue_active.ratio ?smsp__average_warps_issue_stalled_membar_per_issue_active.ratio Esmsp__average_warps_issue_stalled_mio_throttle_per_issue_active.ratio =smsp__average_warps_issue_stalled_misc_per_issue_active.ratio Gsmsp__average_warps_issue_stalled_no_instruction_per_issue_active.ratio Esmsp__average_warps_issue_stalled_not_selected_per_issue_active.ratio Asmsp__average_warps_issue_stalled_selected_per_issue_active.ratio Ismsp__average_warps_issue_stalled_short_scoreboard_per_issue_active.ratio Asmsp__average_warps_issue_stalled_sleeping_per_issue_active.ratio Esmsp__average_warps_issue_stalled_tex_throttle_per_issue_active.ratio =smsp__average_warps_issue_stalled_wait_per_issue_active.ratio #smsp__cycles_elapsed.avg.per_second smsp__cycles_elapsed.sum smsp__inst_executed.avg smsp__inst_executed.sum !smsp__inst_executed_op_branch.sum /smsp__inst_executed_op_generic_atom_dot_alu.sum Msmsp__inst_executed_op_generic_atom_dot_alu.sum.pct_of_peak_sustained_elapsed /smsp__inst_executed_op_generic_atom_dot_cas.sum Msmsp__inst_executed_op_generic_atom_dot_cas.sum.pct_of_peak_sustained_elapsed %smsp__inst_executed_op_global_red.sum Csmsp__inst_executed_op_global_red.sum.pct_of_peak_sustained_elapsed !smsp__inst_executed_op_ldgsts.sum ?smsp__inst_executed_op_ldgsts.sum.pct_of_peak_sustained_elapsed smsp__inst_executed_op_ldsm.sum =smsp__inst_executed_op_ldsm.sum.pct_of_peak_sustained_elapsed &smsp__inst_executed_op_shared_atom.sum Dsmsp__inst_executed_op_shared_atom.sum.pct_of_peak_sustained_elapsed /smsp__inst_executed_op_surface_atom_dot_alu.sum Msmsp__inst_executed_op_surface_atom_dot_alu.sum.pct_of_peak_sustained_elapsed /smsp__inst_executed_op_surface_atom_dot_cas.sum Msmsp__inst_executed_op_surface_atom_dot_cas.sum.pct_of_peak_sustained_elapsed %smsp__inst_executed_op_surface_ld.sum Csmsp__inst_executed_op_surface_ld.sum.pct_of_peak_sustained_elapsed &smsp__inst_executed_op_surface_red.sum Dsmsp__inst_executed_op_surface_red.sum.pct_of_peak_sustained_elapsed %smsp__inst_executed_op_surface_st.sum Csmsp__inst_executed_op_surface_st.sum.pct_of_peak_sustained_elapsed "smsp__inst_executed_op_texture.sum @smsp__inst_executed_op_texture.sum.pct_of_peak_sustained_elapsed smsp__inst_issued.avg smsp__inst_issued.sum 3smsp__issue_active.avg.pct_of_peak_sustained_active 'smsp__issue_active.avg.per_cycle_active 2smsp__issue_inst0.avg.pct_of_peak_sustained_active 5smsp__sass_average_branch_targets_threads_uniform.pct /smsp__sass_branch_targets_threads_divergent.avg )smsp__sass_inst_executed_op_global_ld.sum )smsp__sass_inst_executed_op_global_st.sum (smsp__sass_inst_executed_op_local_ld.sum (smsp__sass_inst_executed_op_local_st.sum +smsp__sass_inst_executed_op_memory_128b.sum *smsp__sass_inst_executed_op_memory_16b.sum *smsp__sass_inst_executed_op_memory_32b.sum *smsp__sass_inst_executed_op_memory_64b.sum )smsp__sass_inst_executed_op_memory_8b.sum )smsp__sass_inst_executed_op_shared_ld.sum )smsp__sass_inst_executed_op_shared_st.sum Fsmsp__sass_l1tex_data_bank_conflicts_pipe_lsu_mem_shared_op_ldgsts.sum Bsmsp__sass_l1tex_data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum Bsmsp__sass_l1tex_data_pipe_lsu_wavefronts_mem_shared_op_ldgsts.sum `smsp__sass_l1tex_data_pipe_lsu_wavefronts_mem_shared_op_ldgsts.sum.pct_of_peak_sustained_elapsed Osmsp__sass_l1tex_data_pipe_lsu_wavefronts_mem_shared_op_ldgsts_cache_access.sum msmsp__sass_l1tex_data_pipe_lsu_wavefronts_mem_shared_op_ldgsts_cache_access.sum.pct_of_peak_sustained_elapsed >smsp__sass_l1tex_data_pipe_lsu_wavefronts_mem_shared_op_st.sum \smsp__sass_l1tex_data_pipe_lsu_wavefronts_mem_shared_op_st.sum.pct_of_peak_sustained_elapsed Psmsp__sass_l1tex_m_xbar2l1tex_read_sectors_mem_global_op_ldgsts_cache_bypass.sum nsmsp__sass_l1tex_m_xbar2l1tex_read_sectors_mem_global_op_ldgsts_cache_bypass.sum.pct_of_peak_sustained_elapsed Esmsp__sass_thread_inst_executed_op_dadd_pred_on.sum.per_cycle_elapsed Esmsp__sass_thread_inst_executed_op_dfma_pred_on.sum.per_cycle_elapsed Esmsp__sass_thread_inst_executed_op_dmul_pred_on.sum.per_cycle_elapsed Esmsp__sass_thread_inst_executed_op_fadd_pred_on.sum.per_cycle_elapsed Esmsp__sass_thread_inst_executed_op_ffma_pred_on.sum.per_cycle_elapsed Esmsp__sass_thread_inst_executed_op_fmul_pred_on.sum.per_cycle_elapsed 2smsp__thread_inst_executed_per_inst_executed.ratio :smsp__thread_inst_executed_pred_on_per_inst_executed.ratio %smsp__warps_active.avg.peak_sustained 'smsp__warps_active.avg.per_cycle_active )smsp__warps_eligible.avg.per_cycle_active sass__inst_executed_global_loads !sass__inst_executed_global_stores sass__inst_executed_local_loads sass__inst_executed_local_stores sass__inst_executed_shared_loads !sass__inst_executed_shared_stores GFBSP.TriageSCG.dramc__read_throughput.avg.pct_of_peak_sustained_elapsed BFBSP.TriageSCG.dramc__throughput.avg.pct_of_peak_sustained_elapsed HFBSP.TriageSCG.dramc__write_throughput.avg.pct_of_peak_sustained_elapsed .FE_B.TriageAC.gr__ctas_launched_queue_sync.sum 8TriageAC.tpc__warps_active_realtime.avg.per_cycle_active 8TriageAC.tpc__warps_active_realtime.sum.per_cycle_active 9LTS.TriageSCG.lts__average_t_sector_hit_rate_realtime.pct ?LTS.TriageSCG.lts__throughput.avg.pct_of_peak_sustained_elapsed 1SM_A.TriageAC.l1tex__data_pipe_lsu_wavefronts.avg -SM_A.TriageAC.l1tex__lsu_writeback_active.avg BSM_A.TriageSCG.l1tex__throughput.avg.pct_of_peak_sustained_elapsed #SM_A.TriageAC.sm__cycles_active.avg =SM_A.TriageAC.sm__inst_executed_realtime.avg.per_cycle_active TSM_A.TriageSCG.sm__inst_executed_pipe_alu_realtime.avg.pct_of_peak_sustained_elapsed RSM_C.TriageSCG.smsp__inst_executed_pipe_fmaheavy.avg.pct_of_peak_sustained_elapsed QSM_C.TriageSCG.smsp__inst_executed_pipe_fmalite.avg.pct_of_peak_sustained_elapsed :TriageSCG.sm__throughput.avg.pct_of_peak_sustained_elapsed Spmsampling:sm__pipe_tensor_cycles_active_realtime.avg.pct_of_peak_sustained_elapsed *SM_B.TriageAC.l1tex__t_sector_hit_rate.pct derived__avg_thread_executed !derived__avg_thread_executed_true (derived__memory_l1_conflicts_shared_nway .derived__memory_l1_wavefronts_shared_excessive 7derived__memory_l2_theoretical_sectors_global_excessive 9derived__sm__sass_thread_inst_executed_op_dfma_pred_on_x2 9derived__sm__sass_thread_inst_executed_op_ffma_pred_on_x2 *derived__smsp__inst_executed_op_branch_pct ;derived__smsp__sass_thread_inst_executed_op_dfma_pred_on_x2 ;derived__smsp__sass_thread_inst_executed_op_ffma_pred_on_x2( 2w 127.0.0.1e/usr/local/nsight-compute-2023.3.1/extras/samples/uncoalescedGlobalAccesses/uncoalescedGlobalAccessesqQܓ ɖ*!_Z15addConstDouble3iP7double3dS0_2addConstDouble3:2addConstDouble3(int, double3 *, double, double3 *)@JR Zj @ j @ j ? j ? j p@ j p@ j ? j ? j 0A j   j   j  @ j  @ j  @ j 0@ j t(B (( j  j  j ? j CachePreferNone j @ j Y@ j H@ j aa(@ j @ j @ j 0@ j  @ j @ j 0@ j p@ ((0 ((0 ((0 ((0 ((0 ((0 ((0 ((0 ((0 ( (0 ( (0 ( (0 ( (0 ( (0 ((0 ((0 ((0 ((0 ((0 ((0 ((0 ((0 ((0 ((0 ((0 ((0 ((0 ((0 ((0 ((0 ((0 ((0 ( (0 (!(0 ("(0 (#(0 ($(0 (%(0 (&(0 ('(0 (((0 ()(( (*(( (+(( (,(( (-(( (.(( (/(( (0(( (1(  (2(  (3(  (4(  (5(  (6(  (7(  (8(  (9(  (:(  (;(  (<(  (=(  (>(  (?(  (@(  (A( (B( (C( (D( (E( (F( (G( (H( (I( (J( (K( (L( (M( (N( (O( (P( (Q( (R( (S( (T( (U( (V( (W( (X( (Y( (Z( ([( (\( (]( (^( (_( (`( (a( (b( (c( (d( (e( (f( (g( (h( (i( (j( (k( (l( (m( (n( (o( (p( (q( (r( (s( (t( (u( (v( (w( (x( (y( (z( ({( (|( (}( (~( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( j D@ (( (( ( ( (0( (@( (P( (`( (p(  ((  ((( ((( ((0 ((0 ((* ((* ((0 ((0 ((- ((- ((( ((( ((, ((, ((0 ((0 ((' ((' ((* ((* ((- ((- ((0 ((0 ((" ((" (($ (($ ((& ((& ((( ((( ((* ((* ((, ((, ((. ((. ((0 ((0 (( (( (( (( (( (( (( (( (( (( (( (( (( (( ((  (( j  @ ((0 ((0 ((0 ( (( ((  (( (( (( ( ( ($( ((( (,( (0( (4( (8( (<( (@( (D( (H( (L( (P( (T( (X( (\( (`( (d( (h( (l( (p( (t( (x( (|( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( (( j !( j "( j#( j$( j %(d j &( j '( j(( j)( j*( j+( j ,( j-( j.(0 j/( j0( j1(( j 2( j3( j 4 No-CC j 5( j 6( j 7( j8(@ j 9( j :( j ;( j <( j =( j>( j ?( j @( j A(d j B( jC( jD(8 jE( jF( jG( jH( j I( j J( j K( j L( j M( j N( j O( j P( j Q( j R( jS( jT( jU(. jV( jW( j X( j Y( j Z( j [( j\( j]( j ^( j _( j`( j a( j b( j c(@ j d(@ j e( jf( jg( j h( j i( j j( j k( j l( j m( j n( j o( j p( j q( j r( j s( j t( j u( j v( j w( j x( j y( j z( j {( j |( j }( j ~( j ( j ( j ( j ( j ( j ( j ( j ( j ( j ( j ( j ( j ( j ( j ( j ( j ( j ( j ( j ( j ( j ( j ( j ( j ( j ( j ( j ( j ( j (? j ( j ( j ( j ( j ( j ( j ( j ( j ( j ( j ( j ( j ( j ( j ( j ( j ( j ( j ( j ( j ( j ( j ( j ( j NVIDIA RTX A4500 j ( j ( j (M j ( j ( j (ޡȑ j (ޡ j ( j (D j ( j ( j (} j ( j (} j ( j (@ j $A  STG(  LDG(  IMAD(  DADD(  S2R(  MOV(  EXIT(  ULDC(  ISETP( j $A  STG(  LDG(  IMAD(  DADD(  S2R(  MOV(  EXIT(  ULDC(  ISETP( j $A STG.E.64( LDG.E.64(  DADD(  S2R(  MOV( IMAD.WIDE(  EXIT( ULDC.64(  ISETP.GE.AND(  IMAD( j sA  STG(  LDG(  IMAD(  DADD(  S2R(  MOV(  ULDC(@  ISETP(@  EXIT(@ j sA  STG(  LDG(  IMAD(  DADD(  S2R(  MOV(  ULDC(@  ISETP(@  EXIT(@ j sA STG.E.64( LDG.E.64(  DADD(  S2R(  MOV( IMAD.WIDE( ULDC.64(@  ISETP.GE.AND(@  IMAD(@  EXIT(@ j  j  j  j  j  j  j  j  j  j  j  j  j  j8( ( NVIDIA RTX A4500 ( Quadro P620 j&  (Z( (\( j& @ (Z( (\( j ( j ( j$  (( (( j&  (Z( (\( j8( ( Quadro P620 ( NVIDIA RTX A4500 j%( ( 0-15 ( 0-15 j( ( 0 ( 0 j( ( 0 ( 0 j(( (( (( (( (( (( (Ф( (ग( (𤗙( (( (( (( (( (( (Х( (ॗ( (𥗙( (( (( (( (( ji( ((@ ((@ ((@ (𥗙(@ ((@ ((@ ji( (( (( (( (𥗙( (( (( jw(H ((  ((  ((  (𥗙(  ((  (( j ( j ( jx( ((0 ((0 ((0 (𥗙(0 ((0 ((0 jw(` (( (( (( (𥗙( (( (( j ( ji( (( (( (( (𥗙( (( (( j ( ji( (( (( (( (𥗙( (( (( j(  ((@ ((@ ((@ ((@ ((@ (Ф(@ (ग(@ (𤗙(@ ((@ ((@ ((@ ((@ ((@ (Х(@ (ॗ(@ (𥗙(@ ((@ ((@ ((@ ((@ j(  ((@ ((@ ((@ ((@ ((@ (Ф( (ग(@ (𤗙(@ ((@ ((@ ((@ ((@ ((@ (Х(@ (ॗ(@ (𥗙(@ ((@ ((@ ((@ ((@ j(& ((( (( (( (( (( (Ф(  (ग(  (( (( ((f ((` (( (Х(  (ॗ(/ (𥗙( (( (( (( (( (( j( (( (( (( (( (( (Ф( (ग( (( (( (( (( (( (Х( (ॗ( (𥗙( (( (( (( (( (( j( (( (( (( (( (( (Ф( (ग( (( (( (( (( (( (Х( (ॗ( (𥗙( (( (( (( (( (( j( (( (( (( (( (( (Ф( (ग( (( (( (( (( (( (Х( (ॗ( (𥗙( (( (( (( (( (( j( (( (( (( (( (( (Ф( (ग( (( (( (( (( (( (Х( (ॗ( (𥗙( (( (( (( (( (( j( (( (( (( (( (( (Ф( (ग( (( (( (( (( (( (Х( (ॗ( (𥗙( (( (( (( (( (( j( (( (( (( (( (( (Ф( (ग( (( (( (( (( (( (Х( (ॗ( (𥗙( (( (( (( (( (( j(  (( (( (( (( (( (Ф( (ग( (( (( (( (( (( (Х( (ॗ( (𥗙( (( (( (( (( (( j( (( (( (( (( (( (Ф( (ग( (( (( (( (( (( (Х( (ॗ( (𥗙( (( (( (( (( (( j(4 (($ (( (( (( (( (Ф( (ग( (( (( (( (( (( (Х( (ॗ( (𥗙( (( (( (( (( (( j(2 ((" (( (( (( (( (Ф( (ग( (( (( (( (( (( (Х( (ॗ( (𥗙( (( (( (( (( (( j( (( (( (( (( (( (Ф( (ग( (( (( ((6 ((6 (( (Х( (ॗ( (𥗙( (( ((t ((S (( (( j( (( (( (( (( (( (Ф( (ग( (( (( ((4 ((4 (( (Х( (ॗ( (𥗙( (( ((q ((P (( (( j(  (( (( (( (( (( (Ф( (ग( (( (( (( (( (( (Х(  (ॗ( (𥗙( (( (( (( (( (( j(  (( (( (( (( (( (Ф( (ग( (( (( (( (( (( (Х(  (ॗ( (𥗙( (( (( (( (( (( j( (( (( (( (( (( (Ф( (ग( (( (( (( (( (( (Х( (ॗ( (𥗙( (( (( (( (( (( j( (( (( (( (( (( (Ф( (ग( (( (( (( (( (( (Х( (ॗ( (𥗙( (( (( (( (( (( j( (( (( (( (( (( (Ф( (ग( (( (( (( (( (( (Х( (ॗ( (𥗙( (( (( (( (( (( j( (( (( (( (( (( (Ф( (ग( (( (( (( (( (( (Х( (ॗ( (𥗙( (( (( (( (( (( j( (( (( (( (( (( (Ф( (ग( (( (( ((' ((( (( (Х( (ॗ( (𥗙(j (( ((? (( (( (( j( (( (( (( (( (( (Ф( (ग( (( (( ((& ((( (( (Х( (ॗ( (𥗙(j (( ((? (( (( (( j( (( (( (( (( (( (Ф( (ग( (( (( (( (( (( (Х( (ॗ( (𥗙( (( (( (( (( (( j( (( (( (( (( (( (Ф( (ग( (( (( (( (( (( (Х( (ॗ( (𥗙( (( (( (( (( (( j( (( (( (( (( (( (Ф( (ग( (( (( (( (( (( (Х( (ॗ( (𥗙( (( (( (( (( (( j( (( (( (( (( (( (Ф( (ग( (( (( (( (( (( (Х( (ॗ( (𥗙( (( (( (( (( (( j(  (( (( (( (( (( (Ф( (ग( (( (( (( (( (( (Х( (ॗ( (𥗙( (( (( (( (( (( j( (( (( (( (( (( (Ф( (ग( (( (( (( (( (( (Х( (ॗ( (𥗙( (( (( (( (( (( j( (( (( (( (( (( (Ф( (ग( (( (( (( (( (( (Х( (ॗ( (𥗙( (( (( (( (( (( j( (( (( (( (( (( (Ф( (ग( (( (( (( (( (( (Х( (ॗ( (𥗙( (( (( (( (( (( j( (( (( (( (( (( (Ф( (ग( (( (( (( (( (( (Х( (ॗ( (𥗙(y (( (() ((\ (( (( j( (( (( (( (( (( (Ф( (ग( (( (( (( (( (( (Х( (ॗ( (𥗙(w (( ((( ((X (( (( j( (( (( (( (( (( (Ф( (ग( (( (( (( (( (( (Х( (ॗ( (𥗙( (( (( (( (( (( j( (( (( (( (( (( (Ф( (ग( (( (( (( (( (( (Х( (ॗ( (𥗙( (( (( (( (( (( j( (( (( (( (( (( (Ф( (ग( (( (( (( (( (( (Х(K (ॗ(( (𥗙( (( (( (( (( (( j( (( (( (( (( (( (Ф( (ग( (( (( (( (( (( (Х(H (ॗ(( (𥗙( (( (( (( (( (( j(* (( (( (( (( (( (Ф(  (ग( (( (( (( (( (( (Х( (ॗ( (𥗙( (( (( (( (( (( j(# (( (( (( (( (( (Ф(  (ग( (( (( (( (( (( (Х( (ॗ( (𥗙( (( (( (( (( (( ji( (( (( (( (𥗙( (( (( ji( (( (( (( (𥗙( (( (( ji( (( (( (( (𥗙( (( (( ji( (( (( (( (𥗙( (( (( ji( (( (( (( (𥗙( (( (( ji( (( (( (( (𥗙( (( (( j  dram__cycles_active.avg.pct_of_peak_sustained_elapsed,fbpa__dram_sectors.avg.pct_of_peak_sustained_elapsed,l1tex__data_bank_reads.avg.pct_of_peak_sustained_elapsed,l1tex__data_bank_writes.avg.pct_of_peak_sustained_elapsed,l1tex__data_pipe_lsu_wavefronts.avg.pct_of_peak_sustained_elapsed,l1tex__data_pipe_tex_wavefronts.avg.pct_of_peak_sustained_elapsed,l1tex__f_wavefronts.avg.pct_of_peak_sustained_elapsed,l1tex__lsu_writeback_active.avg.pct_of_peak_sustained_elapsed,l1tex__lsuin_requests.avg.pct_of_peak_sustained_elapsed,l1tex__m_l1tex2xbar_req_cycles_active.avg.pct_of_peak_sustained_elapsed,l1tex__m_xbar2l1tex_read_sectors.avg.pct_of_peak_sustained_elapsed,l1tex__tex_writeback_active.avg.pct_of_peak_sustained_elapsed,l1tex__texin_sm2tex_req_cycles_active.avg.pct_of_peak_sustained_elapsed,lts__d_atomic_input_cycles_active.avg.pct_of_peak_sustained_elapsed,lts__d_sectors.avg.pct_of_peak_sustained_elapsed,lts__d_sectors_fill_device.avg.pct_of_peak_sustained_elapsed,lts__d_sectors_fill_sysmem.avg.pct_of_peak_sustained_elapsed,lts__lts2xbar_cycles_active.avg.pct_of_peak_sustained_elapsed,lts__t_sectors.avg.pct_of_peak_sustained_elapsed,lts__t_tag_requests.avg.pct_of_peak_sustained_elapsed,lts__xbar2lts_cycles_active.avg.pct_of_peak_sustained_elapsed j  idc__request_cycles_active.avg.pct_of_peak_sustained_elapsed,sm__inst_executed.avg.pct_of_peak_sustained_elapsed,sm__inst_executed_pipe_adu.avg.pct_of_peak_sustained_elapsed,sm__inst_executed_pipe_cbu_pred_on_any.avg.pct_of_peak_sustained_elapsed,sm__inst_executed_pipe_ipa.avg.pct_of_peak_sustained_elapsed,sm__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_elapsed,sm__inst_executed_pipe_tex.avg.pct_of_peak_sustained_elapsed,sm__inst_executed_pipe_uniform.avg.pct_of_peak_sustained_elapsed,sm__inst_executed_pipe_xu.avg.pct_of_peak_sustained_elapsed,sm__issue_active.avg.pct_of_peak_sustained_elapsed,sm__mio2rf_writeback_active.avg.pct_of_peak_sustained_elapsed,sm__mio_inst_issued.avg.pct_of_peak_sustained_elapsed,sm__mio_pq_read_cycles_active.avg.pct_of_peak_sustained_elapsed,sm__mio_pq_write_cycles_active.avg.pct_of_peak_sustained_elapsed,sm__pipe_alu_cycles_active.avg.pct_of_peak_sustained_elapsed,sm__pipe_fma_cycles_active.avg.pct_of_peak_sustained_elapsed,sm__pipe_fmaheavy_cycles_active.avg.pct_of_peak_sustained_elapsed,sm__pipe_fp64_cycles_active.avg.pct_of_peak_sustained_elapsed,sm__pipe_tensor_cycles_active.avg.pct_of_peak_sustained_elapsed j22 2dram__bytes_read.sum,dram__bytes_read.sum.pct_of_peak_sustained_elapsed,dram__bytes_read.sum.per_second,dram__bytes_write.sum,dram__bytes_write.sum.pct_of_peak_sustained_elapsed,dram__bytes_write.sum.per_second,l1tex__data_pipe_lsu_wavefronts_mem_shared.sum.pct_of_peak_sustained_elapsed,l1tex__m_l1tex2xbar_write_bytes.sum,l1tex__m_l1tex2xbar_write_bytes.sum.pct_of_peak_sustained_elapsed,l1tex__m_l1tex2xbar_write_bytes.sum.per_second,l1tex__m_xbar2l1tex_read_bytes.sum,l1tex__m_xbar2l1tex_read_bytes.sum.pct_of_peak_sustained_elapsed,l1tex__m_xbar2l1tex_read_bytes.sum.per_second,l1tex__t_requests_pipe_lsu_mem_global_op_atom.sum,l1tex__t_requests_pipe_lsu_mem_global_op_atom.sum.pct_of_peak_sustained_elapsed,l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum,l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum.pct_of_peak_sustained_elapsed,l1tex__t_requests_pipe_lsu_mem_global_op_red.sum,l1tex__t_requests_pipe_lsu_mem_global_op_red.sum.pct_of_peak_sustained_elapsed,l1tex__t_requests_pipe_lsu_mem_global_op_st.sum,l1tex__t_requests_pipe_lsu_mem_global_op_st.sum.pct_of_peak_sustained_elapsed,l1tex__t_requests_pipe_lsu_mem_local_op_ld.sum,l1tex__t_requests_pipe_lsu_mem_local_op_ld.sum.pct_of_peak_sustained_elapsed,l1tex__t_requests_pipe_lsu_mem_local_op_st.sum,l1tex__t_requests_pipe_lsu_mem_local_op_st.sum.pct_of_peak_sustained_elapsed,l1tex__t_requests_pipe_tex_mem_surface_op_atom.sum,l1tex__t_requests_pipe_tex_mem_surface_op_atom.sum.pct_of_peak_sustained_elapsed,l1tex__t_requests_pipe_tex_mem_surface_op_ld.sum,l1tex__t_requests_pipe_tex_mem_surface_op_ld.sum.pct_of_peak_sustained_elapsed,l1tex__t_requests_pipe_tex_mem_surface_op_red.sum,l1tex__t_requests_pipe_tex_mem_surface_op_red.sum.pct_of_peak_sustained_elapsed,l1tex__t_requests_pipe_tex_mem_surface_op_st.sum,l1tex__t_requests_pipe_tex_mem_surface_op_st.sum.pct_of_peak_sustained_elapsed,l1tex__t_requests_pipe_tex_mem_texture.sum,l1tex__t_requests_pipe_tex_mem_texture.sum.pct_of_peak_sustained_elapsed,l1tex__t_sector_hit_rate.pct,lts__average_gcomp_output_sector_compression_achieved_rate.ratio,lts__gcomp_input_sectors.sum,lts__gcomp_input_sectors.sum.pct_of_peak_sustained_elapsed,lts__gcomp_input_sectors.sum.per_second,lts__gcomp_output_sectors.sum,lts__gcomp_output_sectors.sum.pct_of_peak_sustained_elapsed,lts__gcomp_output_sectors.sum.per_second,lts__t_sector_hit_rate.pct,lts__t_sectors_srcunit_tex_aperture_peer_op_read_lookup_miss.sum,lts__t_sectors_srcunit_tex_aperture_peer_op_read_lookup_miss.sum.pct_of_peak_sustained_elapsed,lts__t_sectors_srcunit_tex_aperture_peer_op_read_lookup_miss.sum.per_second,lts__t_sectors_srcunit_tex_aperture_peer_op_red_lookup_miss.sum,lts__t_sectors_srcunit_tex_aperture_peer_op_red_lookup_miss.sum.pct_of_peak_sustained_elapsed,lts__t_sectors_srcunit_tex_aperture_peer_op_red_lookup_miss.sum.per_second,lts__t_sectors_srcunit_tex_aperture_peer_op_write_lookup_miss.sum,lts__t_sectors_srcunit_tex_aperture_peer_op_write_lookup_miss.sum.pct_of_peak_sustained_elapsed,lts__t_sectors_srcunit_tex_aperture_peer_op_write_lookup_miss.sum.per_second,lts__t_sectors_srcunit_tex_aperture_sysmem_op_read_lookup_miss.sum,lts__t_sectors_srcunit_tex_aperture_sysmem_op_read_lookup_miss.sum.pct_of_peak_sustained_elapsed,lts__t_sectors_srcunit_tex_aperture_sysmem_op_read_lookup_miss.sum.per_second,lts__t_sectors_srcunit_tex_aperture_sysmem_op_red_lookup_miss.sum,lts__t_sectors_srcunit_tex_aperture_sysmem_op_red_lookup_miss.sum.pct_of_peak_sustained_elapsed,lts__t_sectors_srcunit_tex_aperture_sysmem_op_red_lookup_miss.sum.per_second,lts__t_sectors_srcunit_tex_aperture_sysmem_op_write_lookup_miss.sum,lts__t_sectors_srcunit_tex_aperture_sysmem_op_write_lookup_miss.sum.pct_of_peak_sustained_elapsed,lts__t_sectors_srcunit_tex_aperture_sysmem_op_write_lookup_miss.sum.per_second,lts__t_sectors_srcunit_tex_op_read.avg.pct_of_peak_sustained_elapsed,lts__t_sectors_srcunit_tex_op_read.sum,lts__t_sectors_srcunit_tex_op_write.avg.pct_of_peak_sustained_elapsed,lts__t_sectors_srcunit_tex_op_write.sum,sm__sass_l1tex_data_bytes_write_pipe_lsu_mem_shared_op_ldgsts_cache_access.sum,sm__sass_l1tex_data_bytes_write_pipe_lsu_mem_shared_op_ldgsts_cache_access.sum.pct_of_peak_sustained_elapsed,sm__sass_l1tex_data_bytes_write_pipe_lsu_mem_shared_op_ldgsts_cache_access.sum.per_second,sm__sass_l1tex_m_xbar2l1tex_read_bytes_mem_global_op_ldgsts_cache_bypass.sum,sm__sass_l1tex_m_xbar2l1tex_read_bytes_mem_global_op_ldgsts_cache_bypass.sum.pct_of_peak_sustained_elapsed,sm__sass_l1tex_m_xbar2l1tex_read_bytes_mem_global_op_ldgsts_cache_bypass.sum.per_second,sm__sass_l1tex_t_requests_pipe_lsu_mem_global_op_ldgsts.sum,sm__sass_l1tex_t_requests_pipe_lsu_mem_global_op_ldgsts.sum.pct_of_peak_sustained_elapsed,smsp__cycles_elapsed.sum,smsp__inst_executed_op_generic_atom_dot_alu.sum,smsp__inst_executed_op_generic_atom_dot_alu.sum.pct_of_peak_sustained_elapsed,smsp__inst_executed_op_generic_atom_dot_cas.sum,smsp__inst_executed_op_generic_atom_dot_cas.sum.pct_of_peak_sustained_elapsed,smsp__inst_executed_op_global_red.sum,smsp__inst_executed_op_global_red.sum.pct_of_peak_sustained_elapsed,smsp__inst_executed_op_ldgsts.sum,smsp__inst_executed_op_ldgsts.sum.pct_of_peak_sustained_elapsed,smsp__inst_executed_op_ldsm.sum,smsp__inst_executed_op_ldsm.sum.pct_of_peak_sustained_elapsed,smsp__inst_executed_op_shared_atom.sum,smsp__inst_executed_op_shared_atom.sum.pct_of_peak_sustained_elapsed,smsp__inst_executed_op_surface_atom_dot_alu.sum,smsp__inst_executed_op_surface_atom_dot_alu.sum.pct_of_peak_sustained_elapsed,smsp__inst_executed_op_surface_atom_dot_cas.sum,smsp__inst_executed_op_surface_atom_dot_cas.sum.pct_of_peak_sustained_elapsed,smsp__inst_executed_op_surface_ld.sum,smsp__inst_executed_op_surface_ld.sum.pct_of_peak_sustained_elapsed,smsp__inst_executed_op_surface_red.sum,smsp__inst_executed_op_surface_red.sum.pct_of_peak_sustained_elapsed,smsp__inst_executed_op_surface_st.sum,smsp__inst_executed_op_surface_st.sum.pct_of_peak_sustained_elapsed,smsp__inst_executed_op_texture.sum,smsp__inst_executed_op_texture.sum.pct_of_peak_sustained_elapsed,smsp__sass_inst_executed_op_global_ld.sum,smsp__sass_inst_executed_op_global_st.sum,smsp__sass_inst_executed_op_local_ld.sum,smsp__sass_inst_executed_op_local_st.sum,smsp__sass_inst_executed_op_shared_ld.sum,smsp__sass_inst_executed_op_shared_st.sum,smsp__sass_l1tex_data_pipe_lsu_wavefronts_mem_shared_op_ldgsts.sum,smsp__sass_l1tex_data_pipe_lsu_wavefronts_mem_shared_op_ldgsts.sum.pct_of_peak_sustained_elapsed j dram__bytes_read.sum,dram__bytes_read.sum.pct_of_peak_sustained_elapsed,dram__bytes_read.sum.per_second,dram__bytes_write.sum,dram__bytes_write.sum.pct_of_peak_sustained_elapsed,dram__bytes_write.sum.per_second,dram__sectors_read.sum,dram__sectors_write.sum jCC Cl1tex__lsu_writeback_active_mem_lg.sum,l1tex__lsu_writeback_active_mem_lg.sum.pct_of_peak_sustained_elapsed,l1tex__m_l1tex2xbar_write_sectors_mem_global_op_atom.sum,l1tex__m_l1tex2xbar_write_sectors_mem_global_op_atom.sum.pct_of_peak_sustained_elapsed,l1tex__m_l1tex2xbar_write_sectors_mem_global_op_red.sum,l1tex__m_l1tex2xbar_write_sectors_mem_global_op_red.sum.pct_of_peak_sustained_elapsed,l1tex__m_l1tex2xbar_write_sectors_mem_lg_op_st.sum,l1tex__m_l1tex2xbar_write_sectors_mem_lg_op_st.sum.pct_of_peak_sustained_elapsed,l1tex__m_l1tex2xbar_write_sectors_mem_surface_op_atom.sum,l1tex__m_l1tex2xbar_write_sectors_mem_surface_op_atom.sum.pct_of_peak_sustained_elapsed,l1tex__m_l1tex2xbar_write_sectors_mem_surface_op_red.sum,l1tex__m_l1tex2xbar_write_sectors_mem_surface_op_red.sum.pct_of_peak_sustained_elapsed,l1tex__m_l1tex2xbar_write_sectors_mem_surface_op_st.sum,l1tex__m_l1tex2xbar_write_sectors_mem_surface_op_st.sum.pct_of_peak_sustained_elapsed,l1tex__m_xbar2l1tex_read_sectors_mem_global_op_atom.sum,l1tex__m_xbar2l1tex_read_sectors_mem_global_op_atom.sum.pct_of_peak_sustained_elapsed,l1tex__m_xbar2l1tex_read_sectors_mem_lg_op_ld.sum,l1tex__m_xbar2l1tex_read_sectors_mem_lg_op_ld.sum.pct_of_peak_sustained_elapsed,l1tex__m_xbar2l1tex_read_sectors_mem_surface_op_atom.sum,l1tex__m_xbar2l1tex_read_sectors_mem_surface_op_atom.sum.pct_of_peak_sustained_elapsed,l1tex__m_xbar2l1tex_read_sectors_mem_surface_op_ld.sum,l1tex__m_xbar2l1tex_read_sectors_mem_surface_op_ld.sum.pct_of_peak_sustained_elapsed,l1tex__m_xbar2l1tex_read_sectors_mem_texture.sum,l1tex__m_xbar2l1tex_read_sectors_mem_texture.sum.pct_of_peak_sustained_elapsed,l1tex__t_output_wavefronts_pipe_lsu_mem_global_op_atom.sum,l1tex__t_output_wavefronts_pipe_lsu_mem_global_op_atom.sum.pct_of_peak_sustained_elapsed,l1tex__t_output_wavefronts_pipe_lsu_mem_global_op_ld.sum,l1tex__t_output_wavefronts_pipe_lsu_mem_global_op_ld.sum.pct_of_peak_sustained_elapsed,l1tex__t_output_wavefronts_pipe_lsu_mem_global_op_red.sum,l1tex__t_output_wavefronts_pipe_lsu_mem_global_op_red.sum.pct_of_peak_sustained_elapsed,l1tex__t_output_wavefronts_pipe_lsu_mem_global_op_st.sum,l1tex__t_output_wavefronts_pipe_lsu_mem_global_op_st.sum.pct_of_peak_sustained_elapsed,l1tex__t_output_wavefronts_pipe_lsu_mem_local_op_ld.sum,l1tex__t_output_wavefronts_pipe_lsu_mem_local_op_ld.sum.pct_of_peak_sustained_elapsed,l1tex__t_output_wavefronts_pipe_lsu_mem_local_op_st.sum,l1tex__t_output_wavefronts_pipe_lsu_mem_local_op_st.sum.pct_of_peak_sustained_elapsed,l1tex__t_output_wavefronts_pipe_tex_mem_surface_op_atom.sum,l1tex__t_output_wavefronts_pipe_tex_mem_surface_op_atom.sum.pct_of_peak_sustained_elapsed,l1tex__t_output_wavefronts_pipe_tex_mem_surface_op_ld.sum,l1tex__t_output_wavefronts_pipe_tex_mem_surface_op_ld.sum.pct_of_peak_sustained_elapsed,l1tex__t_output_wavefronts_pipe_tex_mem_surface_op_red.sum,l1tex__t_output_wavefronts_pipe_tex_mem_surface_op_red.sum.pct_of_peak_sustained_elapsed,l1tex__t_output_wavefronts_pipe_tex_mem_surface_op_st.sum,l1tex__t_output_wavefronts_pipe_tex_mem_surface_op_st.sum.pct_of_peak_sustained_elapsed,l1tex__t_output_wavefronts_pipe_tex_mem_texture.sum,l1tex__t_output_wavefronts_pipe_tex_mem_texture.sum.pct_of_peak_sustained_elapsed,l1tex__t_requests_pipe_lsu_mem_global_op_atom.sum,l1tex__t_requests_pipe_lsu_mem_global_op_atom.sum.pct_of_peak_sustained_elapsed,l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum,l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum.pct_of_peak_sustained_elapsed,l1tex__t_requests_pipe_lsu_mem_global_op_red.sum,l1tex__t_requests_pipe_lsu_mem_global_op_red.sum.pct_of_peak_sustained_elapsed,l1tex__t_requests_pipe_lsu_mem_global_op_st.sum,l1tex__t_requests_pipe_lsu_mem_global_op_st.sum.pct_of_peak_sustained_elapsed,l1tex__t_requests_pipe_lsu_mem_local_op_ld.sum,l1tex__t_requests_pipe_lsu_mem_local_op_ld.sum.pct_of_peak_sustained_elapsed,l1tex__t_requests_pipe_lsu_mem_local_op_st.sum,l1tex__t_requests_pipe_lsu_mem_local_op_st.sum.pct_of_peak_sustained_elapsed,l1tex__t_requests_pipe_tex_mem_surface_op_atom.sum,l1tex__t_requests_pipe_tex_mem_surface_op_atom.sum.pct_of_peak_sustained_elapsed,l1tex__t_requests_pipe_tex_mem_surface_op_ld.sum,l1tex__t_requests_pipe_tex_mem_surface_op_ld.sum.pct_of_peak_sustained_elapsed,l1tex__t_requests_pipe_tex_mem_surface_op_red.sum,l1tex__t_requests_pipe_tex_mem_surface_op_red.sum.pct_of_peak_sustained_elapsed,l1tex__t_requests_pipe_tex_mem_surface_op_st.sum,l1tex__t_requests_pipe_tex_mem_surface_op_st.sum.pct_of_peak_sustained_elapsed,l1tex__t_requests_pipe_tex_mem_texture.sum,l1tex__t_requests_pipe_tex_mem_texture.sum.pct_of_peak_sustained_elapsed,l1tex__t_sector_pipe_lsu_mem_global_op_atom_hit_rate.pct,l1tex__t_sector_pipe_lsu_mem_global_op_ld_hit_rate.pct,l1tex__t_sector_pipe_lsu_mem_global_op_red_hit_rate.pct,l1tex__t_sector_pipe_lsu_mem_global_op_st_hit_rate.pct,l1tex__t_sector_pipe_lsu_mem_local_op_ld_hit_rate.pct,l1tex__t_sector_pipe_lsu_mem_local_op_st_hit_rate.pct,l1tex__t_sector_pipe_tex_mem_surface_op_atom_hit_rate.pct,l1tex__t_sector_pipe_tex_mem_surface_op_ld_hit_rate.pct,l1tex__t_sector_pipe_tex_mem_surface_op_red_hit_rate.pct,l1tex__t_sector_pipe_tex_mem_surface_op_st_hit_rate.pct,l1tex__t_sector_pipe_tex_mem_texture_op_tex_hit_rate.pct,l1tex__t_sectors_pipe_lsu_mem_global_op_atom.sum,l1tex__t_sectors_pipe_lsu_mem_global_op_atom_lookup_hit.sum,l1tex__t_sectors_pipe_lsu_mem_global_op_atom_lookup_miss.sum,l1tex__t_sectors_pipe_lsu_mem_global_op_ld.sum,l1tex__t_sectors_pipe_lsu_mem_global_op_ld_lookup_hit.sum,l1tex__t_sectors_pipe_lsu_mem_global_op_ld_lookup_miss.sum,l1tex__t_sectors_pipe_lsu_mem_global_op_red.sum,l1tex__t_sectors_pipe_lsu_mem_global_op_red_lookup_hit.sum,l1tex__t_sectors_pipe_lsu_mem_global_op_red_lookup_miss.sum,l1tex__t_sectors_pipe_lsu_mem_global_op_st.sum,l1tex__t_sectors_pipe_lsu_mem_global_op_st_lookup_hit.sum,l1tex__t_sectors_pipe_lsu_mem_global_op_st_lookup_miss.sum,l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum,l1tex__t_sectors_pipe_lsu_mem_local_op_ld_lookup_hit.sum,l1tex__t_sectors_pipe_lsu_mem_local_op_ld_lookup_miss.sum,l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum,l1tex__t_sectors_pipe_lsu_mem_local_op_st_lookup_hit.sum,l1tex__t_sectors_pipe_lsu_mem_local_op_st_lookup_miss.sum,l1tex__t_sectors_pipe_tex_mem_surface_op_atom.sum,l1tex__t_sectors_pipe_tex_mem_surface_op_atom_lookup_hit.sum,l1tex__t_sectors_pipe_tex_mem_surface_op_atom_lookup_miss.sum,l1tex__t_sectors_pipe_tex_mem_surface_op_ld.sum,l1tex__t_sectors_pipe_tex_mem_surface_op_ld_lookup_hit.sum,l1tex__t_sectors_pipe_tex_mem_surface_op_ld_lookup_miss.sum,l1tex__t_sectors_pipe_tex_mem_surface_op_red.sum,l1tex__t_sectors_pipe_tex_mem_surface_op_red_lookup_hit.sum,l1tex__t_sectors_pipe_tex_mem_surface_op_red_lookup_miss.sum,l1tex__t_sectors_pipe_tex_mem_surface_op_st.sum,l1tex__t_sectors_pipe_tex_mem_surface_op_st_lookup_hit.sum,l1tex__t_sectors_pipe_tex_mem_surface_op_st_lookup_miss.sum,l1tex__t_sectors_pipe_tex_mem_texture.sum,l1tex__t_sectors_pipe_tex_mem_texture_lookup_hit.sum,l1tex__t_sectors_pipe_tex_mem_texture_lookup_miss.sum,l1tex__tex_writeback_active.sum,l1tex__tex_writeback_active.sum.pct_of_peak_sustained_elapsed,sm__sass_inst_executed_op_ldgsts_cache_access.sum,sm__sass_inst_executed_op_ldgsts_cache_bypass.sum,sm__sass_l1tex_t_requests_pipe_lsu_mem_global_op_ldgsts_cache_access.sum,sm__sass_l1tex_t_requests_pipe_lsu_mem_global_op_ldgsts_cache_bypass.sum,sm__sass_l1tex_t_sectors_pipe_lsu_mem_global_op_ldgsts_cache_access.sum,sm__sass_l1tex_t_sectors_pipe_lsu_mem_global_op_ldgsts_cache_access.sum.pct_of_peak_sustained_elapsed,sm__sass_l1tex_t_sectors_pipe_lsu_mem_global_op_ldgsts_cache_bypass.sum,sm__sass_l1tex_t_sectors_pipe_lsu_mem_global_op_ldgsts_cache_bypass.sum.pct_of_peak_sustained_elapsed,smsp__inst_executed_op_generic_atom_dot_alu.sum,smsp__inst_executed_op_generic_atom_dot_cas.sum,smsp__inst_executed_op_global_red.sum,smsp__inst_executed_op_surface_atom_dot_alu.sum,smsp__inst_executed_op_surface_atom_dot_cas.sum,smsp__inst_executed_op_surface_ld.sum,smsp__inst_executed_op_surface_red.sum,smsp__inst_executed_op_surface_st.sum,smsp__inst_executed_op_texture.sum,smsp__sass_inst_executed_op_global_ld.sum,smsp__sass_inst_executed_op_global_st.sum,smsp__sass_inst_executed_op_local_ld.sum,smsp__sass_inst_executed_op_local_st.sum,smsp__sass_l1tex_data_pipe_lsu_wavefronts_mem_shared_op_ldgsts_cache_access.sum,smsp__sass_l1tex_data_pipe_lsu_wavefronts_mem_shared_op_ldgsts_cache_access.sum.pct_of_peak_sustained_elapsed,smsp__sass_l1tex_m_xbar2l1tex_read_sectors_mem_global_op_ldgsts_cache_bypass.sum,smsp__sass_l1tex_m_xbar2l1tex_read_sectors_mem_global_op_ldgsts_cache_bypass.sum.pct_of_peak_sustained_elapsed j lts__t_sectors_evict_first_lookup_hit.sum,lts__t_sectors_evict_first_lookup_miss.sum,lts__t_sectors_evict_last_lookup_hit.sum,lts__t_sectors_evict_last_lookup_miss.sum,lts__t_sectors_evict_normal_demote_lookup_hit.sum,lts__t_sectors_evict_normal_demote_lookup_miss.sum,lts__t_sectors_evict_normal_lookup_hit.sum,lts__t_sectors_evict_normal_lookup_miss.sum,lts__t_sectors_srcunit_tex_evict_first_lookup_hit.sum,lts__t_sectors_srcunit_tex_evict_first_lookup_miss.sum,lts__t_sectors_srcunit_tex_evict_last_lookup_hit.sum,lts__t_sectors_srcunit_tex_evict_last_lookup_miss.sum,lts__t_sectors_srcunit_tex_evict_normal_demote_lookup_hit.sum,lts__t_sectors_srcunit_tex_evict_normal_demote_lookup_miss.sum,lts__t_sectors_srcunit_tex_evict_normal_lookup_hit.sum,lts__t_sectors_srcunit_tex_evict_normal_lookup_miss.sum,lts__t_sectors_srcunit_tex_op_atom_evict_first_lookup_hit.sum,lts__t_sectors_srcunit_tex_op_atom_evict_first_lookup_miss.sum,lts__t_sectors_srcunit_tex_op_atom_evict_last_lookup_hit.sum,lts__t_sectors_srcunit_tex_op_atom_evict_last_lookup_miss.sum,lts__t_sectors_srcunit_tex_op_atom_evict_normal_lookup_hit.sum,lts__t_sectors_srcunit_tex_op_atom_evict_normal_lookup_miss.sum,lts__t_sectors_srcunit_tex_op_read_evict_first_lookup_hit.sum,lts__t_sectors_srcunit_tex_op_read_evict_first_lookup_miss.sum,lts__t_sectors_srcunit_tex_op_read_evict_last_lookup_hit.sum,lts__t_sectors_srcunit_tex_op_read_evict_last_lookup_miss.sum,lts__t_sectors_srcunit_tex_op_read_evict_normal_demote_lookup_hit.sum,lts__t_sectors_srcunit_tex_op_read_evict_normal_demote_lookup_miss.sum,lts__t_sectors_srcunit_tex_op_read_evict_normal_lookup_hit.sum,lts__t_sectors_srcunit_tex_op_read_evict_normal_lookup_miss.sum,lts__t_sectors_srcunit_tex_op_write_evict_first_lookup_hit.sum,lts__t_sectors_srcunit_tex_op_write_evict_first_lookup_miss.sum,lts__t_sectors_srcunit_tex_op_write_evict_last_lookup_hit.sum,lts__t_sectors_srcunit_tex_op_write_evict_last_lookup_miss.sum,lts__t_sectors_srcunit_tex_op_write_evict_normal_demote_lookup_hit.sum,lts__t_sectors_srcunit_tex_op_write_evict_normal_demote_lookup_miss.sum,lts__t_sectors_srcunit_tex_op_write_evict_normal_lookup_hit.sum,lts__t_sectors_srcunit_tex_op_write_evict_normal_lookup_miss.sum j"! !lts__t_requests.sum,lts__t_requests_srcunit_tex.sum,lts__t_requests_srcunit_tex_op_atom_dot_alu.sum,lts__t_requests_srcunit_tex_op_atom_dot_cas.sum,lts__t_requests_srcunit_tex_op_read.sum,lts__t_requests_srcunit_tex_op_red.sum,lts__t_requests_srcunit_tex_op_write.sum,lts__t_sectors.avg.pct_of_peak_sustained_elapsed,lts__t_sectors.avg.peak_sustained,lts__t_sectors.avg.per_cycle_elapsed,lts__t_sectors.sum,lts__t_sectors.sum.per_second,lts__t_sectors_aperture_device_lookup_miss.sum,lts__t_sectors_aperture_peer_lookup_miss.sum,lts__t_sectors_aperture_sysmem_lookup_miss.sum,lts__t_sectors_data_ecc.avg.pct_of_peak_sustained_elapsed,lts__t_sectors_data_ecc.avg.peak_sustained,lts__t_sectors_data_ecc.avg.per_cycle_elapsed,lts__t_sectors_data_ecc.sum,lts__t_sectors_data_ecc.sum.per_second,lts__t_sectors_lookup_hit.sum,lts__t_sectors_lookup_miss.sum,lts__t_sectors_srcunit_tex.avg.pct_of_peak_sustained_elapsed,lts__t_sectors_srcunit_tex.avg.peak_sustained,lts__t_sectors_srcunit_tex.avg.per_cycle_elapsed,lts__t_sectors_srcunit_tex.sum,lts__t_sectors_srcunit_tex.sum.per_second,lts__t_sectors_srcunit_tex_aperture_device_lookup_miss.sum,lts__t_sectors_srcunit_tex_aperture_device_op_atom_dot_alu_lookup_miss.sum,lts__t_sectors_srcunit_tex_aperture_device_op_atom_dot_cas_lookup_miss.sum,lts__t_sectors_srcunit_tex_aperture_device_op_read_lookup_miss.sum,lts__t_sectors_srcunit_tex_aperture_device_op_red_lookup_miss.sum,lts__t_sectors_srcunit_tex_aperture_device_op_write_lookup_miss.sum,lts__t_sectors_srcunit_tex_aperture_peer_lookup_miss.sum,lts__t_sectors_srcunit_tex_aperture_peer_op_atom_dot_alu_lookup_miss.sum,lts__t_sectors_srcunit_tex_aperture_peer_op_atom_dot_cas_lookup_miss.sum,lts__t_sectors_srcunit_tex_aperture_peer_op_read_lookup_miss.sum,lts__t_sectors_srcunit_tex_aperture_peer_op_red_lookup_miss.sum,lts__t_sectors_srcunit_tex_aperture_peer_op_write_lookup_miss.sum,lts__t_sectors_srcunit_tex_aperture_sysmem_lookup_miss.sum,lts__t_sectors_srcunit_tex_aperture_sysmem_op_atom_dot_alu_lookup_miss.sum,lts__t_sectors_srcunit_tex_aperture_sysmem_op_atom_dot_cas_lookup_miss.sum,lts__t_sectors_srcunit_tex_aperture_sysmem_op_read_lookup_miss.sum,lts__t_sectors_srcunit_tex_aperture_sysmem_op_red_lookup_miss.sum,lts__t_sectors_srcunit_tex_aperture_sysmem_op_write_lookup_miss.sum,lts__t_sectors_srcunit_tex_lookup_hit.sum,lts__t_sectors_srcunit_tex_lookup_miss.sum,lts__t_sectors_srcunit_tex_op_atom_dot_alu.avg.pct_of_peak_sustained_elapsed,lts__t_sectors_srcunit_tex_op_atom_dot_alu.avg.peak_sustained,lts__t_sectors_srcunit_tex_op_atom_dot_alu.avg.per_cycle_elapsed,lts__t_sectors_srcunit_tex_op_atom_dot_alu.sum,lts__t_sectors_srcunit_tex_op_atom_dot_alu.sum.per_second,lts__t_sectors_srcunit_tex_op_atom_dot_alu_lookup_hit.sum,lts__t_sectors_srcunit_tex_op_atom_dot_alu_lookup_miss.sum,lts__t_sectors_srcunit_tex_op_atom_dot_cas.avg.pct_of_peak_sustained_elapsed,lts__t_sectors_srcunit_tex_op_atom_dot_cas.avg.peak_sustained,lts__t_sectors_srcunit_tex_op_atom_dot_cas.avg.per_cycle_elapsed,lts__t_sectors_srcunit_tex_op_atom_dot_cas.sum,lts__t_sectors_srcunit_tex_op_atom_dot_cas.sum.per_second,lts__t_sectors_srcunit_tex_op_atom_dot_cas_lookup_hit.sum,lts__t_sectors_srcunit_tex_op_atom_dot_cas_lookup_miss.sum,lts__t_sectors_srcunit_tex_op_read.avg.pct_of_peak_sustained_elapsed,lts__t_sectors_srcunit_tex_op_read.avg.peak_sustained,lts__t_sectors_srcunit_tex_op_read.avg.per_cycle_elapsed,lts__t_sectors_srcunit_tex_op_read.sum,lts__t_sectors_srcunit_tex_op_read.sum.per_second,lts__t_sectors_srcunit_tex_op_read_lookup_hit.sum,lts__t_sectors_srcunit_tex_op_read_lookup_miss.sum,lts__t_sectors_srcunit_tex_op_red.avg.pct_of_peak_sustained_elapsed,lts__t_sectors_srcunit_tex_op_red.avg.peak_sustained,lts__t_sectors_srcunit_tex_op_red.avg.per_cycle_elapsed,lts__t_sectors_srcunit_tex_op_red.sum,lts__t_sectors_srcunit_tex_op_red.sum.per_second,lts__t_sectors_srcunit_tex_op_red_lookup_hit.sum,lts__t_sectors_srcunit_tex_op_red_lookup_miss.sum,lts__t_sectors_srcunit_tex_op_write.avg.pct_of_peak_sustained_elapsed,lts__t_sectors_srcunit_tex_op_write.avg.peak_sustained,lts__t_sectors_srcunit_tex_op_write.avg.per_cycle_elapsed,lts__t_sectors_srcunit_tex_op_write.sum,lts__t_sectors_srcunit_tex_op_write.sum.per_second,lts__t_sectors_srcunit_tex_op_write_lookup_hit.sum,lts__t_sectors_srcunit_tex_op_write_lookup_miss.sum j  l1tex__data_bank_conflicts_pipe_lsu_mem_shared.sum,l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_atom.sum,l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum,l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum,l1tex__data_pipe_lsu_wavefronts_mem_shared.sum,l1tex__data_pipe_lsu_wavefronts_mem_shared.sum.pct_of_peak_sustained_elapsed,l1tex__data_pipe_lsu_wavefronts_mem_shared_op_atom.sum,l1tex__data_pipe_lsu_wavefronts_mem_shared_op_atom.sum.pct_of_peak_sustained_elapsed,l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum,l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum.pct_of_peak_sustained_elapsed,l1tex__data_pipe_lsu_wavefronts_mem_shared_op_st.sum,l1tex__data_pipe_lsu_wavefronts_mem_shared_op_st.sum.pct_of_peak_sustained_elapsed,smsp__inst_executed_op_ldgsts.sum,smsp__inst_executed_op_ldsm.sum,smsp__inst_executed_op_shared_atom.sum,smsp__sass_inst_executed_op_shared_ld.sum,smsp__sass_inst_executed_op_shared_st.sum,smsp__sass_l1tex_data_bank_conflicts_pipe_lsu_mem_shared_op_ldgsts.sum,smsp__sass_l1tex_data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum,smsp__sass_l1tex_data_pipe_lsu_wavefronts_mem_shared_op_ldgsts.sum,smsp__sass_l1tex_data_pipe_lsu_wavefronts_mem_shared_op_ldgsts.sum.pct_of_peak_sustained_elapsed,smsp__sass_l1tex_data_pipe_lsu_wavefronts_mem_shared_op_st.sum,smsp__sass_l1tex_data_pipe_lsu_wavefronts_mem_shared_op_st.sum.pct_of_peak_sustained_elapsed j smsp__pcsamp_warps_issue_stalled_barrier,smsp__pcsamp_warps_issue_stalled_branch_resolving,smsp__pcsamp_warps_issue_stalled_dispatch_stall,smsp__pcsamp_warps_issue_stalled_drain,smsp__pcsamp_warps_issue_stalled_imc_miss,smsp__pcsamp_warps_issue_stalled_lg_throttle,smsp__pcsamp_warps_issue_stalled_long_scoreboard,smsp__pcsamp_warps_issue_stalled_math_pipe_throttle,smsp__pcsamp_warps_issue_stalled_membar,smsp__pcsamp_warps_issue_stalled_mio_throttle,smsp__pcsamp_warps_issue_stalled_misc,smsp__pcsamp_warps_issue_stalled_no_instructions,smsp__pcsamp_warps_issue_stalled_not_selected,smsp__pcsamp_warps_issue_stalled_selected,smsp__pcsamp_warps_issue_stalled_short_scoreboard,smsp__pcsamp_warps_issue_stalled_sleeping,smsp__pcsamp_warps_issue_stalled_tex_throttle,smsp__pcsamp_warps_issue_stalled_wait j smsp__pcsamp_warps_issue_stalled_barrier_not_issued,smsp__pcsamp_warps_issue_stalled_branch_resolving_not_issued,smsp__pcsamp_warps_issue_stalled_dispatch_stall_not_issued,smsp__pcsamp_warps_issue_stalled_drain_not_issued,smsp__pcsamp_warps_issue_stalled_imc_miss_not_issued,smsp__pcsamp_warps_issue_stalled_lg_throttle_not_issued,smsp__pcsamp_warps_issue_stalled_long_scoreboard_not_issued,smsp__pcsamp_warps_issue_stalled_math_pipe_throttle_not_issued,smsp__pcsamp_warps_issue_stalled_membar_not_issued,smsp__pcsamp_warps_issue_stalled_mio_throttle_not_issued,smsp__pcsamp_warps_issue_stalled_misc_not_issued,smsp__pcsamp_warps_issue_stalled_no_instructions_not_issued,smsp__pcsamp_warps_issue_stalled_not_selected_not_issued,smsp__pcsamp_warps_issue_stalled_selected_not_issued,smsp__pcsamp_warps_issue_stalled_short_scoreboard_not_issued,smsp__pcsamp_warps_issue_stalled_sleeping_not_issued,smsp__pcsamp_warps_issue_stalled_tex_throttle_not_issued,smsp__pcsamp_warps_issue_stalled_wait_not_issued j ( j ( j ( j @ j @ j ( jO( (( (( (( (( (( (( (( jO( (( (( (( (( (( (( (( jc( ((@ (( (( (( (( (( (( jV( (( (( (( (( (( (( (( j ( ( FBSP.TriageSCG.dramc__read_throughput.avg.pct_of_peak_sustained_elapsed,FBSP.TriageSCG.dramc__throughput.avg.pct_of_peak_sustained_elapsed,FBSP.TriageSCG.dramc__write_throughput.avg.pct_of_peak_sustained_elapsed ( FE_B.TriageAC.gr__ctas_launched_queue_sync.sum,TriageAC.tpc__warps_active_realtime.avg.per_cycle_active,TriageAC.tpc__warps_active_realtime.sum.per_cycle_active ({ yLTS.TriageSCG.lts__average_t_sector_hit_rate_realtime.pct,LTS.TriageSCG.lts__throughput.avg.pct_of_peak_sustained_elapsed ( SM_A.TriageAC.l1tex__data_pipe_lsu_wavefronts.avg,SM_A.TriageAC.l1tex__lsu_writeback_active.avg,SM_A.TriageSCG.l1tex__throughput.avg.pct_of_peak_sustained_elapsedi (c aSM_A.TriageAC.sm__cycles_active.avg,SM_A.TriageAC.sm__inst_executed_realtime.avg.per_cycle_active ( SM_A.TriageSCG.sm__inst_executed_pipe_alu_realtime.avg.pct_of_peak_sustained_elapsed,SM_C.TriageSCG.smsp__inst_executed_pipe_fmaheavy.avg.pct_of_peak_sustained_elapsed,SM_C.TriageSCG.smsp__inst_executed_pipe_fmalite.avg.pct_of_peak_sustained_elapsed,TriageSCG.sm__throughput.avg.pct_of_peak_sustained_elapsed,pmsampling:sm__pipe_tensor_cycles_active_realtime.avg.pct_of_peak_sustained_elapsed2 (, *SM_B.TriageAC.l1tex__t_sector_hit_rate.pct j) ? ( ? j) ? (ƃ ? j) ? (ݮ ? j) ? (ڕ ? j) ? ( ? j) ? ( ? j) ? (۟ ? j E@ j  j  A j  A j `A j  A j  A j  A j `A j  A j, (  (  (  (  (  (๸  (  (ɸ  (Ѹ  (ٸ  (  (  (  (  (  (  (  (  (  (  (  (ඹ  (  (ƹ  (ι  (ֹ  (ݹ  (  (  (  (  ( Y7"? (  (  ( VUUUUU? (  (  (೺  ( Y7"? (ú VUUUUU? (˺  (Ӻ  (ں  (  (  (  (  (  (  (  (  (  (  (఻  (  (  (Ȼ  (л  (׻  (߻  (  (  (  (  (  (  (  (  (  (୼  ( UUUUUU? ( ? (ż UUUUU? (ͼ  (Լ  (ܼ  (  (  ( 88? ( Y7"? ( Y7"? ( VUUUUU? ( VUUUUU? ( VUUUUU? ( ! ? (ઽ VUUUUU? (  (  (½  (ʽ  (ѽ  (ٽ  (  (  (  (  (  (  (  (  (  (৾ VUUUUU? (  ( ? ( n0E>? (Ǿ S@ (ξ UUUUUX@ (־ X@ (޾ ́D+lT@ (  B@ ( *Y7,@ ( ?@ ( UUUUUS@ ( 88lS@ ( #u)2K@ ( UUUUUW@ ( 8"u>@ (ि 01@ ( 3@ ( $Zas }P@ ( K@ (Ŀ >S!R@ (˿ UUUUU%E@ (ӿ L@ (ۿ 4@ ( ۥ)@ ( TUUUUE<@ ( h`|T@ ( M@ ( VUUUUQ@ ( B@ ( rqG@ ( 3=@ ( D@ ( L@ ( H@ ( dQ@ ( 98*G@ ( g`fE@ ( Ϻt:@ ( UUUUUG@ ( VUUUUeD@ ( UUUUUP@ ( ۥ\H@ ( VUUUU]B@ ( LϺ D@ ( C@ ( VUUUUD@ ( O@ ( ~ԓGQ@ ( VUUUUE@ ( LϺ?@ ( <@ ( D@ ( ǏI@ ( ޹sK@ ( ZT@ ( Sn09F@ ( @@ ( 2@ ( D@ ( HR@ ( VUUUUJ@ ( LQ@ ( UUUUUD@ ( C@ ( 5@ ( t 7@ ( 6T@ ( g`P@ ( UUUUUL@ ( VUUUUC@ ( @@ ( RO@@ ( 09@ ( )Y7lB@ ( S@ ( Q@ ( K=I@ ( H@@ ( u)8@ ( 7@ ( (F@ ( K@ ( pz2~GX@ ( F@ ( | TC@ (  ?@ ( VUUUU=@ ( VUUUU7@ ( q-&@ (  g] @ ( Y7"? (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  j, (  (  (  (  (  (๸  (  (ɸ  (Ѹ  (ٸ  (  (  (  (  (  (  (  (  (  (  (  (ඹ  (  (ƹ  (ι  (ֹ  (ݹ  (  (  (  (  ( Y7"? (  (  ( VUUUUU? (  (  (೺  ( Y7"? (ú VUUUUU? (˺  (Ӻ  (ں  (  (  (  (  (  (  (  (  (  (  (఻  (  (  (Ȼ  (л  (׻  (߻  (  (  (  (  (  (  (  (  (  (୼  ( UUUUUU? ( ? (ż UUUUU? (ͼ  (Լ  (ܼ  (  (  ( 88? ( Y7"? ( Y7"? ( VUUUUU? ( VUUUUU? ( VUUUUU? ( ! ? (ઽ VUUUUU? (  (  (½  (ʽ  (ѽ  (ٽ  (  (  (  (  (  (  (  (  (  (৾ VUUUUU? (  ( ? ( n0E>? (Ǿ S@ (ξ UUUUUX@ (־ UUUUUX@ (޾ /dvX@ ( RU@ ( #u)hR@ ( rS@ ( jV@ ( 88X@ ( X@ ( UUUUUX@ ( 1E>!X@ (ि UUUUUW@ ( .W@ ( Poz.Y@ ( W@ (Ŀ S@W@ (˿ VUUUU1V@ (ӿ V@ (ۿ W@ ( ROX@ ( UUUUUW@ (  gW@ ( UUUUU1X@ ( BX@ ( VUUUU]W@ ( 8dW@ ( LϬW@ ( `|V@ ( UUUUUX@ ( W@ (  X@ ( W@ ( `|+X@ ( "uX@ ( V@ ( VUUUUYV@ ( X@ ( PozfX@ ( W@ ( Y7"W@ ( W@ ( V@ (  X@ ( ^6Y@ ( UUUUUV@ ( E>SU@ ( V@ ( dV@ ( ݹsέX@ ( [lٲX@ ( LX@ ( ϺW@ ( W@ ( W@ ( VUUUUV@ ( @W@ ( W@ ( Y7")X@ ( W@ ( W@ ( W@ ( 9hW@ ( @X@ ( gxX@ ( bW@ ( VUUUUV@ ( V@ ( $ZasX@ ( .V@ ( L W@ ( ~W@ ( W@ ( RWY@ ( rW@ ( LV@ (  T@ ( UUUUUU@ ( W@ ( oz2 Z@ ( X@ ( D>SW@ ( VUUUUW@ ( W@ ( dW@ ( qǝT@ ( ϺF@ ( S@ (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  j, (  (  (  (  (  (๸  (  (ɸ  (Ѹ  (ٸ  (  (  (  (  (  (  (  (  (  (  (  (ඹ  (  (ƹ  (ι  (ֹ  (ݹ  (  (  (  (  (  (  (  (  (  (  (೺  (  (ú  (˺  (Ӻ  (ں  (  (  (  (  (  (  (  (  (  (  (఻  (  (  (Ȼ  (л  (׻  (߻  (  (  (  (  (  (  (  (  (  (୼  (  (  (ż  (ͼ  (Լ  (ܼ  (  (  (  (  (  (  (  (  (  (ઽ  (  (  (½  (ʽ  (ѽ  (ٽ  (  (  (  (  (  (  (  (  (  (৾  (  (  (  (Ǿ  (ξ  (־ TUUUUU? (޾ .e0@ ( H@ ( M@ ( UUUUUeG@ ( *&@ ( 5@ ( Sn0 E@ ( @ ( #u)tP@ (ि UUUUUR@ ( nR@ ( WHbA@ ( (D@ (Ŀ n0E>{4@ (˿ UUUUU=G@ (ӿ BA@ (ۿ R@ ( z2~pU@ ( P@ ( v)Y,@ ( B@ ( UUUUU:@ ( L@ ( F@ ( >S_P@ ( LCI@ ( C@ ( XG@ ( :@ ( qqH@ ( LJ@ ( n0EvQ@ ( E@ ( VUUUUMH@ (  >@ (  VpH@ ( BM@ ( "uK@ ( UUUUUEK@ ( H@ ( VUUUU%@@ ( ~ԓ?@ ( UUUUUH@ ( n0EK@ ( UUUUUO@ ( G@ ( ȏ?~G@ ( ٲe˖E@ ( /@ ( ϺH@ ( VUUUUO@ ( UUUUUR@ ( H@ ( z2~D4@ ( D@ ( u)Y8@ ( J@ ( UUUUUL@ ( UUUUU9R@ ( h́Q@ ( VUUUU%0@ ( L?@ ( A@ ( UUUUUH@ ( "M@ ( 'K5P@ ( UUUUUO@ ( g`K@ ( ,@ (  8@ ( HI@ ( TUUUUN@ ( LP@ ( VUUUU-L@ ( *E@ ( C@ ( íD+@ ( VUUUUI@ ( K@ ( VUUUUO@ ( ^P@ ( nQ@ ( 88Q@ ( n0E>D@ ( )Y7@ (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  j+ @ (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (ƒ  (ƒ  (ƒ  (ƒ  (ƒ  (ƒ  (ƒ  (ƒ  (ƒ  (ƒ  (ƒ  (ƒ  (ƒ  (ƒ  (ƒ  (ƒ  (à  (à  (à  (à  (à  (à  (à  (à  (à  (à  (à  (à  (à  (à  (à  (à  (ă  (ă  (ă  (ă  (ă  (ă  (ă  (ă  (ă  (ă  (ă  (ă  (ă  (ă  (ă  (ă  (ă  (Ń  (Ń  (Ń  (Ń  (Ń  (Ń  (Ń  (Ń  (Ń  (Ń  (Ń  (Ń  (Ń  (Ń  (Ń  (Ń  (ƃ  (ƃ  (ƃ  (ƃ  (ƃ  (ƃ  (ƃ  (ƃ u@ (ƃ  (ƃ $@ (ƃ M@ (ƃ O@ (ƃ O@ (ƃ N@ (ƃ @P@ (ƃ G@ (ƃ C@ (ǃ =@ (ǃ ;@ (ǃ J@ (ǃ I@ (ǃ P@ (ǃ N@ (ǃ D@ (ǃ F@ (ǃ 4@ (ǃ ;@ (ǃ I@ (ǃ R@ (ǃ M@ (ǃ G@ (ǃ E@ (ǃ F@ (ȃ @@ (ȃ F@ (ȃ F@ (ȃ R@ (ȃ I@ (ȃ E@ (ȃ D@ (ȃ D@ (ȃ C@ (ȃ O@ (ȃ P@ (ȃ D@ (ȃ 9@ (ȃ H@ (ȃ C@ (ȃ P@ (Ƀ J@ (Ƀ G@ (Ƀ D@ (Ƀ C@ (Ƀ 8@ (Ƀ G@ (Ƀ J@ (Ƀ N@ (Ƀ K@ (Ƀ A@ (Ƀ C@ (Ƀ I@ (Ƀ N@ (Ƀ M@ (Ƀ @@ (Ƀ D@ (Ƀ P@ (ʃ B@ (ʃ F@ (ʃ F@ (ʃ M@ (ʃ I@ (ʃ C@ (ʃ E@ (ʃ @@ (ʃ .@ (ʃ G@ (ʃ L@ (ʃ R@ (ʃ L@ (ʃ B@ (ʃ >@ (ʃ G@ (˃ R@ (˃ J@ (˃ M@ (˃ B@ (˃ =@ (˃ D@ (˃ K@ (˃ 1@ (˃  (˃  (˃  (˃  (˃  (˃  (˃  (˃  (̃  (̃  (̃  (̃  (̃  (̃  (̃  (̃  (̃  (̃  (̃  (̃  (̃  (̃  (̃  (̃  (̃  (̓  (̓  (̓  (̓  (̓  (̓  (̓  (̓  (̓  (̓  (̓  (̓  (̓  (̓  (̓  (̓  (΃  j+ ëOw@ (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (ƒ  (ƒ  (ƒ  (ƒ  (ƒ  (ƒ  (ƒ  (ƒ  (ƒ  (ƒ  (ƒ  (ƒ  (ƒ  (ƒ  (ƒ  (ƒ  (à  (à  (à  (à  (à  (à  (à  (à  (à  (à  (à  (à  (à  (à  (à  (à  (ă  (ă  (ă  (ă  (ă  (ă  (ă  (ă  (ă  (ă  (ă  (ă  (ă  (ă  (ă  (ă  (ă  (Ń  (Ń  (Ń  (Ń  (Ń  (Ń  (Ń  (Ń  (Ń  (Ń  (Ń  (Ń  (Ń  (Ń  (Ń  (Ń  (ƃ  (ƃ  (ƃ  (ƃ  (ƃ  (ƃ  (ƃ  (ƃ )H_ȎB@ (ƃ ⪻W@ (ƃ tU@ (ƃ _`S@ (ƃ 48S6T@ (ƃ Z%,)T@ (ƃ |ET3T@ (ƃ eKu6T@ (ƃ ca4U@ (ƃ ρW9T@ (ǃ JhhS@ (ǃ 蘱_R@ (ǃ iAKaR@ (ǃ LXMwR@ (ǃ `*R@ (ǃ %8kS@ (ǃ Ϻ'sS@ (ǃ t*BS@ (ǃ וEqR@ (ǃ k18R@ (ǃ  pQ@ (ǃ lJPQ@ (ǃ BzljS@ (ǃ `S@ (ǃ qp0S@ (ǃ D#??S@ (ȃ 1רR@ (ȃ \jιQ@ (ȃ oAQ@ (ȃ  R@ (ȃ t9S@ (ȃ *w*R@ (ȃ AFR@ (ȃ C /R@ (ȃ 63 R@ (ȃ 6Q@ (ȃ -!DR@ (ȃ !R@ (ȃ &fDR@ (ȃ fx^Q@ (ȃ RaQ@ (ȃ (H:R@ (Ƀ yeR@ (Ƀ :ؼR@ (Ƀ mCQЊR@ (Ƀ 'q:ER@ (Ƀ 4%R@ (Ƀ Q@ (Ƀ sQ@ (Ƀ IK,WR@ (Ƀ rC,R@ (Ƀ JR@ (Ƀ y,6Q@ (Ƀ Q@ (Ƀ 渑Q@ (Ƀ s*=R@ (Ƀ q57!R@ (Ƀ xExR@ (Ƀ zybR@ (ʃ en'ĉR@ (ʃ گ4R@ (ʃ 2.Q@ (ʃ {Q@ (ʃ kXR@ (ʃ uR@ (ʃ LZER@ (ʃ ~7R@ (ʃ @R@ (ʃ yxR@ (ʃ 9o*'@ (ʃ  _@ (ʃ ^03@ (ʃ 8,'J@ (ʃ "@ (ʃ NO@ (ʃ ޙ@ (ʃ U[Š@ (ʃ *6@ (ʃ `sE@ (ʃ  ڞ@ (˃ 1{Bꙟ@ (˃ oD@ (˃ m@ (˃ 6l@ (˃ a#@ (˃ _@ (˃ {o@ (˃ :*3G@ (˃ O(!qԗ@ (˃ I(ҳo@ (˃ 0]^a@ (˃ ):3u@ (˃ ސR@ (˃ /~@ (˃  (˃  (̃  (̃  (̃  (̃  (̃  (̃  (̃  (̃  (̃  (̃  (̃  (̃  (̃  (̃  (̃  (̃  (̃  (̓  (̓  (̓  (̓  (̓  (̓  (̓  (̓  (̓  (̓  (̓  (̓  (̓  (̓  (̓  (̓  (΃  j+ ( Y@ ( Y@ ( Y@ (  ( Y@ (ĩ Y@ (̩ Y@ (ԩ Y@ (ܩ Y@ (䩡 Y@ (멡 Y@ (  (  ( Y@ ( Y@ (  ( Y@ (  (  ( Y@ (  (  (ɪ  (Ѫ  (٪  (᪡  (誡  (  (  (  (  (  (  (  ( Y@ (  (  (ྫ  (ƫ  (Ϋ  (֫  (ޫ Y@ (嫡  ( Y@ (  (  (  (  (  (  (  (  (  (໬  (ì  (ˬ  (Ӭ  (۬  (⬡  (ꬡ  ( Y@ ( Y@ ( Y@ ( Y@ ( Y@ ( Y@ ( Y@ ( Y@ ( Y@ (อ Y@ ( Y@ (ȭ Y@ (Э Y@ (ح Y@ (߭ Y@ (筡 Y@ (ﭡ  (  (  (  (  (  (  (  (  (൮  (  (Ů  (ͮ Y@ (ծ  (ܮ Y@ (䮡  (쮡  ( Y@ ( Y@ ( ? ( SFF@ ( wA%W@ (  IT@ ( 655U@ ( r8RW@ (ಯ  tlP@ ( =E@ (¯ Z.7IJ@ (ʯ ;wQ@ (ү Ge'R@ (ٯ /V^V@ (ᯡ k~E FV@ (鯡 J=8Q@ ( ׊J@ ( 6I@ ( !$2L@ ( cwQ@ ( [uxS@ (  1U@ ( xqS@ ( )vJM@ (௰ BedG@ ( BC*N@ ( 焵Q@ (ǰ GT@ (ϰ S@ (ְ  'R@ (ް 5c7K@ (氡 ņ6K@ ( K;iBN@ ( j_IWP@ ( JU@ ( ;NV@ ( ˀOR@ ( qt9O@ ( ]}J@ ( YQL@ (଱ |O@ ( -b3R@ ( TbpV@ (ı fJAۊR@ (̱ 외[-P@ (ӱ _ixbK@ (۱ |P@ (㱡 ؔQ@ (뱡 Q@ ( JFR@ ( }D=S@ ( pZJO@ ( h! J@ ( ҀP@ ( )eR@ ( o-R@ (ੲ WPHQ@ ( *O@ ( Yn,K@ ( z8CiM@ (ɲ (8ڝ9S@ (в LvB? ( Q+\o? ( pl:? ( 5'Ps? ( 5'Psr? (  ( @? ( o+? (໬ U{? (ì @&6s ? (ˬ o+? (Ӭ U{? (۬ ș? (⬡ 5'Ps? (ꬡ 镱^? ( ñ@ ( c^ 1@ ( !_"@ ( m۶m@ ( 1sm? ( &~f? ( : ą@ ( I-S@ ( s<@ (อ i5 Q@ ( LϺ@ (ȭ ޽x,YK@ (Э ^q F5@ (ح 2݅? (߭ qr? (筡 ,{D>? (ﭡ #qsm? ( u\? ( &~f? ( ù? ( Q+\o? ( O w? ( к? ( u\? ( к? (൮ z~X? ( u\? (Ů j? (ͮ E^ ? (ծ [9 ą? (ܮ BI .? (䮡 @&6s ? (쮡 +c? ( Ps ? ( h`|? ( &(O@ ( = OI@ ( ût8P@ ( D`P4N@ ( _6heN@ ( b:,&N@ (ಯ wN@ ( (N@ (¯ c]q I@ (ʯ 1qA@ (ү f=-K@ (ٯ ~ I@ (ᯡ o۶mJO@ (鯡 T%aN@ ( QgJ@ ( _E@ ( E@ ( !Q6C@ ( ggB@ ( GGM@ ( ("7M@ ( TQM@ (௰ _rF@ ( Q@@ ( jkD@ (ǰ VQ9I@ (ϰ XP M@ (ְ 瘬M@ (ް WHsN@ (氡  mC@ ( N5I@yF@ ( QQF@ ( e'GC@ ( ḯL@ ( R^Cy M@ ( ="'7N@ ( ~FQFK@ ( X B@ (଱ DF@ ( p^F@ ( _J@ (ı u)L@ (̱ ҶH@H@ (ӱ n/J@ (۱ kI@ (㱡 55wA@ (뱡 VF@ ( M@ ( Ǐ?N@ ( a.N@ ( Kd#D@ ( q=@ ( {J@ ( .=|K@ (ੲ )AJ@ ( 64eGF@ ( o`M@ ( 'F@ (ɲ E@ (в ˳VPH@ (ز z0K@ (ಡ /|L@ (財  5'K@ ( m۶m[I@ ( <%SF@ (  -Q?@ ( ZFG@ ( `kFK@ ( J@ ( D"4D@ (঳ 9wCJ@ ( V+M@ ( aelK@ ( 'F@ (Ƴ $I$B@ (ͳ ʗ |yF@ (ճ I-SE@ (ݳ -nK@ (峡 #ECqSM@ ( 7'K`K@ ( ErC@ (  ^ }_C@ ( SOor3@ ( l!}-DI@ ( sYC@ ( K]I@ (ࣴ PV_F@ ( Vy1@ ( H>@ ( q[D@@ (ô  (ʴ  (Ҵ  (ڴ  (ⴡ  (괡  (  (  (  (  (  (  (࠵  (  (  (  (  (ǵ  (ϵ  (׵ Q+\o? (ߵ  (絡  (ﵡ  (  (  (  (  (  (  (  (  (  (  (Ķ  (̶  (Զ  j* $IP@ (  (  (  (  (  (  (Ȑ  (А  (ؐ  (  (  (  (  (  (  (  (  (  (  (  (  (ད  (ő  (͑  (Ց  (ݑ  (  (  (  (  (  (  (  (  (  (  (  (ຒ  (’  (ʒ  (Ғ  (ڒ  (  (  (  (  (  (  (  (  (  (  (  (ී  (࿓  (Ǔ  (ϓ  (ד  (ߓ  (  (  (  (  (  (  (  (  (  (  (ഔ  (༔  (Ĕ  (̔  (Ԕ  (ܔ  (  (  (  (  (  (  (  (  (  (  (ౕ  (๕  (  (ɕ  (ѕ  (ٕ  (  (  (  ( H@ ( n۶mۮS@ ( $I$yi@ ( $I$r@ ( I$Io@ ( I$I`c@ ( ۶m۶T@ (஖ ۶m۶W@ (ඖ $I$me@ ( I$In@ (Ɩ I$IV@ (Ζ $I$i@ (֖ I$Ia@ (ݖ U@ ( %I$I:W@ ( m۶mb@ ( m۶me@ ( m۶m a@ ( I$Ic@ ( n۶m`@ ( I$IU@ ( n۶m>Z@ ( m۶m[@ (૗ 0c@ (೗ I$IPg@ ( n۶mێ`@ (× $I$`@ (˗ m۶ma@ (ӗ %I$I^@ (ڗ _@ ( %I$Ie@ ( I$Ib@ ( m۶mc@ ( %I$I\@ ( 0^@ ( m۶m]@ ( m۶mOb@ ( $I$`@ ( Tc@ (ਘ ۶m۶a@ (ఘ %I$IV@ ( m۶mC[@ ( m۶m^@ (Ș n۶m:d@ (И m۶ma@ (ט $`@ (ߘ I$I`@ ( I$If@ ( m۶m^@ ( I$I_@ ( n۶mne@ ( I$IY@ ( ۶m۶e^@ ( I$I@ ( !;-Z;9@ ( /ŲSL.@ ( 6pg @ (஖ j;!@ (ඖ +?ԡ0@ ( j/b8@ (Ɩ H̰i "@ (Ζ JkE4@ (֖ 07-e+@ (ݖ }= @ ( R~|?"@ (  5'-@ ( wT1@ ( -'*@ ( c'-@ ( Wkrh)@ (  @ ( 4=$@ ( 3~3%@ (૗ l:.@ (೗ QU2@ ( <˜)@ (× نs)@ (˗ Qf>+@ (ӗ }x '@ (ڗ r:(@ ( 90L1@ (  y,,@ ( ? (Ò ?)7ͳ? (˒ 4? (Ӓ \m2*? (ے Tm? ( Vn}? ( ִe? ( FK$? ( K? ( 1u? ( KZ",Y? ( *? ( >wr? ( 3O&? ( Rp=? ( 'h? ( zaz? ( G%? (ȓ 9#Is? (Г {g? (ؓ e? ( SR7X ? ( 'E)? ( h? ( >8d? ( O"\~? ( үl? ( s(? ( .M? ( ]^? ( êH? ( itW? ( bN)R? (པ 2q? (Ŕ ? (͔ 6/? (Ք _? (ݔ j? ( 'H? ( m.鉍L? ( >HA? ( 'Y*? ( ܃(a? ( %!? ( Efcr? ( ,sԈG? ( ]0? ( {Iaž? ( l-? (ຕ '? (•  HG? (ʕ ~S? (ҕ ? (ڕ p$? ( #(HX? ( 5r(? ( K? ( ? ( ܒ]R? ( wጋ? ( {l? ( 0ǎ? ( _V6? ( p7괼? ( áw? (ූ 8ݡE? (࿖ ? ( 6a? ( V? ( `? ( %? ( nJ6I? ( BQ1oo@ ( WS~? ( 6j\? ( 6nu? ( O? ( 5l3? ( Ay#? ( \2^? ( )Ԩ{? ( x+4z7? ( 8? ( (y? (  ? ( Z? ( '/{b? ( K=Q ? ( ΉkN? ( Äkh? ( G0? ( 7? ( Cc}? ( Beu? ( IUpk? ( oR|? ( C=N=? ( ؽu? ( ^d)s? ( $>"? (  \? ( v:c\? ( b:s? ( ݹ|(? ( ,|0? ( Ql3k? ( 3l[? ( :Wl? ( _ÎQ? ( V$M? ( ]QT? ( ? e? ( 5z? ( 0l߇? ( t~? ( /-? ( uWl? ( Y Ё? ( &? ( -V? ( (etU? ( 熪.}? ( Ne? (  ? ( 7E? ( YaU3? ( 0΍ ? ( MC ? ( ¯? ( =B\? ( IVl ? ( ~A^? ( R" ? ( z{Q? (  Ho? ( \2j? ( >? ( "? ( _? ( r? ( 40ź? ( ](4Z[? ( KXhf? ( εK? ( T*2? (  ^? ( Jôr? ( w!? ( r ? ( Rn? ( [? ( GW? (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (Ā  (̀  (Ԁ  (܀  (  (  (  (  (  (  j+  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (Ā  (̀  (Ԁ  (܀  (  (  (  (  (  (  j+ (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  ( ۻ@ ( \W7s*@ ( Ю@ ( @zw@ ( 91m;@ ( 4~*@ ( $DTv@ ( \S` @ ( s<@ ( @ ( P% @ ( l,1@ ( y @ ( o$@ ( .lC@ ( 5@ ( %9w-@ ( 2@ ( @ ( %A\9%@ ( 2@ ( d@ ( s.* @ ( ims@ ( j>J@ ( 2㫦|@ ( ;s LW@ ( ɒ@ ( AG@ (  @ ( ebx>@ ( E@ ( ( @ ( J@ ( H:S@ ( ZLg1}@ ( . $ @ ( 6d0l @ ( @#@ ( Ǭy @ ( l @ ( ~L:@ ( gU@ ( $;} @ ( Rۊc@ ( m0'@ ( 3k#E@ (  e@ ( c\? ( /ތ@ ( c&@ ( C8@ ( &bI!@ ( Z -/ @ ( C @ ( 2$@ ( Q+@ ( BB7? ( ̗ @ ( Ջ5@ ( $"@ ( ҎC @ ( ۚw? @ ( PW? ( ?<=? ( ,@ ( ^y@ ( \p|@ ( Q * @ ( @] @ ( Z]U@ ( Ǯ@ ( ֺ@ ( 鱱 @ ( >n1@ ( { @ (  CH@ ( 5V@ ( &ݟ@ (  Z @ ( Sȷ@ ( 0q>@ ( HJ @ (  W~@ ( ǂnX? ( 'J;? ( jK㵫? ( TFS? ( >Y\R? (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (Ā  (̀  (Ԁ  (܀  (  (  (  (  (  (  j+  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (Ā  (̀  (Ԁ  (܀  (  (  (  (  (  (  j+ (՟  (՟  (՟  (՟  (՟  (՟  (՟  (՟  (՟  (֟  (֟  (֟  (֟  (֟  (֟  (֟  (֟  (֟  (֟  (֟  (֟  (֟  (֟  (֟  (֟  (֟  (ן  (ן  (ן  (ן  (ן  (ן  (ן  (ן  (ן  (ן  (ן  (ן  (ן  (ן  (ן  (ן  (؟  (؟  (؟  (؟  (؟  (؟  (؟  (؟  (؟  (؟  (؟  (؟  (؟  (؟  (؟  (؟  (ٟ  (ٟ  (ٟ  (ٟ  (ٟ  (ٟ  (ٟ  (ٟ  (ٟ  (ٟ  (ٟ  (ٟ  (ٟ  (ٟ  (ٟ  (ٟ  (ٟ  (ڟ  (ڟ  (ڟ  (ڟ  (ڟ  (ڟ  (ڟ  (ڟ  (ڟ  (ڟ  (ڟ  (ڟ  (ڟ  (ڟ  (ڟ  (ڟ  (۟  (۟  (۟  (۟  (۟  (۟  (۟ 9TA֏P@ (۟ NnI@ (۟ G5N@ (۟ "\E;N@ (۟  7|L@ (۟ $R3AN@ (۟ 8ꆷP@ (۟ Q2L}P@ (۟ ;P@ (ܟ ĭbVN@ (ܟ jwnP@ (ܟ U1ʦO@ (ܟ 5P@ (ܟ rO@ (ܟ W.l8O@ (ܟ ,DTOO@ (ܟ `7P@ (ܟ 5g"P@ (ߟ G1P@ (ߟ G L$MO@ (ߟ cX, P@ (ߟ 0hO@ (ߟ OP@ (ߟ ނo5O@ ( Tr!VP@ ( ,~O@ ( %}tRP@ ( ,3RrO@ ( 5O@ ( ],cO@ ( MAe O@ ( (O@ ( hO@ ( 6P^O@ ( |gQ@ (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  (  j T@ j  3FPi'_B j xA j  Mp G@ j ϜwrNPB j uA j )J @ j yؚ? j jkO@ j W?@ j 퉓 @ j uY!@ j W3Z*@ j z2vnw@ j Wk!-@ j (<7@ j W? j (<7? j -sN? j \[.? j \[.? j W?@ j (<7>@ j  j  j  j  j  j  j  j  j  j  j  j  j  j  j  j  j  j  j  j  j \@ j @ j (<7>@ j sS@ j MM!XB@ j 6vq@ j Eq@ j  j *Gꤨ9? j ky? j G>F@ j ?p? j s?pHH@ j @iU@ j Ϛ? j  j  @ j (@ j pF5"@ j vI? j( ((  ((  ((  ((  ((  (Ф(  (ग(  (𤗙(  ((  ((  ((  ((  ((  (Х(  (ॗ(  (𥗙(  ((  ((  ((  (( j( ((  ((  ((  ((  ((  (Ф( (ग(  (𤗙(  ((  ((  ((  ((  ((  (Х(  (ॗ(  (𥗙(  ((  ((  ((  (( jx( ((  ((  ((  (𥗙(  ((  (( j ( j( (( (( (( (( (( (Ф( (ग( (𤗙( (( (( (( (( (( (Х( (ॗ( (𥗙( (( (( (( (( j ? j @ j l@ j  j  j ( j ( pz ComputeWorkloadAnalysisCompute Workload Analysis"? 'sm__inst_executed.avg.per_cycle_elapsedExecuted Ipc ElapsedF ;sm__instruction_throughput.avg.pct_of_peak_sustained_activeSM Busy= &sm__inst_executed.avg.per_cycle_activeExecuted Ipc ActiveD 0sm__inst_issued.avg.pct_of_peak_sustained_activeIssue Slots Busy9 $sm__inst_issued.avg.per_cycle_activeIssued Ipc Active * *  %Pipe Utilization (% of active cycles)" Utilization [%]d*B ;sm__pipe_alu_cycles_active.avg.pct_of_peak_sustained_activeALU*B ;sm__pipe_fma_cycles_active.avg.pct_of_peak_sustained_activeFMA*D sm__pipe_tensor_cycles_active.avg.pct_of_peak_sustained_active Tensor (All)*c Fsm__pipe_tensor_op_dmma_cycles_active.avg.pct_of_peak_sustained_active Tensor (DP)2 PPZZ*U Fsm__pipe_tensor_op_hmma_cycles_active.avg.pct_of_peak_sustained_active Tensor (FP)*Z Fsm__pipe_tensor_op_imma_cycles_active.avg.pct_of_peak_sustained_active Tensor (INT)2H* >sm__pipe_shared_cycles_active.avg.pct_of_peak_sustained_activeShared (FP64+FP16+Tensor)2FF:\ >sm__pipe_shared_cycles_active.avg.pct_of_peak_sustained_activeShared (FP16+Tensor)HK:d >sm__pipe_shared_cycles_active.avg.pct_of_peak_sustained_activeShared (FP64+Tensor) PPZZ*H ;sm__pipe_tma_cycles_active.avg.pct_of_peak_sustained_activeTMA2ZZ08@JPipeline utilization based on the number of cycles the pipeline was active. This takes the rates of different instructions executing on the pipeline into account. For an instruction requiring 4 cycles to complete execution, the counter is increased by 1 for 4 cycles.  2Pipe Utilization (% of peak instructions executed)" Utilization [%]d*B ;sm__inst_executed_pipe_adu.avg.pct_of_peak_sustained_activeADU*B ;sm__inst_executed_pipe_alu.avg.pct_of_peak_sustained_activeALU*B ;sm__inst_executed_pipe_cbu.avg.pct_of_peak_sustained_activeCBU*B ;sm__inst_executed_pipe_fma.avg.pct_of_peak_sustained_activeFMA* Table with NUMA IDs based on CPU affinity and memory affinity.2 , numa__dev_display_name_all Device Names  $ numa__cpu_affinity CPU Affinity  ) numa__id_cpuNUMA ID by CPU Affinity  / numa__id_memoryNUMA ID by Memory Affinity :_Non-uniform memory access (NUMA) affinities based on compute and memory distances for all GPUs.B fullZFFKVY Nvlink_Tables NVLink Tables>*O  Logical NVLink Properties  Logical NVLink Throughput NVLink Tables:0Detailed tables with properties for each NVLink.B fullB nvlinkJNvlink Nvlink_TopologyNVLink Topology=*)  NVLink TopologyNVLink Topology:ZNVLink Topology diagram shows logical NVLink connections with transmit/receive throughput.B fullB nvlinkJNvlink Occupancy OccupancyP"? &sm__maximum_warps_per_active_cycle_pctTheoretical Occupancy: !launch__occupancy_limit_registersBlock Limit RegistersI &sm__maximum_warps_avg_per_active_cycleTheoretical Active Warps per SM< "launch__occupancy_limit_shared_memBlock Limit Shared MemO 1sm__warps_active.avg.pct_of_peak_sustained_activeAchieved Occupancy2"2 launch__occupancy_limit_warpsBlock Limit WarpsM %sm__warps_active.avg.per_cycle_activeAchieved Active Warps Per SM2"0 launch__occupancy_limit_blocksBlock Limit SM: "launch__cluster_max_potential_sizeMax Cluster Size2Z5 launch__cluster_max_activeMax Active Clusters2Z6 launch__occupancy_cluster_pctCluster Occupancy2Z> !launch__occupancy_cluster_gpu_pctOverall GPU Occupancy2Z* " +Impact of Varying Register Count Per Thread Registers Per Thread!( Warp Occupancy("K $launch__occupancy_per_register_count!Warp Occupancy Per Register Count *6 4 launch__registers_per_threadRegisters Per Thread " Impact of Varying Block Size Block Size!( Warp Occupancy("C launch__occupancy_per_block_sizeWarp Occupancy Per Block Size *" launch__block_size Block Size " /Impact of Varying Shared Memory Usage Per Block! Shared Memory Per Block!( Warp Occupancy("M %launch__occupancy_per_shared_mem_size"Warp Occupancy Per Shared Mem Size *9 7 launch__shared_mem_per_blockShared Memory Per Block " Impact of Varying Cluster Size" Potential Cluster Size( Active Clusters("7 "launch__occupancy_per_cluster_sizeActive Clusters *& $ launch__cluster_size Cluster Size Z2 H %smsp__warps_active.avg.peak_sustainedGPU Maximum Warps Per Scheduler K (smsp__maximum_warps_avg_per_active_cycleTheoretical Warps Per Scheduler:Occupancy is the ratio of the number of active warps per multiprocessor to the maximum number of possible active warps. Another way to view occupancy is the percentage of the hardware's ability to process warps that is actively in use. Higher occupancy does not always result in higher performance, however, low occupancy always reduces the ability to hide latencies, resulting in overall performance degradation. Large discrepancies between the theoretical and the achieved occupancy during execution typically indicates highly imbalanced workloads.B basicB detailedB fullZ"- PmSampling PM Sampling*)  Compute"r"p ;TPC.TriageA.tpc__warps_active_realtime.avg.per_cycle_activeAverage Active Warps Per CycleJsampling_compute1"p"n ;TPC.TriageA.tpc__warps_active_realtime.sum.per_cycle_activeTotal Active Warps Per CycleJsampling_compute1"I"G SM.TriageA.sm__cycles_active.avgSM Active CyclesJsampling_compute1"f"d :SM.TriageA.sm__inst_executed_realtime.avg.per_cycle_activeExecuted Ipc ActiveJsampling_compute2 L1 Cache"V"T *SM.TriageA.l1tex__lsu_writeback_active.avgWriteback ThroughputJsampling_memory1"M"K -TriageA.l1tex__t_sector_hit_rate_realtime.pctHit RateJsampling_memory2"W"U .SM.TriageA.l1tex__data_pipe_lsu_wavefronts.avgWavefronts (Data)Jsampling_memory1 KK   Overview"H"F pmsampling:sm__ctas_launched.sumBlocks LaunchedJsampling_compute1"I"G pmsampling:sm__cycles_active.avgSM Active CyclesJsampling_compute1"f"d :pmsampling:sm__inst_executed_realtime.avg.per_cycle_activeExecuted Ipc ActiveJsampling_compute2 SM"m"k Gpmsampling:sm__inst_executed_realtime.avg.pct_of_peak_sustained_elapsed SM ThroughputJsampling_compute2""} Ppmsampling:sm__inst_executed_pipe_alu_realtime.avg.pct_of_peak_sustained_elapsedSM ALU Pipe ThroughputJsampling_compute2"" Spmsampling:sm__pipe_tensor_cycles_active_realtime.avg.pct_of_peak_sustained_elapsedSM Tensor Pipe ThroughputJsampling_compute2 DRAM"e"c >pmsampling:dramc__throughput.avg.pct_of_peak_sustained_elapsedDRAM ThroughputJsampling_memory5"o"m Cpmsampling:dramc__read_throughput.avg.pct_of_peak_sustained_elapsedDRAM Read ThroughputJsampling_memory5"q"o Dpmsampling:dramc__write_throughput.avg.pct_of_peak_sustained_elapsedDRAM Write ThroughputJsampling_memory5 L1 Cache"t"r Hpmsampling:l1tex__lsu_writeback_active.avg.pct_of_peak_sustained_elapsedWriteback ThroughputJsampling_memory1"G"E 'pmsampling:l1tex__t_sector_hit_rate.pctHit RateJsampling_memory3"W"U .pmsampling:l1tex__data_pipe_lsu_wavefronts.avgWavefronts (Data)Jsampling_memory4 PP  Overview"o"m 8TriageAC.tpc__warps_active_realtime.avg.per_cycle_activeAverage Active Warps Per CycleJsampling_compute1"m"k 8TriageAC.tpc__warps_active_realtime.sum.per_cycle_activeTotal Active Warps Per CycleJsampling_compute1"V"T .FE_B.TriageAC.gr__ctas_launched_queue_sync.sumBlocks LaunchedJsampling_compute1"L"J #SM_A.TriageAC.sm__cycles_active.avgSM Active CyclesJsampling_compute2"i"g =SM_A.TriageAC.sm__inst_executed_realtime.avg.per_cycle_activeExecuted Ipc ActiveJsampling_compute2 SM"`"^ :TriageSCG.sm__throughput.avg.pct_of_peak_sustained_elapsed SM ThroughputJsampling_compute3"" TSM_A.TriageSCG.sm__inst_executed_pipe_alu_realtime.avg.pct_of_peak_sustained_elapsedSM ALU Pipe ThroughputJsampling_compute3"" QSM_C.TriageSCG.smsp__inst_executed_pipe_fmalite.avg.pct_of_peak_sustained_elapsedSM FMA Light Pipe ThroughputJsampling_compute3"" RSM_C.TriageSCG.smsp__inst_executed_pipe_fmaheavy.avg.pct_of_peak_sustained_elapsedSM FMA Heavy Pipe ThroughputJsampling_compute3"" Spmsampling:sm__pipe_tensor_cycles_active_realtime.avg.pct_of_peak_sustained_elapsedSM Tensor Pipe ThroughputJsampling_compute3 DRAM"i"g BFBSP.TriageSCG.dramc__throughput.avg.pct_of_peak_sustained_elapsedDRAM ThroughputJsampling_memory3"r"p GFBSP.TriageSCG.dramc__read_throughput.avg.pct_of_peak_sustained_elapsedDRAM Read BandwidthJsampling_memory3"t"r HFBSP.TriageSCG.dramc__write_throughput.avg.pct_of_peak_sustained_elapsedDRAM Write BandwidthJsampling_memory3 L2 Cache"d"b ?LTS.TriageSCG.lts__throughput.avg.pct_of_peak_sustained_elapsed L2 ThroughputJsampling_memory4"\"Z 9LTS.TriageSCG.lts__average_t_sector_hit_rate_realtime.pct L2 Hit RateJsampling_memory4 L1 Cache"g"e BSM_A.TriageSCG.l1tex__throughput.avg.pct_of_peak_sustained_elapsed L1 ThroughputJsampling_memory1"Y"W -SM_A.TriageAC.l1tex__lsu_writeback_active.avgWriteback ThroughputJsampling_memory1"J"H *SM_B.TriageAC.l1tex__t_sector_hit_rate.pctHit RateJsampling_memory2"Z"X 1SM_A.TriageAC.l1tex__data_pipe_lsu_wavefronts.avgWavefronts (Data)Jsampling_memory1 VY   Overview"H"F pmsampling:sm__ctas_launched.sumBlocks LaunchedJsampling_compute1"I"G pmsampling:sm__cycles_active.avgSM Active CyclesJsampling_compute1"f"d :pmsampling:sm__inst_executed_realtime.avg.per_cycle_activeExecuted Ipc ActiveJsampling_compute2 SM"m"k Gpmsampling:sm__inst_executed_realtime.avg.pct_of_peak_sustained_elapsed SM ThroughputJsampling_compute2""} Ppmsampling:sm__inst_executed_pipe_alu_realtime.avg.pct_of_peak_sustained_elapsedSM ALU Pipe ThroughputJsampling_compute2"" Spmsampling:sm__pipe_tensor_cycles_active_realtime.avg.pct_of_peak_sustained_elapsedSM Tensor Pipe ThroughputJsampling_compute2 DRAM"e"c >pmsampling:dramc__throughput.avg.pct_of_peak_sustained_elapsedDRAM ThroughputJsampling_memory5"o"m Cpmsampling:dramc__read_throughput.avg.pct_of_peak_sustained_elapsedDRAM Read ThroughputJsampling_memory5"q"o Dpmsampling:dramc__write_throughput.avg.pct_of_peak_sustained_elapsedDRAM Write ThroughputJsampling_memory5 L1 Cache"G"E 'pmsampling:l1tex__t_sector_hit_rate.pctHit RateJsampling_memory3"W"U .pmsampling:l1tex__data_pipe_lsu_wavefronts.avgWavefronts (Data)Jsampling_memory4 ZZ:Timeline view of PM metrics sampled periodically over the workload duration. Data is collected across multiple passes. Use this section to understand workload behavior changes over its runtime.B fullb sampling_compute1b sampling_compute2b sampling_compute3b sampling_compute4b sampling_memory1b sampling_memory2b sampling_memory3b sampling_memory4b sampling_memory5b sampling_memory6b sampling_memory7 SchedulerStatsScheduler Statistics("E 'smsp__warps_active.avg.per_cycle_activeActive Warps Per SchedulerA 2smsp__issue_inst0.avg.pct_of_peak_sustained_active No EligibleI )smsp__warps_eligible.avg.per_cycle_activeEligible Warps Per SchedulerK 3smsp__issue_active.avg.pct_of_peak_sustained_activeOne or More EligibleD 'smsp__issue_active.avg.per_cycle_activeIssued Warp Per Scheduler *  Warps Per Scheduler"*H %smsp__warps_active.avg.peak_sustainedGPU Maximum Warps Per Scheduler*K (smsp__maximum_warps_avg_per_active_cycleTheoretical Warps Per Scheduler*E 'smsp__warps_active.avg.per_cycle_activeActive Warps Per Scheduler*I )smsp__warps_eligible.avg.per_cycle_activeEligible Warps Per Scheduler*D 'smsp__issue_active.avg.per_cycle_activeIssued Warp Per Scheduler2Q O %smsp__warps_active.avg.peak_sustained&Theoretical Active Warps Per Scheduler:Summary of the activity of the schedulers issuing instructions. Each scheduler maintains a pool of warps that it can issue instructions for. The upper bound of warps in the pool (Theoretical Warps) is limited by the launch configuration. On every cycle each scheduler checks the state of the allocated warps in the pool (Active Warps). Active warps that are not stalled (Eligible Warps) are ready to issue their next instruction. From the set of eligible warps the scheduler selects a single warp from which to issue one or more instructions (Issued Warp). On cycles with no eligible warps, the issue slot is skipped and no instruction is issued. Having many skipped issue slots indicates poor latency hiding.B full@ SourceCountersSource Countersd"8 !smsp__inst_executed_op_branch.sumBranch InstructionsP 5smsp__sass_average_branch_targets_threads_uniform.pctBranch Efficiency2"G *derived__smsp__inst_executed_op_branch_pctBranch Instructions RatioP /smsp__sass_branch_targets_threads_divergent.avgAvg. Divergent Branches2"* * N:L !Warp Stall Sampling (All Samples)' %group:smsp__pcsamp_warp_stall_reasons M:D Most Instructions Executed& inst_executedInstructions Executed "Hotspot Locations:Source metrics, including branch efficiency and sampled warp stall reasons. Warp Stall Sampling metrics are periodically sampled over the kernel runtime. They indicate when warps were stalled and couldn't be scheduled. See the documentation for a description of all stall reasons. Only focus on stalls if the schedulers fail to issue every cycle.B detailedB fullR D derived__avg_thread_executed$thread_inst_executed / inst_executed N !derived__avg_thread_executed_true)thread_inst_executed_true / inst_executed  7derived__memory_l2_theoretical_sectors_global_excessiveQmemory_l2_theoretical_sectors_global - memory_l2_theoretical_sectors_global_ideal q .derived__memory_l1_wavefronts_shared_excessive?memory_l1_wavefronts_shared - memory_l1_wavefronts_shared_ideal W (derived__memory_l1_conflicts_shared_nway+memory_l1_wavefronts_shared / inst_executed i *derived__smsp__inst_executed_op_branch_pct;smsp__inst_executed_op_branch.sum / smsp__inst_executed.sumb2 group_inst_executedInstructions Executedb* group_memory_access Memory Accessb group_l1L1 Cacheb group_l2L2 Cacheb; group_warp_stall_aggregatedWarp Stalls Aggregatedb$ group_warp_stall Warp Stallb= group_warp_stall_not_issuedWarp Stalls (Not Issued)j2 g %group:smsp__pcsamp_warp_stall_reasons!Warp Stall Sampling (All Samples)Jgroup_warp_stall_aggregated  0group:smsp__pcsamp_warp_stall_reasons_not_issued(Warp Stall Sampling (Not-issued Samples)BJgroup_warp_stall_aggregated I smsp__pcsamp_sample_count # SamplesBJgroup_warp_stall_aggregated G inst_executedInstructions Executed2"BJgroup_inst_executed O thread_inst_executedThread Instructions Executed2"Jgroup_inst_executed b thread_inst_executed_true*Predicated-On Thread Instructions Executed2"Jgroup_inst_executed T derived__avg_thread_executedAvg. Threads ExecutedB Jgroup_inst_executed g !derived__avg_thread_executed_true#Avg. Predicated-On Threads ExecutedB Jgroup_inst_executed W &smsp__branch_targets_threads_divergentDivergent Branches2"Jgroup_inst_executed 7 memory_type Address Space2"Jgroup_memory_access A memory_access_typeAccess Operation2"Jgroup_memory_access A memory_access_size_type Access Size2"Jgroup_memory_access , l1tex__cycles_active.sumL1 Active Cycles . l1tex__cycles_elapsed.sumL1 Elapsed Cycles G memory_l1_tag_requests_globalL1 Tag Requests Global2"Jgroup_l1 U (derived__memory_l1_conflicts_shared_nwayL1 Conflicts Shared N-WayBJgroup_l1 ` .derived__memory_l1_wavefronts_shared_excessiveL1 Wavefronts Shared ExcessiveBJgroup_l1 C memory_l1_wavefronts_sharedL1 Wavefronts Shared2"Jgroup_l1 O !memory_l1_wavefronts_shared_idealL1 Wavefronts Shared Ideal2"Jgroup_l1 * lts__cycles_active.sumL2 Active Cycles , lts__cycles_elapsed.sumL2 Elapsed Cycles r 7derived__memory_l2_theoretical_sectors_global_excessive'L2 Theoretical Sectors Global ExcessiveBJgroup_l2 U $memory_l2_theoretical_sectors_globalL2 Theoretical Sectors Global2"Jgroup_l2 a *memory_l2_theoretical_sectors_global_ideal#L2 Theoretical Sectors Global Ideal2"Jgroup_l2 S #memory_l2_theoretical_sectors_localL2 Theoretical Sectors Local2"Jgroup_l2 d 4smsp__sass_inst_executed_memdesc_explicit_evict_typeL2 Explicit Evict Policies2P"Jgroup_l2 u =smsp__sass_inst_executed_memdesc_explicit_hitprop_evict_first"L2 Explicit Hit Policy Evict First2P"Jgroup_l2 s smsp__sass_inst_executed_memdesc_explicit_hitprop_evict_normal#L2 Explicit Hit Policy Evict Normal2P"Jgroup_l2  Esmsp__sass_inst_executed_memdesc_explicit_hitprop_evict_normal_demote*L2 Explicit Hit Policy Evict Normal Demote2P"Jgroup_l2 w >smsp__sass_inst_executed_memdesc_explicit_missprop_evict_first#L2 Explicit Miss Policy Evict First2P"Jgroup_l2 y ?smsp__sass_inst_executed_memdesc_explicit_missprop_evict_normal$L2 Explicit Miss Policy Evict Normal2P"Jgroup_l2 K (smsp__pcsamp_warps_issue_stalled_barrier stall_barrierJgroup_warp_stall ] 1smsp__pcsamp_warps_issue_stalled_branch_resolvingstall_branch_resolvingJgroup_warp_stall S /smsp__pcsamp_warps_issue_stalled_dispatch_stallstall_dispatchJgroup_warp_stall G &smsp__pcsamp_warps_issue_stalled_drain stall_drainJgroup_warp_stall H )smsp__pcsamp_warps_issue_stalled_imc_miss stall_imcJgroup_warp_stall J ,smsp__pcsamp_warps_issue_stalled_lg_throttlestall_lgJgroup_warp_stall S 0smsp__pcsamp_warps_issue_stalled_long_scoreboard stall_long_sbJgroup_warp_stall S 3smsp__pcsamp_warps_issue_stalled_math_pipe_throttle stall_mathJgroup_warp_stall I 'smsp__pcsamp_warps_issue_stalled_membar stall_membarJgroup_warp_stall L -smsp__pcsamp_warps_issue_stalled_mio_throttle stall_mioJgroup_warp_stall E %smsp__pcsamp_warps_issue_stalled_misc stall_miscJgroup_warp_stall S 0smsp__pcsamp_warps_issue_stalled_no_instructions stall_no_instJgroup_warp_stall U -smsp__pcsamp_warps_issue_stalled_not_selectedstall_not_selectedJgroup_warp_stall M )smsp__pcsamp_warps_issue_stalled_selectedstall_selectedJgroup_warp_stall U 1smsp__pcsamp_warps_issue_stalled_short_scoreboardstall_short_sbJgroup_warp_stall J )smsp__pcsamp_warps_issue_stalled_sleeping stall_sleepJgroup_warp_stall L -smsp__pcsamp_warps_issue_stalled_tex_throttle stall_texJgroup_warp_stall E %smsp__pcsamp_warps_issue_stalled_wait stall_waitJgroup_warp_stall n 3smsp__pcsamp_warps_issue_stalled_barrier_not_issuedstall_barrier (Not Issued)Jgroup_warp_stall_not_issued  smsp__pcsamp_warps_issue_stalled_math_pipe_throttle_not_issuedstall_math (Not Issued)Jgroup_warp_stall_not_issued l 2smsp__pcsamp_warps_issue_stalled_membar_not_issuedstall_membar (Not Issued)Jgroup_warp_stall_not_issued o 8smsp__pcsamp_warps_issue_stalled_mio_throttle_not_issuedstall_mio (Not Issued)Jgroup_warp_stall_not_issued h 0smsp__pcsamp_warps_issue_stalled_misc_not_issuedstall_misc (Not Issued)Jgroup_warp_stall_not_issued v ;smsp__pcsamp_warps_issue_stalled_no_instructions_not_issuedstall_no_inst (Not Issued)Jgroup_warp_stall_not_issued x 8smsp__pcsamp_warps_issue_stalled_not_selected_not_issuedstall_not_selected (Not Issued)Jgroup_warp_stall_not_issued p 4smsp__pcsamp_warps_issue_stalled_selected_not_issuedstall_selected (Not Issued)Jgroup_warp_stall_not_issued x 6gpu__dram_throughput.avg.pct_of_peak_sustained_elapsedKV:< 6gpu__dram_throughput.avg.pct_of_peak_sustained_elapsedY #dram__cycles_elapsed.avg.per_secondDRAM Frequency2F:+ #dram__cycles_elapsed.avg.per_secondKV:) #dram__cycles_elapsed.avg.per_secondY*  GPU Throughput"" Speed Of Light (SOL) [%]d *D 0sm__throughput.avg.pct_of_peak_sustained_elapsedCompute (SM) [%]*N @gpu__compute_memory_throughput.avg.pct_of_peak_sustained_elapsed Memory [%]GPU Throughput Chart* * } { Compute Throughput Breakdown*U :breakdown:sm__throughput.avg.pct_of_peak_sustained_elapsed{UNIT}: {COMPOUND_NAME}08   Memory Throughput Breakdown*e Jbreakdown:gpu__compute_memory_throughput.avg.pct_of_peak_sustained_elapsed{UNIT}: {COMPOUND_NAME}08GPU Throughput Breakdown28 6 launch__waves_per_multiprocessor Waves Per SM2":High-level overview of the throughput for compute and memory resources of the GPU. For each unit, the throughput reports the achieved percentage of utilization with respect to the theoretical maximum. Breakdowns show the throughput for each individual sub-metric of Compute and Memory to clearly identify the highest contributor.B basicB detailedB fullB roofline SpeedOfLight_RooflineChart!GPU Speed Of Light Roofline Chart * 2 "Floating Point Operations Roofline" Arithmetic Intensity [FLOP/byte] Performance [FLOP/s]"  f 9derived__sm__sass_thread_inst_executed_op_ffma_pred_on_x2)Theoretical Predicated-On FFMA Operations1 !sm__cycles_elapsed.avg.per_second SM Frequency  dram__bytes.sum.peak_sustained!Theoretical DRAM Bytes Accessible2F:& dram__bytes.sum.peak_sustainedKV:$ dram__bytes.sum.peak_sustainedY #dram__cycles_elapsed.avg.per_secondDRAM Frequency2F:+ #dram__cycles_elapsed.avg.per_secondKV:) #dram__cycles_elapsed.avg.per_secondY Single Precision Roofline"  f 9derived__sm__sass_thread_inst_executed_op_dfma_pred_on_x2)Theoretical Predicated-On DFMA Operations1 !sm__cycles_elapsed.avg.per_second SM Frequency  dram__bytes.sum.peak_sustained!Theoretical DRAM Bytes Accessible2F:& dram__bytes.sum.peak_sustainedKV:$ dram__bytes.sum.peak_sustainedY #dram__cycles_elapsed.avg.per_secondDRAM Frequency2F:+ #dram__cycles_elapsed.avg.per_secondKV:) #dram__cycles_elapsed.avg.per_secondY Double Precision Roofline*   Esmsp__sass_thread_inst_executed_op_fadd_pred_on.sum.per_cycle_elapsed9Predicated-On FADD Thread Instructions Executed Per Cycle  Esmsp__sass_thread_inst_executed_op_fmul_pred_on.sum.per_cycle_elapsed9Predicated-On FMUL Thread Instructions Executed Per Cycle f ;derived__smsp__sass_thread_inst_executed_op_ffma_pred_on_x2'Predicated-On FFMA Operations Per Cycle3 #smsp__cycles_elapsed.avg.per_second SM Frequencyxv dram__bytes.sum.per_secondDRAM Bandwidth2F:" dram__bytes.sum.per_secondKV: dram__bytes.sum.per_secondY! Single Precision Achieved Value*   Esmsp__sass_thread_inst_executed_op_dadd_pred_on.sum.per_cycle_elapsed9Predicated-On DADD Thread Instructions Executed Per Cycle  Esmsp__sass_thread_inst_executed_op_dmul_pred_on.sum.per_cycle_elapsed9Predicated-On DMUL Thread Instructions Executed Per Cycle f ;derived__smsp__sass_thread_inst_executed_op_dfma_pred_on_x2'Predicated-On DFMA Operations Per Cycle3 #smsp__cycles_elapsed.avg.per_second SM Frequencyxv dram__bytes.sum.per_secondDRAM Bandwidth2F:" dram__bytes.sum.per_secondKV: dram__bytes.sum.per_secondY! Double Precision Achieved ValueGPU Throughput Rooflines2  @sm__sass_thread_inst_executed_op_ffma_pred_on.sum.peak_sustained;Theoretical Predicated-On FFMA Thread Instructions Executed  @sm__sass_thread_inst_executed_op_dfma_pred_on.sum.peak_sustained;Theoretical Predicated-On DFMA Thread Instructions Executed  Esmsp__sass_thread_inst_executed_op_ffma_pred_on.sum.per_cycle_elapsed9Predicated-On FFMA Thread Instructions Executed Per Cycle  Esmsp__sass_thread_inst_executed_op_dfma_pred_on.sum.per_cycle_elapsed9Predicated-On DFMA Thread Instructions Executed Per Cycle:qHigh-level overview of the utilization for compute and memory resources of the GPU presented as a roofline chart.B detailedB fullB rooflineJ SpeedOfLightR  9derived__sm__sass_thread_inst_executed_op_ffma_pred_on_x2Dsm__sass_thread_inst_executed_op_ffma_pred_on.sum.peak_sustained * 2  9derived__sm__sass_thread_inst_executed_op_dfma_pred_on_x2Dsm__sass_thread_inst_executed_op_dfma_pred_on.sum.peak_sustained * 2  ;derived__smsp__sass_thread_inst_executed_op_ffma_pred_on_x2Ismsp__sass_thread_inst_executed_op_ffma_pred_on.sum.per_cycle_elapsed * 2  ;derived__smsp__sass_thread_inst_executed_op_dfma_pred_on_x2Ismsp__sass_thread_inst_executed_op_dfma_pred_on.sum.per_cycle_elapsed * 2Z" WarpStateStatsWarp State Statistics2"V 0smsp__average_warp_latency_per_inst_issued.ratio"Warp Cycles Per Issued InstructionR 2smsp__thread_inst_executed_per_inst_executed.ratioAvg. Active Threads Per WarpZ 2smsp__average_warps_active_per_inst_executed.ratio$Warp Cycles Per Executed Instructionf :smsp__thread_inst_executed_pred_on_per_inst_executed.ratio(Avg. Not Predicated Off Threads Per Warp*  Warp State (All Cycles) Warp States" Cycles per Instruction*M >smsp__average_warps_issue_stalled_drain_per_issue_active.ratio Stall Drain*S Asmsp__average_warps_issue_stalled_imc_miss_per_issue_active.ratioStall IMC Miss*Q @smsp__average_warps_issue_stalled_barrier_per_issue_active.ratio Stall Barrier*O =smsp__average_warps_issue_stalled_gmma_per_issue_active.ratio Stall GMMA2Z*c Ismsp__average_warps_issue_stalled_branch_resolving_per_issue_active.ratioStall Branch Resolving*O ?smsp__average_warps_issue_stalled_membar_per_issue_active.ratio Stall Membar*c Ismsp__average_warps_issue_stalled_short_scoreboard_per_issue_active.ratioStall Short Scoreboard*S Asmsp__average_warps_issue_stalled_sleeping_per_issue_active.ratioStall Sleeping*K =smsp__average_warps_issue_stalled_wait_per_issue_active.ratio Stall Wait*_ Gsmsp__average_warps_issue_stalled_no_instruction_per_issue_active.ratioStall No Instruction*g Ksmsp__average_warps_issue_stalled_math_pipe_throttle_per_issue_active.ratioStall Math Pipe Throttle*[ Esmsp__average_warps_issue_stalled_tex_throttle_per_issue_active.ratioStall Tex Throttle*Y Dsmsp__average_warps_issue_stalled_lg_throttle_per_issue_active.ratioStall LG Throttle*_ Gsmsp__average_warps_issue_stalled_dispatch_stall_per_issue_active.ratioStall Dispatch Stall*K =smsp__average_warps_issue_stalled_misc_per_issue_active.ratio Stall Misc*[ Esmsp__average_warps_issue_stalled_not_selected_per_issue_active.ratioStall Not Selected*M Asmsp__average_warps_issue_stalled_selected_per_issue_active.ratioSelected*a Hsmsp__average_warps_issue_stalled_long_scoreboard_per_issue_active.ratioStall Long Scoreboard*[ Esmsp__average_warps_issue_stalled_mio_throttle_per_issue_active.ratioStall MIO Throttle08@2F D 'smsp__issue_active.avg.per_cycle_activeIssued Warp Per Scheduler:Analysis of the states in which all warps spent cycles during the kernel execution. The warp states describe a warp's readiness or inability to issue its next instruction. The warp cycles per instruction define the latency between two consecutive instructions. The higher the value, the more warp parallelism is required to hide this latency. For each warp state, the chart shows the average number of cycles spent in that state per issued instruction. Stalls are not always impacting the overall performance nor are they completely avoidable. Only focus on stall reasons if the schedulers fail to issue every cycle. When executing a kernel with mixed library and user code, these metrics show the combined values.B full SOLBottleneck Bottleneck   The kernel is utilizing greater than 80.0% of the available compute or memory performance of the device. To further improve performance, work will likely need to be shifted from the most utilized to another unit. Start by analyzing DRAM in the @section:MemoryWorkloadAnalysis:Memory Workload Analysis@ section."High Throughput" SpeedOfLight+ ComputeCompute Bottleneck" SpeedOfLight IssueSlotUtilizationIssue Slot Utilization   Every scheduler is capable of issuing one instruction per cycle, but for this kernel each scheduler only issues an instruction every 29.9 cycles. This might leave hardware resources underutilized and may lead to less optimal performance. Out of the maximum of 12 warps per scheduler, this kernel allocates an average of 9.10 active warps per scheduler, but only an average of 0.06 warps were eligible per cycle. Eligible warps are the subset of active warps that are ready to issue their next instruction. Every cycle with no eligible warp results in no instruction being issued and the issue slot remains unused. To increase the number of eligible warps, reduce the time the active warps are stalled by inspecting the top stall reasons on the @section:WarpStateStats:Warp State Statistics@ and @section:SourceCounters:Source Counters@ sections.Bt'smsp__issue_active.avg.per_cycle_activeu*!? * 6device__attribute_maximum_texturecubemap_layered_width(> 7device__attribute_maximum_texturecubemap_layered_layers(1 )device__attribute_maximum_surface1d_width(1 )device__attribute_maximum_surface2d_width(2 *device__attribute_maximum_surface2d_height(1 )device__attribute_maximum_surface3d_width(2 *device__attribute_maximum_surface3d_height(1 )device__attribute_maximum_surface3d_depth(9 1device__attribute_maximum_surface1d_layered_width(9 2device__attribute_maximum_surface1d_layered_layers(9 1device__attribute_maximum_surface2d_layered_width(: 2device__attribute_maximum_surface2d_layered_height(9 2device__attribute_maximum_surface2d_layered_layers(6 .device__attribute_maximum_surfacecubemap_width(> 6device__attribute_maximum_surfacecubemap_layered_width(> 7device__attribute_maximum_surfacecubemap_layered_layers(: 0device__attribute_maximum_texture1d_linear_width(8 0device__attribute_maximum_texture2d_linear_width(9 1device__attribute_maximum_texture2d_linear_height(8 0device__attribute_maximum_texture2d_linear_pitch(; 3device__attribute_maximum_texture2d_mipmapped_width(< 4device__attribute_maximum_texture2d_mipmapped_height(; 3device__attribute_maximum_texture1d_mipmapped_width(3 -device__attribute_stream_priorities_supported(1 +device__attribute_global_l1_cache_supported(0 *device__attribute_local_l1_cache_supported(> 6device__attribute_max_shared_memory_per_multiprocessor(: 2device__attribute_max_registers_per_multiprocessor(& device__attribute_managed_memory(' !device__attribute_multi_gpu_board(0 *device__attribute_multi_gpu_board_group_id(4 .device__attribute_host_native_atomic_supported(= 7device__attribute_single_to_double_precision_perf_ratio( . (device__attribute_pageable_memory_access(1 +device__attribute_concurrent_managed_access(4 .device__attribute_compute_preemption_supported(? 9device__attribute_can_use_host_pointer_for_registered_mem(* $device__attribute_cooperative_launch(7 1device__attribute_cooperative_multi_device_launch(; 3device__attribute_max_shared_memory_per_block_optin(/ )device__attribute_can_flush_remote_writes(/ )device__attribute_host_register_supported(D >device__attribute_pageable_memory_access_uses_host_page_tables(; 5device__attribute_direct_managed_mem_access_from_host(< 6device__attribute_virtual_address_management_supported(C =device__attribute_handle_type_posix_file_descriptor_supported(: 4device__attribute_handle_type_win32_handle_supported(> 8device__attribute_handle_type_win32_kmt_handle_supported(5 /device__attribute_max_blocks_per_multiprocessor(5 /device__attribute_generic_compression_supported(7 .device__attribute_max_persisting_l2_cache_size(8 /device__attribute_max_access_policy_window_size(?? 9device__attribute_gpu_direct_rdma_with_cuda_vmm_supported(9 2device__attribute_reserved_shared_memory_per_block(3 -device__attribute_sparse_cuda_array_supported(. (device__attribute_memory_pools_supported(1 +device__attribute_gpu_direct_rdma_supported(< 6device__attribute_gpu_direct_rdma_flush_writes_options(7 1device__attribute_gpu_direct_rdma_writes_ordering(6 0device__attribute_mempool_supported_handle_types(& device__attribute_cluster_launch(= 7device__attribute_deferred_mapping_cuda_array_supported(+ %device__attribute_ipc_event_supported(1 +device__attribute_can_use_stream_mem_ops_v1(8 2device__attribute_can_use_64_bit_stream_mem_ops_v1(8 2device__attribute_can_use_stream_wait_value_nor_v1(5 /device__attribute_can_use_64_bit_stream_mem_ops(5 /device__attribute_can_use_stream_wait_value_nor() #device__attribute_dma_buf_supported(- 'device__attribute_mem_sync_domain_count(3 -device__attribute_tensor_map_access_supported(1 +device__attribute_unified_function_pointers(# device__attribute_numa_config(+ %device__attribute_multicast_supported(# device__attribute_mps_enabled($ device__attribute_host_numa_id(4 device__attribute_display_name NVIDIA RTX A45000 *device__attribute_compute_capability_major(0 *device__attribute_compute_capability_minor(( device__attribute_total_memory(M device__attribute_ram_type($ device__attribute_ram_location(- #device__attribute_gpu_pci_device_id(ޡȑ1 'device__attribute_gpu_pci_sub_system_id(ޡ, %device__attribute_gpu_pci_revision_id(. 'device__attribute_gpu_pci_ext_device_id(D' !device__attribute_gpu_pci_ext_gen(+ %device__attribute_gpu_pci_ext_gpu_gen(2 +device__attribute_gpu_pci_ext_gpu_link_rate(}2 ,device__attribute_gpu_pci_ext_gpu_link_width(9 2device__attribute_gpu_pci_ext_downstream_link_rate(}9 3device__attribute_gpu_pci_ext_downstream_link_width("(2 127.0.0.1 5B CUDA Version12.3"(2 127.0.0.1 A"B Display Driver Version545.19"(2 127.0.0.1 3B PerfWorks0.1.0"(2 127.0.0.1 fGBE Nsight Compute Target,2023.3.0.0 (build 33266684) (public-release)"(2 127.0.0.1 " nvlrx__bytes.sum.per_second(2w 127.0.0.1e/usr/local/nsight-compute-2023.3.1/extras/samples/uncoalescedGlobalAccesses/uncoalescedGlobalAccesses" nvltx__bytes.sum.per_second(2w 127.0.0.1e/usr/local/nsight-compute-2023.3.1/extras/samples/uncoalescedGlobalAccesses/uncoalescedGlobalAccesses!*"(2 127.0.0.1 l"j Linux3#86~20.04.2-Ubuntu SMP Mon Jul 17 23:27:17 UTC 2023"AMD Ryzen 7 3700X 8-Core Processor"x86_640"(2 127.0.0.1 Z/usr/local/nsight-compute-2023.3.1/target/linux-desktop-glibc_2_11_3-x64/ncu --set full --import-source on -o addConstDouble3.ncu-rep ./uncoalescedGlobalAccesses 0 "(2 127.0.0.1