
    gg              
          S SK Jr  S SKrS SKJrJrJr  \" SS\R                  SS5      \" SS\R                  SS5      \" S	S
5      \" SS\R                  S 5      \" SS\R                  S 5      /rS r	S r
S rS rS rS rS rg)    )defaultdictN)
ImportanceMetricRequestRequestedMetricsParser$memory_l2_theoretical_sectors_global
l2_sectorsF*memory_l2_theoretical_sectors_global_ideall2_sectors_ideal7derived__memory_l2_theoretical_sectors_global_excessiveexcessive_sectorszlts__cycles_active.suml2_cycles_activezlts__cycles_elapsed.suml2_cycles_elapsedc                      g)NUncoalescedGlobalAccess r       5nsight-compute-2025.1.1/sections/UncoalescedAccess.pyget_identifierr   '   s    $r   c                      gNzUncoalesced Global Accessesr   r   r   r   get_namer   *       (r   c                      gr   r   r   r   r   get_descriptionr   -   r   r   c                      g)NSourceCountersr   r   r   r   get_section_identifierr   0   s    r   c                      S/$ )NMemoryr   r   r   r   get_parent_rules_identifiersr    3   s
    :r   c                 :   U S   R                  5       nU S   R                  5       nU S   R                  5       nU S   R                  5       nUS:  a.  US:  a(  X-  X4-  -  S-  n[        R                  R                  U4$ [        R                  R                  S4$ )zEstimate potential speedup from reducing uncoalesced global memory accesses.

The performance improvement is approximated as relative proportion of excessive
L2 sectors weighted by time spent in the L2 unit.

r   r   r   r   r   d   )valueNvRules	IFrontendSpeedupType_GLOBALSpeedupType_LOCAL)metricsactive_cycleselapsed_cyclesr   total_sectorsimprovement_percents         r   get_estimated_speedupr-   7   s     ./557M01779N 34::<L)//1M!2+0A0QRUXX 	   335HHH  22A55r   c                    [         R                  " U 5      nUR                  S5      R                  S5      nUR	                  5       nUR                  5       [         R                  R                  :w  a  g [        X5      R                  [        5      n[        S UR                  5        5       5      (       a  g US   nUR                  5       nUS   nUR                  5       nUR                  5       n	X::  a  g UR                  5       n
UR                  5       nX:w  a  g Sn[!        ["        5      n[!        ["        5      n[%        U
5       GH  nUR'                  U5      nUR'                  U5      nUU:w  a  U[)        UU-
  5      -  nUU:  d  ME  UR'                  U5      nUR+                  U5      n[)        UU-
  5      nUR-                  SR/                  UU-  S-  5      U[         R0                  R2                  [         R0                  R4                  5        Uc  M  UR7                  5       nUR9                  5       nUU==   U-  ss'   UU==   U-  ss'   GM     UR;                  5        Hc  u  nnUR-                  SR/                  UUU   -  S-  5      U[         R0                  R<                  W[         R0                  R4                  5        Me     US:  a  SR/                  USU-  U-  U5      nUR?                  [         R0                  R@                  U5      n[C        U5      u  nnURE                  UUU5        URG                  UUS	   RI                  5       U[         R0                  RJ                  S
5        URM                  S5        g g )Nr   c              3   (   #    U  H  oS L v   M
     g 7f)Nr   ).0metrics     r   	<genexpr>apply.<locals>.<genexpr>U   s     
9(8fT>(8s   r   r
   z5{:.2f}% of this line's global accesses are excessive.r"   a  This kernel has uncoalesced global accesses resulting in a total of {} excessive sectors ({:.0f}% of the total {} sectors). Check the L2 Theoretical Sectors Global Excessive table for the primary source locations. The @url:CUDA Programming Guide:https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#device-memory-accesses@ has additional information on reducing uncoalesced device memory accesses.g      Y@r   z/Reduce the number of excessive wavefronts in L2zUncoalescedAccess.chart)'r$   get_contextrange_by_idxaction_by_idxfrontendworkload_typeIActionWorkloadType_KERNELr   parserequested_metricsanyvaluescorrelation_idsr#   num_instancesr   intrange	as_uint64abssource_infosource_markerformatr%   MarkerKind_SASSMsgType_MSG_WARNINGline	file_nameitemsMarkerKind_SOURCEmessageMsgType_MSG_OPTIMIZATIONr-   speedupfocus_metricnameSeverity_SEVERITY_DEFAULTload_chart_from_file)handlectxactionfer(   l2_sectors_metricl2_sectors_correlation_idsideal_l2_sectors_metrictotal_l2_sectorstotal_ideal_l2_sectorsnum_l2_sectors_instancesnum_ideal_l2_sectors_instances
total_diffexcess_by_linetotal_by_lineiper_instance_l2_sectorsper_instance_ideal_l2_sectorsaddressrE   excessrJ   rK   line_number
local_diffrN   msg_idspeedup_typespeedup_values                                r   applyrm   L   s   


f
%Ca ..q1F	B!D!DD$V4::;LMG

9(8
999 	-!2!B!B!D%&89(..04::<10>>@%<%J%J%L"AJ %N$M+,"3"="=a"@(?(I(I!(L%#'DD#;>UUVVJ#&CC0::1=G ,,W5K69PPQF T[[\be|\|  @C  ]C  D  FM  OV  O`  O`  Op  Op  ry  rC  rC  rW  rW  X &"'')'113	t$.$d#'>>#' -* $2#7#7#9Z
PWWXbers~eX  CF  YF  G  IT  V]  Vg  Vg  Vy  Vy  {D  FM  FW  FW  Fk  Fk  	l $: A~S VJz 14D DFVW 	 G--FFP&;G&D#m


6<7
(; < A A CZQXQbQbQ|Q|  p  	q
 9: r   )collectionsr   r$   RequestedMetricsr   r   r   OPTIONALr<   r   r   r   r   r    r-   rm   r   r   r   <module>rq      s   2 $  N N 8,
H[H[]achi>@RT^TgTgimotuKM`a*,>
@S@SUVW+-@*BUBUWXY %))6*C;r   