
    gg              
          S SK Jr  S SKrS SKJrJrJr  \" SS\R                  SS5      \" SS\R                  SS5      \" S	S
5      \" SS\R                  S 5      \" SS\R                  S 5      /rS r	S r
S rS rS rS rS rg)    )defaultdictN)
ImportanceMetricRequestRequestedMetricsParsermemory_l1_wavefronts_sharedshared_wavefrontsF!memory_l1_wavefronts_shared_idealshared_wavefronts_ideal.derived__memory_l1_wavefronts_shared_excessiveexcessive_wavefrontszl1tex__cycles_active.suml1tex_cycles_activezl1tex__cycles_elapsed.suml1tex_cycles_elapsedc                      g)NUncoalescedSharedAccess r       ;nsight-compute-2025.1.1/sections/UncoalescedSharedAccess.pyget_identifierr   &   s    $r   c                      gNzUncoalesced Shared Accessesr   r   r   r   get_namer   )       (r   c                      gr   r   r   r   r   get_descriptionr   ,   r   r   c                      g)NSourceCountersr   r   r   r   get_section_identifierr   /   s    r   c                      S/$ )NMemoryr   r   r   r   get_parent_rules_identifiersr    2   s
    :r   c                 :   U S   R                  5       nU S   R                  5       nU S   R                  5       nU S   R                  5       nUS:  a.  US:  a(  X-  X4-  -  S-  n[        R                  R                  U4$ [        R                  R                  S4$ )zEstimate potential speedup from reducing uncoalesced shared memory accesses.

The performance improvement is approximated as relative proportion of excessive
wavefronts weighted by time spent in the L1TEX unit.

r   r   r   r   r   d   )valueNvRules	IFrontendSpeedupType_GLOBALSpeedupType_LOCAL)metricsactive_cycleselapsed_cyclesr   total_wavefrontsimprovement_percents         r   get_estimated_speedupr-   6   s     1288:M34::<N"#9:@@B2399;!1A!5+#68 	
   335HHH  22A55r   c                     [         R                  " U 5      nUR                  S5      R                  S5      nUR	                  5       nUR                  5       [         R                  R                  :w  a  g [        X5      R                  [        5      n[        S UR                  5        5       5      (       a  g US   nUR                  5       nUS   nUR                  5       nUR                  5       n	X::  a  g UR                  5       n
UR                  5       nX:w  a  g Sn[!        ["        5      n[!        ["        5      n[%        U
5       GH  nUR'                  U5      nUR'                  U5      nUU:w  a  U[)        UU-
  5      -  nUU:  d  ME  UR'                  U5      nUR+                  U5      n[)        UU-
  5      nUR-                  SR/                  UU-  S-  5      U[         R0                  R2                  [         R0                  R4                  5        US :w  d  M  UR7                  5       nUR9                  5       nUU==   U-  ss'   UU==   U-  ss'   GM     UR;                  5        Hc  u  nnUR-                  SR/                  UUU   -  S-  5      U[         R0                  R<                  W[         R0                  R4                  5        Me     US:  a  SR/                  USU-  U-  U5      nUR?                  [         R0                  R@                  U5      n[C        U5      u  nnURE                  UUU5        URG                  UUS	   RI                  5       U[         R0                  RJ                  S
5        URM                  S5        g g )Nr   c              3   (   #    U  H  oS L v   M
     g 7f)Nr   ).0metrics     r   	<genexpr>apply.<locals>.<genexpr>V   s     
9(8fT>(8s   r   r
   z7{:.2f}% of this line's shared wavefronts are excessive.r"   a  This kernel has uncoalesced shared accesses resulting in a total of {} excessive wavefronts ({:.0f}% of the total {} wavefronts). Check the L1 Wavefronts Shared Excessive table for the primary source locations. The @url:CUDA Best Practices Guide:https://docs.nvidia.com/cuda/cuda-c-best-practices-guide/index.html#shared-memory-in-matrix-multiplication-c-ab@ has an example on optimizing shared memory accesses.g      Y@r   z2Reduce the number of excessive wavefronts in L1TEXzUncoalescedSharedAccess.chart)'r$   get_contextrange_by_idxaction_by_idxfrontendworkload_typeIActionWorkloadType_KERNELr   parserequested_metricsanyvaluescorrelation_idsr#   num_instancesr   intrange	as_uint64abssource_infosource_markerformatr%   MarkerKind_SASSMsgType_MSG_WARNINGline	file_nameitemsMarkerKind_SOURCEmessageMsgType_MSG_OPTIMIZATIONr-   speedupfocus_metricnameSeverity_SEVERITY_DEFAULTload_chart_from_file)handlectxactionfer(   shared_wavefronts_metric!shared_wavefronts_correlation_idsideal_shared_wavefronts_metrictotal_shared_wavefrontstotal_ideal_shared_wavefrontsnum_shared_wavefronts_instances%num_ideal_shared_wavefronts_instances
total_diffexcess_by_linetotal_by_lineiper_instance_shared_wavefronts$per_instance_ideal_shared_wavefrontsaddressrE   excessrJ   rK   line_number
local_diffrN   msg_idspeedup_typespeedup_values                                r   applyrm   M   s   


f
%Ca ..q1F	B!D!DD$V4::;LMG

9(8
999 	&':;(@(P(P(R%%,-F%G"6<<>$B$H$H$J!?&>&L&L&N#,J,X,X,Z)&OJ %N$M23)A)K)KA)N&/M/W/WXY/Z,*.RR#BEccddJ*-QQ7AA!DG ,,W5K=@^^_F V]]^d  hF  _F  IL  _L  M  OV  X_  Xi  Xi  Xy  Xy  {B  {L  {L  {`  {`  a d""'')'113	t$.$d#'EE#' 4* $2#7#7#9Z
RYYZdgt  vA  hB  [B  EH  [H  I  KV  X_  Xi  Xi  X{  X{  }F  HO  HY  HY  Hm  Hm  	n $: A~X VJz 14K KMde 	 G--FFP&;G&D#m


6<7
(> ? D D F
T[TeTeTT  Bv  	w
 ?@ r   )collectionsr   r$   RequestedMetricsr   r   r   OPTIONALr<   r   r   r   r   r    r-   rm   r   r   r   <module>rq      s   2 $  N N /1DjFYFY[_afg57PR\ReRegkmrsBDZ[,.CZEXEXZ[\-/EzGZGZ\]^ %))6.DAr   