
    gg!                     |   S SK JrJr  S SKrS SKJrJrJr  \" SS\R                  SS5      \" SS\R                  SS5      \" S	S\R                  SS5      \" S
S\R                  SS5      \" SS\R                  SS5      \" SS\R                  SS5      /r	 " S S\5      r
S rS rS rS rS rS rS rS rS rg)    )StrEnumautoN)
ImportanceMetricRequestRequestedMetricsParser(smsp__maximum_warps_avg_per_active_cycletheoretical_warpsFz%smsp__warps_active.avg.peak_sustained	max_warpslaunch__occupancy_limit_blocks!launch__occupancy_limit_registers"launch__occupancy_limit_shared_memlaunch__occupancy_limit_warpsc                   L    \ rS rSr\" 5       r\" 5       r\" 5       r\" 5       rSr	g)LIMIT_TYPES(    N)
__name__
__module____qualname____firstlineno__r   blocks	registers
shared_memwarps__static_attributes__r       8nsight-compute-2025.1.1/sections/TheoreticalOccupancy.pyr   r   (   s    VFIJFEr   r   c                      g)NTheoreticalOccupancyr   r   r   r   get_identifierr    /   s    !r   c                      g)NzTheoretical Occupancyr   r   r   r   get_namer"   3   s    "r   c                      g)Nz2Analysis of Theoretical Occupancy and its Limitersr   r   r   r   get_descriptionr$   7   s    ?r   c                      g)N	Occupancyr   r   r   r   get_section_identifierr'   ;   s    r   c                      S/$ )NIssueSlotUtilizationr   r   r   r   get_parent_rules_identifiersr*   ?   s    "##r   c                 \   / n[          H+  nU SU 3   R                  5       nUR                  X#45        M-     UR                  S S9  US   S   /nUS   S   nSnU[	        U5      :  aB  X   S   U:X  a7  UR                  X   S   5        US-  nU[	        U5      :  a  X   S   U:X  a  M7  U$ )Nlaunch__occupancy_limit_c                     U S   $ )N   r   )limits    r   <lambda>"get_top_limiters.<locals>.<lambda>K   s    E!Hr   )keyr   r.   )r   valueappendsortlen)metricslimiterslimiterlimit_valuetop_limiters	top_valueindexs          r   get_top_limitersr>   C   s    H 8	BCIIK./ 
 MM,M-QKN#LAIE
#h-
HOA$6)$CHOA./
 #h-
HOA$6)$C r   c                     SX-  -
  nSnX@;   a0  [         R                  R                  n[        X   U5      nUS-  nXW4$ [         R                  R                  nUS-  nXW4$ )Nr.   "issue_slot_util_speedup_normalizedd   )NvRules	IFrontendSpeedupType_GLOBALminSpeedupType_LOCAL)parent_weightsr	   r
   improvement_localparent_speedup_namespeedup_typeimprovement_globalimprovement_percents           r   get_estimated_speeduprM   W   s|    -99>,((;; !DFWX036
 ,, ((::/#5,,r   c                    UR                  5       S:X  a$  UR                  5       nSn[        XU5      u  pVXVXC4$ SnSnSn	[        UR                  5       5       HL  n
UR                  U
5      n[        XU5      u  pXg:  d  M)  UnUR	                  5       R                  U
5      nUn	MN     [
        R                  R                  XxU	4$ )Nr   )num_instancesr3   rM   rangecorrelation_idsrB   rC   rF   )rG   theoretical_warps_metricr
   r	   	launch_idrJ   speedup_valuemax_speeduplaunch_id_at_max_speedup theoretical_warps_at_max_speedupinstance_id_s               r   get_max_estimated_speeduprZ   f   s    --/144::<	&;y'
# IHHK '($5CCEF4::;G0y
 &'K(88:@@M %/@, G ..Wwwwr   c                 P   [         R                  " U 5      nUR                  S5      R                  S5      nUR	                  5       n[        X5      R                  [        5      n[        S UR                  5        5       5      (       a  g UR                  S5      n[        R                  S[        R                  S[        R                  S[        R                  S0nUS   n[!        US	   R#                  5       5      nS
n	UR%                  5       [         R&                  R(                  :X  a  UR#                  5       n
[+        XZU5      u  pO[-        XWU5      u  ppX-  S-  nX:  a  g [/        U5      n[1        U5      S:X  a  XoS      O:XoS      SR3                  USS  Vs/ s H  nUU   PM
     sn5      -   S-   XoS      -   nSR5                  UU5      nUR%                  5       [         R&                  R(                  :X  a  SR5                  XU5      nOSR5                  UWU5      nUR7                  [         R8                  R:                  U5      nUR=                  UX5        UR?                  UUS   RA                  5       U
[         R8                  RB                  S5        g s  snf )Nr   c              3   (   #    U  H  oS L v   M
     g 7f)Nr   ).0metrics     r   	<genexpr>apply.<locals>.<genexpr>   s     
9(8fT>(8s   r)   z+the number of blocks that can fit on the SMz the number of required registersz$the required amount of shared memoryz%the number of warps within each blockr	   r
   P   rA   r.   z, z, and z?This kernel's theoretical occupancy ({:.1f}%) is limited by {}.zThe {:.2f} theoretical warps per scheduler this kernel can issue according to its occupancy are below the hardware maximum of {}. {}zFor some launches of this workload, the theoretical number of warps per scheduler that can be issued according to its occupancy are below the hardware maximum of {}, e.g., for the kernel with launch ID {}. {}zIIncrease the theoretical number of warps per scheduler that can be issued)"rB   get_contextrange_by_idxaction_by_idxfrontendr   parserequested_metricsanyvaluesreceive_dict_from_parentr   r   r   r   r   intr3   workload_typeIActionWorkloadType_KERNELrM   rZ   r>   r6   joinformatmessagerC   MsgType_MSG_OPTIMIZATIONspeedupfocus_metricnameSeverity_SEVERITY_HIGH)handlectxactionfer7   rG   description_limitrR   r
   low_theoretical_thresholdr	   rJ   rT   rS   theoretical_warps_pct_of_peakr;   r9   top_limiters_stringtop_limiters_messagers   msg_ids                        r   applyr      s   


f
%Ca ..q1F	B$V4::;LMG

9(8
999 	001GHN 	IA FB	  '':;GK(..01I "!D!DD4::<&;y'
#m
 &nPYZ 	BY &7%Bc$I!$A#G,L.1,.?1.Dq/*A/$))7CAb7IJ7IGw'7IJ3
 
(b)9:;  	JPP)+>	
  !D!DDIIO!.BJ 	 &9&:	 	 ZZ))BBGLFJJv|3OO#$))+00S7 Ks   !J#
)enumr   r   rB   RequestedMetricsr   r   r   OPTIONALri   r   r    r"   r$   r'   r*   r>   rM   rZ   r   r   r   r   <module>r      s   2   N N <>QS]SfSfhlnst9;
H[H[]achi2D*:M:MtUZ[5tZ=P=PRVX]^6j>Q>QSWY^_149L9LdTYZ ' "#@$(-x<Kr   