
    gg%              
           S SK r S SKrS SKJrJrJr  \" SS5      \" SS\R                  SS5      \" SS	\R                  SS5      \" S
S\R                  SS5      /rS rS r	S r
S rS rS rS rg)    N)
ImportanceMetricRequestRequestedMetricsParserz0sm__throughput.avg.pct_of_peak_sustained_elapsed
sm_sol_pctz@gpu__compute_memory_throughput.avg.pct_of_peak_sustained_elapsedmem_sol_pctFzJbreakdown:gpu__compute_memory_throughput.avg.pct_of_peak_sustained_elapsedbreakdown_memory launch__waves_per_multiprocessor	num_wavesc                      g)NSOLBottleneck r       0nsight-compute-2025.1.1/sections/SpeedOfLight.pyget_identifierr   &   s    r   c                      g)N
Bottleneckr   r   r   r   get_namer   )   s    r   c                      g)NzHigh-level bottleneck detectionr   r   r   r   get_descriptionr   ,   s    ,r   c                      g)NSpeedOfLightr   r   r   r   get_section_identifierr   /   s    r   c                     S nSnU R                  5        H   u  p4UR                  5       nXR:  d  M  UnUnM"     U(       a.  SSSSSSS.nU H  nUR                  U5      (       d  M  Xg   s  $    g )Nr   DRAML1L2)draml1texltsltcfbpfbpa)itemsvalue
startswith)breakdown_metricsmax_pipemax_pipe_valuenamemetric
pipe_valuetokenstokens           r   get_max_piper.   2   s    HN)//1\\^
&'NH	 2 
 E""5))}$  r   c                     / nUR                  5       R                  S5       H  n[        U5      nUR                  U5        M!     U R	                  U5      $ )N,)r$   splitr   appendparse)parserbreakdown_metricrequestsmetric_namerequests        r   get_breakdown_metricsr9   M   sL    H'--/55c:,  ; <<!!r   c           
      L   [         R                  " U 5      nUR                  S5      R                  S5      nUR	                  5       n[        X5      nUR                  [        5      nUS   nUS   nUS   nUb  Uc  g [        XH5      n	UR                  5       n
UR                  5       nUR                  5       nUR                  5       nSnSnSnSn[         R                  R                  n/ nX:  a  S	nOS
nUU:  Ga  UU:  Ga  X:  Ga  X:  Gay  [         R                  R                  nUS   nU(       a  UR                  5       U:  a  UR                  5       nUR                  UR                  5       U[         R                  R                  SR!                  U5      45        UR#                  5       [         R$                  R&                  :X  a  SR!                  U5      nOSR!                  U5      nUS-  nSnGOUR                  X[         R                  R                  SR!                  UU5      45        UR                  X[         R                  R                  SR!                  UU5      45        SR!                  U5      nSnGO8[(        R*                  " X-
  5      U:  a  [         R                  R                  nX:  aP  UR                  X[         R                  R,                  SR!                  XU5      45        SR!                  U5      nSnOUR                  X[         R                  R,                  SR!                  XU5      45        [/        U	5      nU(       a  SR!                  U5      OSnSR!                  UU5      nSnO7SnSnO2S nX:  a  [/        U	5      nU(       a  UOSnSR!                  UUU5      nSnUR1                  UUU5      nU H$  nUR3                  UUS   US   US    US!   5        M&     UR5                  US"-  US"-  [7        X5      S"-  S#.5        g )$Nr   r   r   r   
   <   P      z;@section:ComputeWorkloadAnalysis:Compute Workload Analysis@z9@section:MemoryWorkloadAnalysis:Memory Workload Analysis@r
   z.Increase the number of waves per SM towards {}zThis kernel grid is too small to fill the available resources on this device, resulting in only {:.1f} full waves across all SMs.zAll launches of this workload use grids that are too small to fill the available resources on this device, resulting in at most {:.1f} full waves across all SMs.zB Look at @section:LaunchStats:Launch Statistics@ for more details.z
Small Gridz{:.3f} < {:.3f}al  This workload exhibits low compute throughput and memory bandwidth utilization relative to the peak performance of this device. Achieved compute throughput and/or memory bandwidth below {:.1f}% of peak typically indicate latency issues. Look at @section:SchedulerStats:Scheduler Statistics@ and @section:WarpStateStats:Warp State Statistics@ for potential reasons.zLatency Issuez{:.3f} - {:.3f} >= {:.3f}zCompute is more heavily utilized than Memory: Look at the {} section to see what the compute pipelines are spending their time doing. Also, consider whether any computation is redundant and could be reduced or moved to look-up tables.zHigh Compute Throughputzto identify the {} bottleneckz,to see where the memory system bottleneck isa:  Memory is more heavily utilized than Compute: Look at the {} section {}. Check memory replay (coalescing) metrics to make sure you're efficiently utilizing the bytes transferred. Also consider whether it is possible to do more work per memory access (kernel fusion) or whether there are values you can (re)compute.zHigh Memory Throughputa   Compute and Memory are well-balanced: To reduce runtime, both computation and memory traffic must be reduced. Check both the @section:ComputeWorkloadAnalysis:Compute Workload Analysis@ and @section:MemoryWorkloadAnalysis:Memory Workload Analysis@ sections.zBalanced Throughput	workloadsa  This workload is utilizing greater than {:.1f}% of the available compute or memory performance of the device. To further improve performance, work will likely need to be shifted from the most utilized to another unit. Start by analyzing {} in the {} section.zHigh Throughput      d   )compute_throughput_normalizedmemory_throughput_normalizedmax_throughput_normalized)NvRulesget_contextrange_by_idxaction_by_idxfrontendr   r3   requested_metricsr9   r)   r$   	IFrontendMsgType_MSG_OKMsgType_MSG_OPTIMIZATIONr2   Severity_SEVERITY_HIGHformatworkload_typeIActionWorkloadType_KERNELmathfabsSeverity_SEVERITY_LOWr.   messagefocus_metricsend_dict_to_childrenmax)handlectxactionfer4   metricssm_sol_pct_metricmem_sol_pct_metricbreakdown_memory_metricbreakdown_metrics_memorysm_sol_pct_namemem_sol_pct_namer   r   balanced_thresholdlatency_bound_thresholdno_bound_thresholdwaves_thresholdmsg_typefocus_metricsbottleneck_sectionnum_waves_metricr
   rW   r)   	pipe_namepipe_msgmsg_idrX   s                                r   applyrq   U   s   


f
%Ca ..q1F	B#F3Fll,-G- /%&89!%<%D4VU',,.O)..0"((*J$**,K O  //HM ZX&&;9K+K/K4Y((AAH&{3$4$:$:$<$N,224	$$&6&;&;&=y'J[J[JrJr  ue  ul  ul  m|  u}  &~  '')W__-P-PP b  i  i  js  tG B  I  I  JS  TG__#$$o7CTCTCkCkm~  nF  nF  GQ  Se  nf  &g  h$$&6WEVEVEmEm  pA  pH  pH  IT  Vh  pi  &j  k I  P  P  Qh  i&YYz/04FF((AAH'$$oGDUDUDkDk  nI  nP  nP  Q[  j|  n}  &~   G  N  N  Oa  b0$$&6WEVEVElEl  oJ  oQ  oQ  R]  k}  o~  &  @()AB	PY:AA)L  `N W  ^  ^  _q  s{  |/ YG(D	#$%=>I )9{ W  ^  ^  _q  s{  }O  P ZZ'40F%
Qa,q/S_`aSbc & -7#-=,7#,=),Z)E)K	
r   )rT   rF   RequestedMetricsr   r   r   OPTIONALrK   r   r   r   r   r.   r9   rq   r   r   r   <module>rt      s   2   N N DlSTVceoexexz~  AF  G^`rt~  uH  uH  JN  PU  V4k:CVCVX\^cd	 -6"Zr   