
    gg>                     z    S SK r S SKJrJr  \" SS5      \" SS5      \" SS5      /rS	 rS
 rS rS rS r	S r
S rg)    N)MetricRequestRequestedMetricsParserz;sm__instruction_throughput.avg.pct_of_peak_sustained_activeinstruction_throughputz0sm__inst_issued.avg.pct_of_peak_sustained_activeinst_issued_avgz0sm__inst_issued.max.pct_of_peak_sustained_activeinst_issued_maxc                      g)NSlowPipeLimiter r
       3nsight-compute-2025.1.1/sections/SlowPipeLimiter.pyget_identifierr   #   s    r   c                      g)NzSlow Pipe Limiterr
   r
   r   r   get_namer   &   s    r   c                      g)Nz&Slow pipe limiting compute utilizationr
   r
   r   r   get_descriptionr   )   s    3r   c                      g)NComputeWorkloadAnalysisr
   r
   r   r   get_section_identifierr   ,   s    $r   c                      S/$ )NComputer
   r
   r   r   get_parent_rules_identifiersr   0   s
    ;r   c                     US   R                  5       nUS   R                  5       nX2-
  U-  nSnXP;   a'  [        R                  R                  nX@U   -  S-  nXg4$ [        R                  R                  nUS-  nXg4$ )a+  Estimate potential speedup from decreasing the usage of slow pipes.

The performance improvement is approximated as the relative part of instructions
not issued.
In case the compute (SM) throughput was collected,
the above approximation can be improved by weighing it with the achieved throughput.

r   r   compute_throughput_normalizedd   )valueNvRules	IFrontendSpeedupType_GLOBALSpeedupType_LOCAL)parent_weightsmetricsr   r   improvement_localcompute_throughput_namespeedup_typeimprovement_percents           r   get_estimated_speedupr&   4   s     /0668O/0668O(:oM=0((;;/AX2YY\__
 ,, ((::/#5,,r   c           	      d   [         R                  " U 5      nUR                  S5      R                  S5      nUR	                  5       n[        X5      R                  [        5      nUR                  S5      nUS   R                  5       nUS   R                  5       nUS   R                  5       nSn	Sn
SnS	nX-
  nXi:  a  Xz:  a  X:  a  UR                  [         R                  R                  S
R                  X}X5      S5      n[        XT5      u  nnUR                  XU5        UR!                  XS   R#                  5       U[         R                  R$                  S5        UR!                  XS   R#                  5       U[         R                  R&                  SR                  U5      5        g g g g )Nr   r   r   r   r   P         z See the @url:Kernel Profiling Guide:https://docs.nvidia.com/nsight-compute/ProfilingGuide/index.html#metrics-decoder@ for the workloads handled by each pipeline.zIt is possible that a slow pipeline is preventing better workload performance. The average pipeline utilization of {:.1f}% is {:.1f}% lower than the maximum utilization of {:.1f}%. Try moving compute to other pipelines, e.g. from fp64 to fp32 or int.{}zSlow PipelinezVThe higher the instruction throughput the more likely is the impact of a slow pipelinezSIncrease the average pipeline utilization towards the maximum utilization ({:.1f}%))r   get_contextrange_by_idxaction_by_idxfrontendr   parserequested_metricsreceive_dict_from_parentr   messager   MsgType_MSG_OPTIMIZATIONformatr&   speedupfocus_metricnameSeverity_SEVERITY_LOWSeverity_SEVERITY_HIGH)handlectxactionfer!   r    sm_busyr   r   no_bound_thresholdissued_avg_thresholddiff_thresholddoc_msg	pipe_diffmsg_idr$   speedup_values                    r   applyrF   L   s   


f
%Ca ..q1F	B$V4::;LMG00;N./557G/0668O/0668ON sG1I$)OT]TnG--FF	 VOI?	\ '<N&T#m


67
(@ A F F H'SZSdSdSzSzd	f
(9 : ? ? A?T[TeTeT|T|ahhixy	{ Uo)O$r   )r   RequestedMetricsr   r   r0   r   r   r   r   r   r&   rF   r
   r   r   <module>rH      s_   2  B OQijDFWXDFWX 4%-0{r   