
    gg                         S SK r S SKJrJr  \" SS5      \" SS5      \" SS5      \" S	S
5      \" SS5      /rS rS rS rS rS r	S r
S rg)    N)MetricRequestRequestedMetricsParserz'smsp__issue_active.avg.per_cycle_activeissue_active(smsp__maximum_warps_avg_per_active_cycletheoretical_warpsz'smsp__warps_active.avg.per_cycle_activeactive_warpsz)smsp__warps_eligible.avg.per_cycle_activeeligible_warpsz%smsp__warps_active.avg.peak_sustained	max_warpsc                      g)NIssueSlotUtilization r       8nsight-compute-2025.1.1/sections/IssueSlotUtilization.pyget_identifierr   %   s    !r   c                      g)NzIssue Slot Utilizationr   r   r   r   get_namer   (   s    #r   c                      g)Nz$Scheduler instruction issue analysisr   r   r   r   get_descriptionr   +   s    1r   c                      g)NSchedulerStatsr   r   r   r   get_section_identifierr   .   s    r   c                      S/$ )NComputer   r   r   r   get_parent_rules_identifiersr   1   s
    ;r   c                     US   R                  5       nSU-
  nSnX@;   a  SX   -
  n[        X55      n[        R                  R                  nUS-  nXg4$ )Nr      max_throughput_normalizedd   )valueminNvRules	IFrontendSpeedupType_LOCAL)parent_weightsmetricsr   improvement_localthroughput_nameupper_boundspeedup_typeimprovement_percents           r   get_estimated_speedupr+   5   si    >*002LL(1O(.99 1?$$66L+c1,,r   c                    [         R                  " U 5      nUR                  S5      R                  S5      nUR	                  5       n[        X5      R                  [        5      nUR                  S5      nUS   R                  5       nUS   R                  5       nUS   R                  5       nUS   R                  5       n	US   R                  5       n
SnXk:  Ga   S	R                  S
U-  5      nUSR                  [        U
5      U5      -  nUS
:  a  US-  nO1USR                  U	5      -  nX-  S:  a  US-  nUS-  nO
US-  nUS-  nUR                  [         R                  R                  U5      n[        XT5      u  pUR!                  XU5        0 UESUS-  0EnUR#                  XS   R%                  5       U[         R                  R&                  S5        UR)                  U5        g )Nr   r   r   r   r   r	   r
   g333333?zEvery scheduler is capable of issuing one instruction per cycle, but for this workload each scheduler only issues an instruction every {:.1f} cycles. This might leave hardware resources underutilized and may lead to less optimal performance.g      ?zw Out of the maximum of {} warps per scheduler, this workload allocates an average of {:.2f} active warps per scheduler,zH which already limits the scheduler to less than a warp per instruction.a   but only an average of {:.2f} warps were eligible per cycle. Eligible warps are the subset of active warps that are ready to issue their next instruction. Every cycle with no eligible warp results in no instruction being issued and the issue slot remains unused.g?z To increase the number of eligible warps, reduce the time the active warps are stalled by inspecting the top stall reasons on the ze@section:WarpStateStats:Warp State Statistics@ and @section:SourceCounters:Source Counters@ sections.z To increase the number of eligible warps, avoid possible load imbalances due to highly different execution durations per warp.z Reducing stalls indicated on the @section:WarpStateStats:Warp State Statistics@ and @section:SourceCounters:Source Counters@ sections can help, too."issue_slot_util_speedup_normalizedr   z<Increase the average number of instructions issued per cycle)r!   get_contextrange_by_idxaction_by_idxfrontendr   parserequested_metricsreceive_dict_from_parentr   formatintmessager"   MsgType_MSG_OPTIMIZATIONr+   speedupfocus_metricnameSeverity_SEVERITY_DEFAULTsend_dict_to_children)handlectxactionfer%   r$   r   r   r   r	   r
   issueActiveTargetr7   msg_idr)   speedup_values                   r   applyrE   D   sa   


f
%Ca ..q1F	B$V4::;LMG00;N>*002L   34::<>*002L-.446N$**,I' F  M  M  NP  Q]  N]  ^  M  T  T  UX  Yb  Uc  eq  r  	r#aaG  a  h  h  iw  x  xG/#5  a  a  C  C  ]  ]  s  sG--FFP&;N&T#


67

0-#2E

 	 7 < < >gN_N_NyNy  |z  	{^,r   )r!   RequestedMetricsr   r   r3   r   r   r   r   r   r+   rE   r   r   r   <module>rG      ss   2  B ;^L<>QR;^L=?OP9;G "$2-,-r   