
    3j"9                       % S SK Jr  S SKrS SKrS SKrS SKrS SKrS SKJr  S SKJ	r	  S SK
Jr  S SKJr  S SKJr  S SKJr  \(       a  S S	KJr  S S
KJr  S SKJr  S qS qS q/ qS\S'   / rS\S'   S qS q\R                   " S S5      5       r / q!S\S'   S q"S q#S q$S q%S\S'   S q&S q'S<S jr(\ " S S5      5       r)S=S jr* " S S5      r+0 r,S\S'   \ " S S5      5       r-\-R]                  S / S!Q5        \-R]                  S"/ S#Q5        \-R]                  S$/ S%Q5        \-R]                  S&/ S'Q5        \-R]                  S(/ S)Q5        S>S* jr/S?S+ jr0S@S, jr1SAS- jr2SBS. jr3S?S/ jr4SCS0 jr5SDS1 jr6      SES2 jr7        SFS3 jr8S<S4 jr9SGS5 jr:\	SHS6 j5       r;SIS7 jr<SJS8 jr=\-R]                  S9/ S:Q5                  SKS; jr>g)L    )annotationsN)	dataclass)	lru_cache)TYPE_CHECKING)config)get_benchmark_name)
OrderedSet)Callable)Config)BaseSchedulerNodez#list[tuple[BaseSchedulerNode, int]]nodes_num_elemz%list[tuple[BaseSchedulerNode, float]]node_runtimesc                  .    \ rS rSr% S\S'   SrS\S'   Srg)CppOuterLoopFusedCount+   intinner_kernel_numberr   local_buffer_number N)__name__
__module____qualname____firstlineno____annotations__r   __static_attributes__r       Q/home/wildlama/miniconda3/lib/python3.13/site-packages/torch/_inductor/metrics.pyr   r   +   s      r   r   zlist[CppOuterLoopFusedCount]!cpp_outer_loop_fused_inner_countsr   num_auto_chunkingc                     Sq SqSq[        R	                  5         [
        R	                  5         SqSq[        R	                  5         Sq	Sq
SqSqSqSqg )Nr   )generated_kernel_countgenerated_cpp_vec_kernel_countnum_bytes_accessedr   clearr   ir_nodes_pre_fusioncpp_to_dtype_countr   num_comprehensive_padding)num_matches_for_scatter_upon_const_tensornum_loop_reorderingparallel_reduction_countcodegen_mix_order_reductionr   r   r   r   resetr,   A   sj     %&"%++- !01- "#r   c                  V    \ rS rSr% SrS\S'   S\S'   S\S'   S\S'   S\S'   S\S	'   S
rg)CachedMetricsDeltas_   zQ
The subset of metrics we want update across cache hits, e.g., the
FxGraphCache.
r   r!   r"   r%   r&   r#   r(   r   N)r   r   r   r   __doc__r   r   r   r   r   r.   r.   _   s-    
  $''/22r   r.   c                 t    [         R                  " [        5       V s/ s H  o R                  PM     sn $ s  sn f N)dataclassesfieldsr.   name)fields    r   get_metric_fieldsr7   n   s*    $/$6$67J$KL$K5JJ$KLLLs   5c                  @    \ rS rSrSrSS jrS	S jr\S
S j5       rSr	g)CachedMetricsHelperr   z
A helper class to help calculate and apply counter deltas for those
metrics we want to save with cache entries (e.g., FxGraphCache) and
apply on a cache hit.
c                h    0 U l         [        5        H  n[        5       U   U R                   U'   M     g r2   )cached_metricsr7   globals)selfmetrics     r   __init__CachedMetricsHelper.__init__y   s.     ')F*1)F*;D' *r   c                |    0 n[        5        H"  n[        5       U   U R                  U   -
  X'   M$     [        S0 UD6$ )Nr   )r7   r=   r<   r.   )r>   delta_metricsr?   s      r   
get_deltasCachedMetricsHelper.get_deltas~   sC    ')F$+If$58K8KF8S$SM! * #3]33r   c                b    [        5        H!  n[        5       U==   [        X5      -  ss'   M#     g r2   )r7   r=   getattr)deltar?   s     r   apply_deltas CachedMetricsHelper.apply_deltas   s&    ')FIf!77 *r   )r<   NreturnNone)rL   r.   )rH   r.   rL   rM   )
r   r   r   r   r0   r@   rD   staticmethodrI   r   r   r   r   r9   r9   r   s%    <
4 8 8r   r9   zdict[str, MetricTable]REGISTERED_METRIC_TABLESc                  t    \ rS rSr% S\S'   S\S'   SrS\S'   SS	 jrSS
 jrSS jrSS jr	\
SS j5       rSrg)MetricTable   str
table_name	list[str]column_namesr   r   num_rows_addedc                \   U R                   [        5       ;  a  g U" 5       n[        U R                  5      [        U5      :X  d(   [        U R                  5       S[        U5       35       e[	        U R                  5      [	        UR                  5       5      :X  d6   [	        U R                  5       S[	        UR                  5       5       35       e[        5       nU/U R                   Vs/ s H  oBU   PM	     sn-   n[        S U 5       5      (       d   eU R                  U5        g s  snf )Nz v.s. c           	   3  b   #    U  H%  n[        U[        [        [        S 5      45      v   M'     g 7fr2   )
isinstancerS   floattype).0is     r   	<genexpr>&MetricTable.add_row.<locals>.<genexpr>   s%     HCq:a#ud4j!9::Cs   -/)	rT   enabled_metric_tableslenrV   r	   keysr   all
_write_row)r>   row_fnrow_dictbncolumn_namerows         r   add_rowMetricTable.add_row   s
   ??"7"9984$$%X6 	
4$$%&fS]O<	
6 $++,
8==?0KK 	
$++,-VJx}}4O3PQ	
K  !dt?P?PQ?P{+?PQQHCHHHHH Rs   +D)c                "    SU R                    S3$ )Nmetric_table_z.csv)rT   )r>   s    r   output_filenameMetricTable.output_filename   s    t/t44r   c                    U R                  5       n[        US5       n[        R                  " USS9nUR	                  S/U R
                  -   5        S S S 5        g ! , (       d  f       g = f)Nw
lineterminator
model_name)ro   opencsvwriterwriterowrV   )r>   filenamefdry   s       r   write_headerMetricTable.write_header   sQ    '')(C BZZ48FOO\NT->->>? !  s   5A
A)c                   U R                  5       nU R                  S:X  a4  [        R                  R	                  U5      (       d  U R                  5         U =R                  S-  sl        [        U5       H+  u  p4[        U[        5      (       a  US nOUc  SnOUnXQU'   M-     [        US5       n[        R                  " USS9nUR                  U5        S S S 5        g ! , (       d  f       g = f)Nr      z.6f ars   rt   )ro   rW   ospathexistsr}   	enumeraterZ   r[   rw   rx   ry   rz   )r>   rj   r{   idxorig_valnew_valr|   ry   s           r   re   MetricTable._write_row   s    '')!#BGGNN8,D,Dq &s^MC(E**%cN!"H , (C BZZ48FOOC  !  s   0'C  
C.c                ,    [        X5      nU[        U '   g r2   )rQ   rO   )r5   rV   tables      r   register_tableMetricTable.register_table   s    D/). &r   r   N)rf   z+Callable[[], dict[str, str | float | None]]rL   rM   )rL   rS   rK   )rj   zlist[str | float | None]rL   rM   )r5   rS   rV   rU   rL   rM   )r   r   r   r   r   rW   rk   ro   r}   re   rN   r   r   r   r   r   rQ   rQ      s@    ONC$5@!( / /r   rQ   slow_fusion)kernel1_pathkernel1_latencykernel2_pathkernel2_latencyfused_kernel_pathfused_kernel_latencyslow_down_ratiograph_stats)graph_idnum_nodes_before_fusionnum_nodes_after_fusionpersistent_red_perf)
kernel0_pathr   r   kernel3_pathkernel0_latencyr   r   kernel3_latency
size_hintsreduction_hint'fusion_failure_due_to_indexing_mismatch)pre_grad_graph_idpost_grad_graph_id
node1_name
node2_namenode1_debug_strnode2_debug_strcommon_buffer_namesfailure_reasonkernel_metadatakernel_namekernel_pathkernel_categoryr   r   line_of_codenum_load	num_storenum_for_loopnum_atomic_addnum_argsxnumelynumelrnumelkernel_args_num_gbc                    SSK Jn  SSKJn  UR	                  U 5      nU" U5      n[
        R                  " UR                  R                  5      $ )z
The kernel_module_code is the python module that contains kernel function code.
kernel function is the proper triton kernel function annotated with
@triton.jit
r   )PyCodeCache)get_triton_kernel)	codecacher   wrapper_benchmarkr   loadinspect	getsourcefn)kernel_module_coder   r   modkernels        r   _parse_kernel_fn_coder     s@     '4


-
.Cs#F VYY\\**r   c                4    [        U R                  5       5      $ )zB
Return the line of code for the kernel excluding the decorators.
)rb   
splitlines)proper_kernel_fn_codes    r   _parse_kernel_line_of_coder   .  s     $//122r   c                |    US:X  a  g [         R                  " SU 5      nU(       d   S5       eUR                  S5      $ )Nforeachzsize_hints=(\[[0-9, ]*\]),zsize_hints missing!r   researchgroup)r   r   ms      r   _parse_size_hintsr   5  s;    )#
		/1CDA###1771:r   c                |    U S;  a  g [         R                  " SU5      nU(       d   S5       eUR                  S5      $ )N)	reductionpersistent_reductionz$reduction_hint=ReductionHint\.(\w*),z/reduction_hint not found in kernel source code!r   r   )r   r   r   s      r   _parse_reduction_hintr   >  s<    CC
		9;MNA???1771:r   c                $    U R                  U5      $ r2   )count)r   patterns     r   _count_patternr   F  s     &&w//r   c                    U R                  5       S   nUR                  S5      (       d   eUR                  S5      nUR                  S5      nXS-   U nUR                  S5      n[	        U5      $ )Nr   def (z):r   ,)r   
startswithindexsplitrb   )r   def_line	start_idxend_idxdecl_csvcompss         r   _count_argsr   J  sn    $//1!4Hv&&&&s#InnT"GA0HNN3Eu:r   c                ,    U R                  S5      nXS $ )z
Skip decorators.
r   N)r   )kernel_fn_code	start_poss     r   _parse_proper_kernel_fn_coder   T  s     $$V,I*%%r   c                z    [         R                  " U S3U 5      nU(       a  [        UR                  S5      5      $ g )Nz
 = ([\d]+)r   )r   r   r   r   )r   numel_arg_namer   s      r   _parse_numelr   \  s3    
		^$K02GHA1771:r   c                v    [         R                  " SU 5      nU(       a  [        UR                  S5      5      $  g)zu
inductor meta looks like:
    inductor_meta={... 'mutated_arg_names': [], 'no_x_dim': False, 'kernel_num_gb': 2.0},
z.kernel_num_gb.:\s*([0-9.]+)r   N)r   r   r[   r   )r   r   r   s      r   _parse_kernel_args_num_gbr   d  s5     			1>BAQWWQZ  	 r   c           
        ^ ^^^^^^^	 SSK Jn  U" U5      m[        TU5      m[        UT5      m	[	        U5      m[        T5      m[        T5      m[        S5      R                  UUUU UUUU	4S j5        g)z
An utility to log kernel metadata. We may parse metadata from kernel source code here.

It's fine to parse the generated kernel code here since the logging is
disabled by default. It would hurt compilation time.
r   )"get_kernel_category_by_source_coder   c                    > TTT TTT[        TS5      [        TS5      [        TS5      [        TS5      [        T5      [        TS5      [        TS5      [        TS5      [        TT 5      S.$ )	Nztl.loadztl.storezfor ztl.atomic_addr   r   r   r   )r   r   r   r   )r   r   kernel_line_of_coder   r   r   r   r   s   r   <lambda>%log_kernel_metadata.<locals>.<lambda>  s}    &&.$,/&'<iH'(=zJ*+@&I,-BOT#$9:"#8(C"#8(C"#8(C";#
r   N)	r   r   r   r   r   r   r   get_metric_tablerk   )
r   r   r   r   r   r   r   r   r   r   s
   ``  @@@@@@r   log_kernel_metadatar   y  sp     F89KLO*?<NON"#5GJ*+=>N8H 55JK&'//	
 	
r   c                    [         R                  5        Ho  u  pU [        5       ;   d  M  UR                  5       n[        R
                  R                  U5      (       a  [        R                  " U5        UR                  5         Mq     g)z
Purge the old log file at the beginning when the benchmark script runs.
Should do it in the parent process rather than the child processes running
each individual model.
N)	rO   itemsra   ro   r   r   r   unlinkr}   )r5   r   r{   s      r   purge_old_log_filesr     s^     0557(**,,.Hww~~h''		(#  8r   c                 4    [        [        R                  5      $ r2   )enabled_metric_tables_implr   ra   r   r   r   ra   ra     s    %f&B&BCCr   c                    [        5       nU R                  S5       HB  nUR                  5       nU(       d  M  U[        ;   d   SU S35       eUR	                  U5        MD     U$ )Nr   zMetric table name z is not registered)r	   r   striprO   add)
config_strenabledr5   s      r   r   r     sg    )|G  %zz|// 	
 &89	
/ 	D & Nr   c                    U [        5       ;   $ r2   )ra   r5   s    r   is_metric_table_enabledr    s    (***r   c                >    U [         ;   d   SU  S35       e[         U    $ )NzMetric table z is not defined)rO   r  s    r   r   r     s*    ++R}TF/-RR+#D))r   kernel_autotuner   r   triton_config
latency_msc                N   ^ ^^^ [        S5      R                  UUU U4S j5        g )Nr  c                 $   > TT[        T 5      TS.$ )Nr	  )rS   )r   r   r   latencys   r   r   ,log_kernel_autotune_result.<locals>.<lambda>  s    && [!	
r   )r   rk   )r   r   r   r  s   ````r   log_kernel_autotune_resultr    s     &'//	
r   rK   )rL   rU   )r   rS   rL   rS   )r   rS   rL   r   )r   rS   r   rS   rL   
str | None)r   rS   r   rS   rL   r  )r   rS   r   rS   rL   r   )r   rS   rL   rS   )r   rS   r   rS   rL   z
int | None)r   rS   r   rS   rL   zfloat | None)r   rS   r   rS   r   rS   rL   rM   )rL   OrderedSet[str])r  rS   rL   r  )r5   rS   rL   bool)r5   rS   rL   rQ   )
r   rS   r   rS   r   r   r  r[   rL   rM   )?
__future__r   rx   r3   r   r   r   r   	functoolsr   typingr   torch._inductorr   torch._inductor.utilsr   torch.utils._ordered_setr	   collections.abcr
   %torch._inductor.runtime.triton_compatr   torch._inductor.schedulerr   r!   r"   r#   r   r   r   r%   r&   r   r   r'   r(   r)   r   r*   r+   r,   r.   r7   r9   rO   rQ   r   r   r   r   r   r   r   r   r   r   r   r   ra   r   r  r   r  r   r   r   <module>r     s}   " 
   	 	 !    " 4 / (<;  !"       8:4 9     ! ! ! CE !#? D ,- )  3    < 3 3 3M8 82 46 0 5 8/ 8/ 8/v         "   -	   6+ 30&*-*))#&)<?)	)X!D 
 
+*
   

#&
06
AF
	
r   