
    3jG                      % S SK Jr  S SKrS SKrS SKrS SKrS SKrS SKrS SKrS SK	r	S SK
r
S SKr
S SKrS SKrS SKJrJr  S SKJr  S SKJrJr  S SKJrJr  S SKrS SKrS SKrS SKJr  S SKJs  J r!  S SK"J#r#J$r$  S SK%J&r&  S S	K'J(r(  S S
K)J*r+  S SK,J-r-J.r.  S SK/J0r0  S SK1J2r2  S SK3J4r4J5r5  S SK6J7r7  S SK8J9r9  S SK:J;r;  S SK<J=r=J>r>  S SK?J@r@JArA  S SKBJCrCJDrDJErEJFrFJGrGJHrH  S SKIJJrJ  S SKKJLrL  S SKMJNrN  SS
KOJ*r*  SSKPJQrQ  SSKRJSrSJTrTJUrUJVrV  SSKWJXrX  SSKYJZrZJ[r[J\r\  SSK]J^r^  SSK_J`r`  SSKaJbrb  SS KcJdrdJereJfrfJgrgJhrh  SS!KiJjrjJkrkJlrl  \(       a  S SKmrnS SKoro\*R                  rqS"\rS#'   \R                  " \t5      ruS$\rS%'   \R                  R                  rw\R                  R                  rx\ " S& S'5      5       ry\ " S( S)5      5       rz\ " S* S+5      5       r{SqS, jr|SrS- jr}SrS. jr~SsS/ jr " S0 S15      rSqS2 jr\" 5       rStS3 jrSuS4 jr      SvS5 jr  Sw             SxS7 jjrSqS8 jrSqS9 jrSqS: jrSqS; jrSqS< jrSqS= jrSqS> jrSqS? jr      SyS@ jrSzSA jr    S{SB jr   S|           S}SC jjr                S~SD jrSSE jrSSF jrSSG jrSSH jrSSI jrSSSJ jjrSSK jr S         SSL jjr  S           SSM jjr SSS6S6SN.                 SSO jjjrSSSP.           SSQ jjr\" SR5      rSSS jrSsST jrSSU jr\GRH                  SSV j5       r      SSW jrSSX jr                  SSY jr          SSZ jrSS[ jrSS\ jrSS] jrSqS^ jr      SS_ jr S         SS` jjr    SSa jrSSb jr    SSc jrSSd jrSSe jr              SSf jrS SgKJr  SSh jrSSi jr S       SSj jjr      SSk jr      SSl jr        SSm jr SSSn.           SSo jjjr     S               SSp jjrg)    )annotationsN)defaultdictdeque)Callable)	dataclassreplace)AnyTYPE_CHECKING)countersis_node_meta_valid)(create_structured_trace_for_min_cut_info)is_with_effects)config)CustomKnapsackSolverCustomRuntimeEstimator)FakeScriptObject)
is_builtin)
LazyStringtrace_structured)	trace_log)extract_tensor_metadata)BackwardState)is_sym_nodepy_sym_types)magic_methodsmethod_to_operator)find_symbol_binding_fx_nodesfree_symbolsis_symbol_binding_fx_nodeoptimization_hintstatically_known_falsestatically_known_true)graph_drawer)
OrderedSet)CheckpointPolicy   )GraphInfoProvider)dp_knapsackdp_knapsack_sliding_hirschberggreedy_knapsackilp_knapsack)KnapsackEvaluator)	AOTOutputSavedForBackwardsAOTOutput#SavedForBackwardsNoVcCheckAOTOutput)_is_functional_graph)is_opaque_node)get_aot_graph_name)_is_bwd_seed_offset_is_fwd_seed_offset
_is_primal_is_tangentget_cuda_generator_meta_val)fx_graph_cseget_aten_targetraise_getitemsboolAOT_PARTITIONER_DEBUGzlogging.Loggerlogc                  ~    \ rS rSr% SrS\S'   S\S'   S\S'   S\S'   S\S'   SS	 jrSS
 jrSS jrSS jr	SS jr
Srg)OpTypes^   z8Class for keeping track of different operator categorieszOrderedSet[Callable[..., Any]]fusible_opscompute_intensive_ops
random_opsview_opsrecomputable_opsc                2    [        U5      U R                  ;   $ N)r9   rA   selfnodes     W/home/wildlama/miniconda3/lib/python3.13/site-packages/torch/_functorch/partitioners.py
is_fusibleOpTypes.is_fusibleh   s    t$(8(888    c                2    [        U5      U R                  ;   $ rG   )r9   rB   rH   s     rK   is_compute_intensiveOpTypes.is_compute_intensivek   s    t$(B(BBBrN   c                2    [        U5      U R                  ;   $ rG   )r9   rC   rH   s     rK   	is_randomOpTypes.is_randomn   s    t$77rN   c                2    [        U5      U R                  ;   $ rG   )r9   rD   rH   s     rK   is_viewOpTypes.is_viewq   s    t$55rN   c                2    [        U5      U R                  ;   $ rG   )r9   rE   rH   s     rK   is_recomputableOpTypes.is_recomputablet   s    t$(=(===rN    NrJ   fx.Nodereturnr;   )__name__
__module____qualname____firstlineno____doc____annotations__rL   rP   rS   rV   rY   __static_attributes__r[   rN   rK   r?   r?   ^   s=    B//99..,,449C86>rN   r?   c                      \ rS rSr% S\S'   S\S'   S\S'   S\S'   S\S'   S	\S
'   S\S'   \R                  SS j5       rSS jrSS jr	SS jr
SS jrSrg)NodeInfox   list[fx.Node]inputsOrderedSet[fx.Node]_required_fw_nodesrequired_bw_nodestangents_closureunclaimed_nodesdict[fx.Node, int]fw_orderstatic_lifetime_input_nodesc                B   ^  [        S T R                   5       U 4S jS9$ )Nc              3  $   #    U  H  ov   M     g 7frG   r[   .0ns     rK   	<genexpr>-NodeInfo.required_fw_nodes.<locals>.<genexpr>   s     0/1Q/s   c                "   > TR                   U    $ rG   )rq   )rw   rI   s    rK   <lambda>,NodeInfo.required_fw_nodes.<locals>.<lambda>   s    a@PrN   key)sortedrl   rI   s   `rK   required_fw_nodesNodeInfo.required_fw_nodes   s!    0//06P
 	
rN   c                    XR                   ;   $ rG   )rl   rI   rw   s     rK   is_required_fwNodeInfo.is_required_fw   s    ++++rN   c                    XR                   ;   $ rG   )rm   r   s     rK   is_required_bwNodeInfo.is_required_bw   s    ****rN   c                    XR                   ;   $ rG   )ro   r   s     rK   is_unclaimedNodeInfo.is_unclaimed   s    ((((rN   c                \    XR                   ;  a  [        SU S35      eU R                  U   $ )NNode z not in fw nodes!)rl   AssertionErrorrq   r   s     rK   get_fw_orderNodeInfo.get_fw_order   s2    +++ 5+<!=>>}}QrN   r[   N)r^   ri   rw   r]   r^   r;   )rw   r]   r^   int)r_   r`   ra   rb   rd   	functoolscached_propertyr   r   r   r   r   re   r[   rN   rK   rg   rg   x   sZ     ++**))((  !44
 

,+) rN   rg   c                  H    \ rS rSr% S\S'   S\S'   S\S'   S\S'   S\S'   Srg	)
MinCutOptions   r;   ban_if_used_far_apartban_if_long_fusible_chainsban_if_materialized_backwardban_if_not_in_allowlistban_if_reductionr[   N)r_   r`   ra   rb   rd   re   r[   rN   rK   r   r      s      $$"&&!!rN   r   c                |    U R                   R                  SS 5      [        R                  [        R                  4;   $ )N	recompute)metagetr%   MUST_RECOMPUTEPREFER_RECOMPUTErJ   s    rK   must_recomputer      s5    99==d+''))0  rN   c                b    U R                   R                   H  n[        U5      (       d  M    g   g)NTF)graphnodesr   fx_grJ   s     rK   has_recomputable_opsr      s)    

  $ ! rN   c                   U R                   R                   Hf  n[        U5      (       d  M  [        UR                  S5      (       d  M2  [
        R                  R                  UR                  R                  ;   d  Mf    g   g)NtagsTF)	r   r   r   hasattrtargettorchTagnondeterministic_seededr   r   s     rK   has_recomputable_rng_opsr      sV    

  4  V,,		11T[[5E5EE ! rN   c                   [        U R                  S   [        R                  [        R                  45      (       a  g[        U R                  S   [        R
                  5      (       d$  [        S[        U R                  S   5       35      eg)Nvalr&   z.expected node.meta['val'] to be SymFloat, got    )
isinstancer   r   SymIntSymBoolSymFloatr   typer   s    rK   sym_node_sizer      sm    $))E"U\\5==$ABBdii&77<T$))EBR=S<TU
 	
 rN   c                      \ rS rSrSS jrSrg)InvalidNodeBase   c                    g)NzInvalid Noder[   r   s    rK   __repr__InvalidNodeBase.__repr__   s    rN   r[   N)r^   str)r_   r`   ra   rb   r   re   r[   rN   rK   r   r      s    rN   r   c                6    [        U R                  SS 5      S:g  $ )N	namespace_c10d_functional)getattrr   r   s    rK   is_not_collectiver      s    4;;T26HHHrN   c                t   U R                   [        R                  :w  a  gU R                  S   nU R                  S   n[	        U[
        R                  5      (       a  UR                  S:w  a  gSUR                  ;  a  gUR                  S   nX#;  a  gX2   n[	        U[
        R                  5      (       a  U$ g)zGiven a getitem node, check if it extracts from a higher-order op
that has kwargs mapping the key back to an original input.

Returns the original input node if found, None otherwise.
Nr   r&   call_functionkwargs)	r   operatorgetitemargsr   fxNodeopr   )getitem_node	ho_resultr~   r   original_inputs        rK   _get_ho_op_original_inputr      s     h...!!!$I


A
Ci))Y\\_-Ly'''h'F
[N."''**rN   c                L   U R                   [        R                  R                  R                  R
                  [        R                  R                  R                  R
                  4;  a  gU R                  S   n[        U[        R                  5      (       d  g[        U5      $ )zCheck if node is a view/reshape of a higher-order op output that aliases an input.

Returns the original input node from the higher-order op's kwargs if the pattern
matches, None otherwise.
Nr   )r   r   opsatenviewdefaultreshaper   r   r   r   r   )rJ   sources     rK   _is_copy_node_bw_onlyr      sl     {{599>>..66		8N8N8V8VWWYYq\Ffbgg&&$V,,rN   c                    [        U 5      nUb   X!;   a  [        X   [        5      (       d  X   $ [        U 5      nUb   X!;   a  [        X   [        5      (       d  X   $ g)a  Try to find a valid input replacement for an invalid forward output.

This handles cases where a forward output depends on backward nodes but
semantically aliases an input. For example, a view of a getitem from a
triton kernel that mutates a buffer in backward, or a direct getitem from
such a higher-order op. The original input may be a primal or a valid
intermediate node already present in the forward graph.
N)r   r   r   r   )rJ   envr   s      rK   _find_input_for_invalid_outputr      se     +40N"!3.@@"".t4N"!3.@@""rN   Fc                @  ^ [         R                  " 5       n0 mU H4  nUR                  UR                  5      nUR                  Ul        UTU'   M6     U R
                   GHo  nU(       dL  [        U5      (       a  US:w  a  Xq;  a  [        TU'   M1  [        U5      (       a  US:w  a  Xq;  a  [        TU'   MW  UT;   a  M_  UR                  S:X  a  [        TU'   Mz  UR                  S:X  a  [        R                  " UR                  0 UR                  D6n	U	 V
s/ s H7  n
[        U
[         R                  5      (       d  M$  [        TU
   [         5      PM9     n	n
[#        U	5      (       a  [        TU'   GM  UR%                  UU4S j5      TU'   GM0  UR                  S:X  a  UR%                  UU4S j5      TU'   GM\  UR                  S:X  d  GMo  GMr     / n['        X#5       GH  u  p[        U
[         R                  5      (       GaV  U
T;  a  [)        S	U
 S
35      e[        TU
   [         5      (       Ga  SnU
R*                  [,        R.                  R0                  R2                  R4                  L a  [        U
5      (       a  [7        U
R                  5      S:  av  [        U
R                  S   [         R                  5      (       aJ  U
R                  S   T;   a7  [        TU
R                  S      [         5      (       d  TU
R                  S      nUc  [9        U
T5      nUb  UR;                  U5        GMV  [=        S	U
 S35      eUR;                  TU
   5        GM|  UR;                  U
5        GM     UR?                  [A        U5      5      nX>R                  S'   U Vs/ s H?  n[        U[         R                  5      (       a  UR                  RC                  S5      OSPMA     snUR                  S'   URE                  5         URG                  5         U$ s  sn
f s  snf )au  
Given a graph, extracts out a subgraph that takes the specified nodes as
inputs and returns the specified outputs.

This includes specifying non-placeholder nodes as inputs.

The general strategy is to initialize all inputs with proxies as we
encounter them, and trace through the graph, only keeping values which take
in valid proxies. Then, all dead code is eliminated.
backwardforwardplaceholderr   c                   > TU    $ rG   r[   xr   s    rK   r{   4_extract_graph_with_inputs_outputs.<locals>.<lambda>Z  	    CFrN   get_attrc                   > TU    $ rG   r[   r   s    rK   r{   r   ]  r   rN   outputr   z couldn't be found in envNr&   r   z was invalid, but is outputdescstack_traceoutput_stack_traces)$r   Graphr   namer   r   _must_be_in_backwardInvalidNode_must_be_in_forwardr   pytreearg_tree_leavesr   r   r   r   r   any	node_copyzipRuntimeErrorr   r   r   r   copy_r   lenr   appendr   r   tupler   eliminate_dead_codelint)joint_graphrj   outputsoutputs_descssubgraphignore_must_be_in_fw_bw	new_graphrJ   new_nodeall_argsr   output_valuesx_descreplacementoutvr   s                   @rK   "_extract_graph_with_inputs_outputsr    sx   $ 
I"$C ((3		D	  !!&$T**
*&'D	 $D))	)&'D	3; WW%#CIWW'--tyyHDKKHH "!Aa) 4
3q6?3!  
 8}}'D	!++D2BCCIWW
"!++D2BCCIWW S "T M0	a!!|"U1#-F#GHH#a&/22 # HH		 4 4 < <<,Q//AFFq("166!9bgg66q	S(&s166!9~GG"%affQi.K &"@C"HK*!((5$uQC/J%KLL  Q(  #= 1> 

5/
0C$HHV
 'A &0277%;%;

=!E'CHH"#
 !!#NNuf's   #P>PAPc                >   [         R                  =(       a    [        U R                  [        R
                  R                  5      =(       a    [        U R                  5      (       + =(       d1    U R                  [        R                  R                  R                  :H  $ rG   )r   is_non_builtin_to_includer   r   r   _ops
OpOverloadr   r   higher_order triton_kernel_wrapper_functionalr   s    rK   r  r    s`    ++ 	DKK!6!6	7	W
4;;@W<W 	R;;%))00QQQrN   c                    U R                   S:H  =(       a)    [        U R                  R                  S5      [        5      $ )Nr   r   )r   r   r   r   r   r   s    rK   _is_backward_stater    s*    77m#W
499==3G(WWrN   c                @    U R                   R                  SS 5      S:H  $ )Npartitioner_tagis_backwardr   r   r   s    rK   _has_tag_is_backwardr    s    99==*D1]BBrN   c                @    U R                   R                  SS 5      S:H  $ )Nr  
is_forwardr  r   s    rK   _has_tag_is_forwardr    s    99==*D1\AArN   c                @    U R                   R                  SS 5      S:H  $ )Nr  must_be_in_forwardr  r   s    rK   _has_tag_must_be_in_forwardr!    s    99==*D15IIIrN   c                @    U R                   R                  SS 5      S:H  $ )Nr  must_be_in_backwardr  r   s    rK   _has_tag_must_be_in_backwardr$    s    99==*D15JJJrN   c                .   [        U 5      (       a  g[        U R                  [        R                  R
                  5      =(       a     U R                  R                  R                  n[        U 5      (       + =(       a    [        U 5      (       + =(       a    U$ NT)
r!  r   r   r   r  r  _schema
is_mutabler  r$  rJ   r(  s     rK   r   r     sq    "4(( 	4;;

 5 56 	+KK** 
 !&& 	,T22	rN   c                    [        U 5      (       a  g[        U R                  [        R                  R
                  5      =(       a     U R                  R                  R                  n[        U 5      =(       a    U$ r&  )	r$  r   r   r   r  r  r'  r(  r  r)  s     rK   r   r     sW    #D))4;;

 5 56 	+KK**   %4*4rN   c          
     \   [         R                  " S U R                  R                  SS9 5       6 n[         R                  " [	        [        U R                  R                  SS95      5      R                  R                  SS /[        U5      -  5      5      nUS U nX!S  nUS U nX1S  nXEXg4$ )Nc              3  8   #    U  H  oR                   v   M     g 7frG   r   rv   rJ   s     rK   rx   +_extract_fwd_bwd_outputs.<locals>.<genexpr>  s     	K J)) J   r   r   r   )	r   r   r   
find_nodesnextiterr   r   r   )joint_modulenum_fwd_outputsr  r  fwd_outputsbwd_outputsfwd_outputs_descsbwd_outputs_descss           rK   _extract_fwd_bwd_outputsr;    s     $$	K 2 2 = = = J	KG **T,$$//8/<=>CCGGTFS\)	
M
 *?+K*+K%&67%&67%6IIrN   c                \    U  H&  nUR                   U:X  d  M  U R                  U5          g    g rG   )r   remove)saved_valuesr   saved_values      rK   _remove_by_namer@    s+    #t#, $rN   c                    [        U 5      n[        [        U 5      S-
  SS5       H  n[        X   5      (       a  M  US-   n  U$    U$ )Nr&   )r   ranger   )fwd_module_outputsidxis      rK   find_first_sym_noderG    sT      
!C3)*Q.B7-011a%CJ	 8 JrN   c           	        U R                  U5         U R                  [        R                  R                  R
                  R                  U4S9n[        R                  R                  R
                  R                  UR                  S   5      UR                  S'   [        UR                  S   5      UR                  S'   S S S 5        U R                  W5         U R                  [        R                  R                  R                  R                  US/S4S9n[        R                  R                  R                  R                  UR                  S   S/S5      UR                  S'   [        UR                  S   5      UR                  S'   S S S 5        U R                  W5         U R                  [        R                  R                  R                  R                  U[        R                  4S9n[        R                  R                  R                  R                  UR                  S   [        R                  5      UR                  S'   [        UR                  S   5      UR                  S'   S S S 5        U R                  W5         U R                  [        R                  R                  R                  R                  Xs4S9n[        R                  R                  R                  R                  UR                  S   U5      UR                  S'   [        UR                  S   5      UR                  S'   S S S 5        U R                  W5         U R                  [        R                  R                  R                  R                  U4S9n	[        R                  R                  R                  R                  UR                  S   5      U	R                  S'   [        U	R                  S   5      U	R                  S'   S S S 5        U R                  W	5         U R                  [        R                  R                  R                  R                   X4S9n
[        R                  R                  R                  R!                  U	R                  S   U5      U
R                  S'   [        U
R                  S   5      U
R                  S'   S S S 5        U R                  W
5         U R                  [        R                  R                  R                  R                  U
[        R"                  4SU SUR$                   3S9n[        R                  R                  R                  R                  U
R                  S   [        R"                  5      UR                  S'   [        UR                  S   5      UR                  S'   S S S 5        U$ ! , (       d  f       GN= f! , (       d  f       GN_= f! , (       d  f       GN= f! , (       d  f       GN= f! , (       d  f       GN= f! , (       d  f       GNc= f! , (       d  f       W$ = f)	Nr-  r   tensor_metarB  Tfp8_scale_pos__r   r   )inserting_afterr   r   r   r   absr   r   r   amaxprimsconvert_element_typefloat64	clamp_min
reciprocalmulTensorfloat32r   )r   rJ   maxminpositionabs_node	amax_nodeamax_64_nodeclamp_min_nodereciprocal_nodemul_node
scale_nodes               rK   calculate_quantization_scalingrb    s    
		t	$&&IINN&& ' 
  %yy~~1199$))E:JKe'>x}}U?S'Tm$ 
% 
		x	(''IINN''RD$' ( 
	 !&		 3 3 ; ;MM% 2$!
	u )@	u@U(V	}% 
) 
		y	)**IIOO0088U]]+ + 
 $)99??#G#G#O#ONN5!5==$
%  ,Ce$,
-( 
* 
		|	,,,IINN$$,,$ - 
 &+YY^^%=%=%E%Ee$c&
E" .E&.
M* 
- 
		~	.--IINN%%-- " . 
 ',iinn&?&?&G&G&'
U# /F  '/
]+ 
/ 
			/&&IINN%%!' ' 
  %yy~~1188  ' 
e (?x}}U?S'Tm$ 
0 
		x	(((IIOO0088EMM*!(1TYYK8 ) 


 "'!E!E!M!MMM% %--"

 *AQVAW)X
& 
) I 
%	$ 
)	( 
*	) 
-	, 
/	. 
0	/ 
)	( sZ   B0WB6W)*CW;B1XB0X$B1X1.CY
W&)
W8;
X

X
X.1
Y 
Yc           	     	   U R                  U5         U R                  [        R                  R                  R
                  R                  U[        R                  4S9n[        R                  R                  R
                  R                  UR                  S   [        R                  5      UR                  S'   [        UR                  S   5      UR                  S'   S S S 5        U R                  W5         U R                  [        R                  R                  R                  R                  Xr4S9n[        R                  R                  R                  R                  UR                  S   UR                  S   5      UR                  S'   [        UR                  S   5      UR                  S'   S S S 5        U R                  W5         U R                  [        R                  R                  R                  R                  X4S9n	[        R                  R                  R                  R                  UR                  S   U5      U	R                  S'   [        U	R                  S   5      U	R                  S'   S S S 5        U R                  W	5         U R                  [        R                  R                  R                  R                  X4S9n
[        R                  R                  R                  R                  U	R                  S   U5      U
R                  S'   [        U
R                  S   5      U
R                  S'   S S S 5        U R                  W
5         U R                  [        R                  R                  R
                  R                  X4SU SUR                   3S9n[        R                  R                  R
                  R                  U
R                  S   U5      UR                  S'   [        UR                  S   5      UR                  S'   S S S 5        U$ ! , (       d  f       GNW= f! , (       d  f       GN= f! , (       d  f       GN= f! , (       d  f       GN"= f! , (       d  f       W$ = f)Nr-  r   rI  fp8_quant_pos_rK  rL  )rM  r   r   r   rP  rQ  r   rW  r   r   r   rU  rV  rS  	clamp_maxr   )r   rJ   ra  
quant_typerS  re  rZ  target_node_32scaled_target_nodeclamp_min_scaled_nodeclamp_max_scaled_nodequant_activation_nodes               rK   perform_quantizationrl  4  s    
		z	*,,IIOO0088& - 
 &+YY__%I%I%Q%QIIeemm&
E" .E&.
M* 
+ 
		~	."00IINN%% - 1 
 */););)B)B&
(>*
& 2I##E*2
. 
/ 
		1	2 % 3 3IINN$$,,$0 !4 !
 -2IINN,D,D,L,L##E*I-
""5) 5L!&&u-5
""=1 
3 
		4	5 % 3 3IINN$$,,'3 !4 !
 -2IINN,D,D,L,L!&&u-y-
""5) 5L!&&u-5
""=1 
6 
		4	5 % 3 3IIOO0088'4!(1TYYK8 !4 !
 IIOO0088%**51: 	""5)
 5L!&&u-5
""=1 
6 ! u 
+	* 
/	. 
3	2 
6	5 
6	5 ! s@   CP09B>QB1QB1Q&$CQ80
P?
Q
Q#&
Q58
Rc                P    U R                  5       nU R                  5       nX-  S-  $ )z
Calculate the size of a PyTorch tensor in megabytes (MB).

Args:
    tensor (torch.Tensor): Input tensor

Returns:
    float: Memory size in MB
i   )numelelement_size)tensornum_elementsro  s      rK   calculate_tensor_sizerr  z  s+     <<>L&&(L'K88rN   c            	         [         R                  R                  R                  S   R	                  SS5      n U R                  S5       Vs/ s H%  n[        [         UR                  S5      S   5      PM'     n nU $ s  snf )N!activation_quantization_aten_passallowed_dtypesztorch.bfloat16;.rB  )r   	_inductorr   post_grad_fusion_optionsr   splitr   )ru  dtypes     rK   get_allowed_dtypesr|    s}    __++DD+	c
,-  ;I:N:Ns:S:Su{{3'+,:S   s   ,A;c                n   [        5       n[        U 5      (       a  U R                  S   R                  U;  a  g[        R
                  R                  R                  S   R                  SS5      n[        U R                  S   5      n[        R
                  R                  R                  S   R                  SS5      (       d  X2:  $ [        R
                  R                  R                  S   R                  SS5      (       a&  [        X2:  5      =(       d    [        X2:  5      (       + $ [        X2:  5      $ )Nr   Frt  
size_in_mbd   skip_dynamo_guardsquantize_dynamic_shape)r|  r   r   r{  r   rx  r   ry  r   rr  r"   r!   )rJ   ru  size_thresholdr~  s       rK   should_quantizer    s   ')Nd##tyy'7'='=^'S__++DD+	c,  'tyy'78J??!!::+	c
&' ++ ??!!::/

#&
./ ), J+J,HIIJ
 ))EFFrN   c                     [         R                  R                  R                  S   R	                  SS5      n [        [         U R                  S5      S   5      $ )Nrt  rf  ztorch.float8_e5m2rw  rB  )r   rx  r   ry  r   r   rz  )rf  s    rK   get_quant_typer    sN    ''@@+	c,+,  5***3/344rN   c                ^    [         R                  " U 5      nUR                  UR                  4$ )z
Calculate the range of values for a given torch.dtype.
Args:
    dtype (torch.dtype): The input dtype.
Returns:
    tuple: A tuple containing the minimum and maximum values.
)r   finforY  rX  )r{  infos     rK   calculate_ranger    s%     ;;uD88TXXrN   c           
        U R                  SS9S   nUR                  S   n[        5       n[        U5      u  pV[	        5       n/ n/ n	[        U5       GH  u  pX:  a  M  UR                  R                  SS5      (       d  M0  [        R                  R                  R                  S   R                  SS5      (       aP  [        XUS	U
5      n[        XXXVU
5      n[        U5      (       d  UR                  U5        OU	R                  U5        OU R!                  U5         U R#                  [        R$                  R&                  R(                  R*                  X4S
U
 SUR,                   3S9n[        R$                  R&                  R(                  R+                  UR                  S   U5      UR                  S'   [/        UR                  S   5      UR                  S'   S S S 5        WXz'   GM     [        U5       VVs/ s H  u  pUR                  X5      PM     nnn[1        U5      nX-   nU(       a  US U U-   UUS  -   nUR3                  S[5        U5      5        [6        S   S==   S-  ss'   g ! , (       d  f       N= fs  snnf )Nr   r1  r   saved_for_quantizationFrt  use_scalingT-q=rd  rK  rL  r   rI  inductor%activation_quantization_fwd_aten_passr&   )r2  r   r  r  dict	enumerater   r   r   rx  r   ry  rb  rl  r   r   rM  r   r   rP  rQ  r   r   r   rG  
update_argr   r   )r   r6  r   r7  rf  rS  re  position_to_quanttensor_scale_nodessym_scale_nodesrZ  rJ   ra  
quant_noderF  output_updated_argsrE  scale_nodess                     rK   quantize_activation_fwr    sl   *1-F++a.K!J*:6I(*%'O#K0 %99==1599%%>>3c-&' <E8

 2x
 #:..&--j9#**:6 **40!&!4!4		<<DD"/-hZqD "5 "J 		<<DD IIe,j OOE*
 6M".6JOOM2 1 +5'[ 1d 7@6L6L71a&6L  
 1
2C$6K%36I#$6OO 	 a234Z@AQFAA 10&s   CI4 J4
J	c           
     R
  ^	 U R                    Vs/ s H  oR                  S:X  d  M  UPM     nnS nU GH  nUR                  R                  SS5      (       d  M'  UR                  R	                  S5        UR                  R	                  S5      n[
        R                  R                  R                  S   R                  SS5      (       Ga  U R                  U5         SUR                  R                  SS	5      -   m	[        U	4S
 jU 5       5      nS S S 5        U R                  W5         U R                  [
        R                  R                  R                   R"                  X4S9n[
        R                  R                  R                   R#                  UR                  S   U5      UR                  S'   [%        UR                  S   5      UR                  S'   S S S 5        U R                  U5         U R                  [
        R                  R&                  R(                  R*                  X54S9n[
        R                  R&                  R(                  R+                  UR                  S   UR                  S   5      UR                  S'   [%        UR                  S   5      UR                  S'   S S S 5        U R                  W5         U R                  [
        R                  R                  R                   R"                  Xd4S9n[
        R                  R                  R                   R#                  UR                  S   U5      UR                  S'   [%        UR                  S   5      UR                  S'   S S S 5        OU R                  U5         U R                  [
        R                  R                  R                   R"                  X4S[-        UR                  5      -   S9n[
        R                  R                  R                   R#                  UR                  S   U5      UR                  S'   [%        UR                  S   5      UR                  S'   S S S 5        [/        UR0                  R3                  5       5       H#  nUW:w  d  M  X:w  d  M  UR5                  X5        M%     GM     [6        S   S==   S-  ss'   g s  snf ! , (       d  f       GN= f! , (       d  f       GN= f! , (       d  f       GNH= f! , (       d  f       N= f! , (       d  f       N= f)Nr   r  Fdequant_typert  r  
fp8_scale_
fp8_quant_ c              3  L   >#    U  H  nUR                   T:X  d  M  Uv   M     g 7frG   r   )rv   	bwd_input
scale_names     rK   rx   )quantize_activation_bw.<locals>.<genexpr>  s&      &)2I$>>Z7 "	)2s   $	$r-  r   rI  dequant_rL  r  %activation_quantization_bwd_aten_passr&   )r   r   r   r   popr   rx  r   ry  rM  r   r   r3  r   r   rP  rQ  r   r   r   divrV  r   listuserskeysreplace_input_withr   )
r   rJ   	bw_inputsactivation_noder  ra  divided_target_node_32dequant_nodeuserr  s
            @rK   quantize_activation_bwr    s,   "'++J+$M1I+IJO99==1599IIMM2399==8L%%>>3c-'( **40!-		0A0A,PR0S!SJ!% &)2& "J 1 **:6&+&9&9		<<DD"1 ': 'O
 		<<DD IIe,l $((/
 ;R',,U3;O((7 7 **?;-2-@-@		**11-: .A .* :?9K9K9R9R',,U3Z__U5K:*//6 00F0K0KE0RS +//> < **+AB#(#6#6		<<DD4C $7 $L
 		<<DD277> !%%e,
 8O$))%08L%%m4 CB **40#(#6#6		<<DD"1'#dii.8 $7 $L 		<<DD IIe,l !%%e,
 8O$))%08L%%m4 1 TZZ__./<'D,C++D? 0M T Z@AQFAY K 10 76 <; CB 10sJ   SS5S,B1S#6B>S5B1TCT
S 	#
S2	5
T	
T	
T&	c                  ^ ^ [        SS U 4S jS9  [        T R                  U5        [        SS U 4S jS9  [        SS U4S jS9  T R                  R                  S	S
9S   R                  S   nU H  nSUR
                  ;   d  M  U[        R                  " SSUR
                  5         nTR                  R                  U5         TR                  R                  UR
                  S9nS S S 5        UR                  S   nWR                  R                  UR                  5        SUR                  S'   XR                  S'   UR                  U5        TR                  R                  U5        GM     [        R                  R                   R"                  S   R%                  SS5      (       a  ['        TR                  R                  SS
95      n	U	S   n
[)        U	5       H  n[+        U5      (       a  M  Un
  O   T R                  R                  S	S
9S   R                  S   nU H  nSUR
                  ;   d  M  TR                  R                  U
5         TR                  R                  UR
                  S9nS S S 5        WR                  R                  UR                  5        Un
M     [-        TR                  5        [        SS U4S jS9  g ! , (       d  f       GN= f! , (       d  f       Nq= f)Nartifactc                     SSS.$ )N,before_activation_quantization_fwd_aten_passstringr   encodingr[   r[   rN   rK   r{   5perform_fp8_activation_quantization.<locals>.<lambda>d      B 
rN   c                 &   > T R                  SSSS9$ NFTprint_outputinclude_strideinclude_deviceprint_readable
fwd_modules   rK   r{   r  h      :44tD 5 
rN   metadata_fn
payload_fnc                     SSS.$ )N+after_activation_quantization_fwd_aten_passr  r  r[   r[   rN   rK   r{   r  q      A 
rN   c                 &   > T R                  SSSS9$ r  r  r  s   rK   r{   r  u  r  rN   c                     SSS.$ )N,before_activation_quantization_bwd_aten_passr  r  r[   r[   rN   rK   r{   r  |  r  rN   c                 &   > T R                  SSSS9$ r  r  
bwd_modules   rK   r{   r    r  rN   r   r1  r   r  z^fp8_quant_pos_\d+_r  r  r  Tr  rt  r  r   rB  r  c                     SSS.$ )N+after_activation_quantization_bwd_aten_passr  r  r[   r[   rN   rK   r{   r    r  rN   c                 &   > T R                  SSSS9$ r  r  r  s   rK   r{   r    r  rN   )r   r  r   r2  r   r   resubrM  r   r   updatereplace_all_uses_with
erase_noder   rx  r   ry  r   r  reversedr6   r  )r  r  bwd_module_inputsr6  quant_fwd_module_outputsfwd_noder  quant_bwd_inputr  quant_bwd_module_inputsbwd_input_locbw_inputscaled_fwd_module_outputsscale_bwd_inputs   ``            rK   #perform_fp8_activation_quantizationr  \  s    

	 :++_=

	 

	  *//::h:GJOOPQR,8==()-r8==AI !!11)<","2"2">">HMM">"R =$>>.9L  ''6=AO  !9:3?  0++O<''	2 - 66+	c- #'z'7'7'B'Bm'B'T"U/3 !89Hx(( ( :
 %/$4$4$?$?8$?$LQ$O$T$TUV$W!1Hx}},%%55mD&0&6&6&B&B&B&VO E$$++HMM: / 2 :++,

	? =<0 EDs   	$K$K*
K'	*
K8	c                .   U(       a  U Vs/ s H  oUR                   PM     snO/ nU  Vs0 s H  oUR                   U_M     nn[        R                  R                  R                  S   R                  SS5      (       a/  U  Vs0 s H"  nSUR                   ;  d  M  UR                   U_M$     nnUR                  R                  SS9S   R                  S   nUR                  R                  SS9 Vs0 s H  oUR                   U_M     n	nSn
U H  nUR                   U;   d  M  [        U5      (       d  M'  UR                   U;   a"  [        R                  S	UR                   5        MY  S
UR                  S'   UR                  S   R                  UR                  S'   S
XR                      R                  S'   UR                  S   R                  XR                      R                  S'   S
n
M     U
(       a  [        XX5        g g s  snf s  snf s  snf s  snf )Nrt  exclude_primalsFprimalsr   r1  r   r   z*Skipping quantization of static input %s: Tr  r   r  )r   r   rx  r   ry  r   r   r2  r   r  r=   debugr   r{  r  )r>  r  r  rr   r6  rJ   static_input_namessaved_values_namesrD  r  should_perform_fp8_quants              rK   enable_activation_quantizationr    s    '  ;;:t:; 
 7CCld))T/lC66+	c
U#$ )5
(4	8RODIItO 	 
 $))444A!DII!L$.$4$4$?$?=$?$Q$QD		4$Q    %"99**t/D/Dyy..		F		R26DII./(,		%(8(>(>DIIn%JNii(--.FG@D		%@P@V@Vii(--n='+$ #  +$5	
  9 	< D
s   HHHH1H)rr   r  omit_aot_autograd_runtimec                  [        XS9u  ppU R                  R                  SS9n/ [        [        U5      QnU(       a  / O/ [        [
        U5      Qn/ [        [        U5      Qn/ [        [        U5      Qn/ [        [        U5      QnUc  / n[        U R                  UU-   U-   U-   U-   U	USUS9n[        R                  R                  5       nUR                  SS9 H  nUR                  (       dD  [        UUR                  5        [        UUR                  5        [        UUR                  5        MX  U(       ae  [!        S UR                   5       5      (       aD  [        UUR                  5        [        UUR                  5        [        UUR                  5        M  [        U5      (       d  M  [        UUR                  5        U(       a  M  [#        S5      e   [%        5       n/ n/ nU HJ  n['        U5      nU(       a$  UR)                  U5        UR+                  U5        M9  UR+                  U5        ML     [-        U R                  5      n[.        R0                  " UX5       Hc  nS	UR2                  ;  a  M  [5        UR2                  S	   5      U-
  n[7        US
 S9 H  nUU;  a  M  UR+                  UU   5        M!     UU-  nMe     UR9                  5         UR;                  UU-   5        U(       Gd  / n/ n/ nU H  n[=        UR2                  R?                  S	5      [@        5      (       a  UR+                  U5        MD  UR2                  R?                  SS5      (       a  UR+                  U5        Mx  UR+                  U5        M     UR9                  5         UR;                  UU-   5        [C        U5      n[E        U5       HL  u  n nU U:  d  M  UR2                  R?                  SS5      (       a  M1  [#        SU  SU S[C        U5       35      e   [        U R                  X-   UU-   U-   U-   U-   U
[G        [C        U5      [C        U5      -   [C        U5      -   [C        U5      -   5       V s/ s H.  n U U:  a  U [C        U5      :  a  [I        U 5      O
[K        U 5      PM0     sn -   SUS9n![        U R                  UU-   U-   U-   U-   U-   U-   U	USUS9nO{[        U R                  X-   X-   U-   U
[G        [C        U5      [C        U5      -   5       V s/ s H  n [K        U 5      PM     sn -   SUS9n![        U R                  UU-   U-   U-   U	USUS9n[L        RN                  RQ                  U U!5      n"[L        RN                  RQ                  U U5      n#[R        RT                  R?                  SS5       b  [W        UU"U#UU5        U"U#4$ s  sn f s  sn f )a:  Extract forward and backward graph modules from a joint graph.

Args:
    ignore_must_be_in_fw_bw: When True, disables forward/backward placement
        enforcement in _extract_graph_with_inputs_outputs. Needed when the
        joint_module is not an original fwd+bwd joint graph (e.g. a backward
        graph being re-partitioned for dI/dW splitting).
    omit_aot_autograd_runtime: When True, skips postprocessing that is
        only needed when the resulting modules will be wrapped in a custom
        autograd.Function (the AOTAutograd path). This includes: tangent input
        handling, version-counter check sorting of saved tensors, opaque object
        (FakeScriptObject) separation, and fp8 activation quantization. Set this
        to True when the fwd/bwd modules will be executed directly without autograd.
r6  r   r1  Nr   )r  c              3     #    U  H^  nUR                   [        R                  R                  R                  R
                  L =(       a    [        UR                  5      S :H  v   M`     g7fr   N)r   r   r   r   wait_tensorr   r   r  ru   s     rK   rx   +_extract_fwd_bwd_modules.<locals>.<genexpr>"  sR      )
   HH		22>>FFF "AGG!"s   A&A(z'backward_state_inputs must not be emptyr   c                    U R                   $ rG   r  )ss    rK   r{   *_extract_fwd_bwd_modules.<locals>.<lambda>J  s    166rN   r}   saved_tensor_with_no_vc_checkFzi=z, no_vc_check_start_idx=z, len(saved_values)=r   rt  ),r;  r   r2  filterr5   r6   r4   r3   r  r  r   distributedis_availabler  r@  r   allr   r$   r   addr   r   	itertoolschainr   r   r   clearextendr   r   r   r   r  rC  r/   r.   r   _lazy_graph_module_make_graph_moduleinductor_configry  r  )$r5  r>  saved_sym_nodessaved_opaque_nodesr6  rr   r  r  r7  r8  r9  r:  placeholdersprimal_inputstangent_inputsfwd_seed_offset_inputsbwd_seed_offset_inputsbackward_state_inputs	bwd_graphdistributed_enabledrJ   saved_symbolssaved_sym_nodes_bindingsaved_sym_nodes_derivedsymbolsymbol_bindingsnew_symbolsr  saved_values_with_vc_checksaved_values_no_vc_checksaved_opaque_objectsno_vc_check_start_idxrF  	fwd_graphr  r  s$                                       rK   _extract_fwd_bwd_modulesr    sG   4 	!O CK/  %%00M0BL7fZ67M'-Qvk</P-Q  Jv&9<HIIv&9<HIGf%7FG!2
	
	 	 !		!
 	 7I  ++88:$$$6zzL$))4OTYY7.		:
 !S )
 ZZ)
 &
 &

 L$))4OTYY7.		:%%L$))4(($%NOO- 78 /9lM    *40f%#**40#**40   3<3E3EFO 7V		!"499U#34}D)9:A '#**?1+=> ; 	$ W" 25LLM$ &("#% ! D$))--.0@AA$++D1>FF(//5*11$7 ! 	69QQR #$> ? !.GAt))yy}}%DeLL(QC78M7NNbcfgsctbuv  / 72"# !! 	
 
 %./0,-. /*+	
A --!c,6G2G 4A6/23	
 $;+
	. 7"# !! 	
 %% $$ $;
	  72&8 s<033GGHHA +1-H
 $;
	 7$% $$ $;

	 &&99,	RJ&&99,	RJ0044/	
 	
 	''	
 z!!K
Fs   85WW)static_lifetime_input_indicesrr   c          	       ^ / nSnU R                   R                   H7  n[        U5      (       d"  [        U5      (       d  [	        U5      (       d  M5  UnM9     Uc  [        S5      eU R                   R                   H*  n[        U5      (       d  UR                  U5        XvL d  M*    O   [        S U 5       5      n[        U 5      n	[        U 5      n
U	(       aE  [        U R                   5      S   b"  [        R                  " S5        [        U UUUS9$ [        U SS9n [         R"                  (       d  [%        U 5        ['        U 5        [)        U 5        Uc  / n[+        XU5      n/ n/ n/ n[,        R.                  R1                  5       mSS	 jnSS
 jnSU4S jjnU R                   R                   GH
  nUR2                  U;  a  M  UR4                  S:X  a'  UR2                  S U R7                  5        5       ;   a  MM  UR8                  [,        R:                  R<                  R>                  R@                  [,        R:                  RB                  RD                  R@                  [,        R:                  RB                  RF                  R@                  [,        R:                  RB                  RH                  R@                  [,        R:                  RB                  RH                  RJ                  4;   a  GMA  [M        U5      (       a  UR                  U5        GMe  U" U5      (       a  GMu  URN                  RQ                  S5      [R        RT                  :X  a6  [W        U5      (       a  UR                  U5        OUR                  U5        GM  U" U5      (       aX  U	(       a  [        SU SUR8                   35      e[W        U5      (       a  UR                  U5        OUR                  U5        GM=  [W        U5      (       a  UR                  U5        GMa  U" U5      (       d  UR4                  S:X  a  [        SU S35      eURX                   Vs/ s H  nUR2                  U;  d  M  UPM     nn[[        S U 5       5      (       a  UR]                  U5        GM  [_        U5      (       a  GM  UR                  U5        GM     [a        [b        Re                  U5      Rg                  5       5      n[a        [b        Re                  U5      Rg                  5       5      n[a        [b        Re                  U5      Rg                  5       5      n[         Rh                  (       a  [i        U R                   U5      nUc  URj                  n[m        U UUUUUS9u  nnUR                   Ro                  [p        S9  UR                   Ro                  [p        S9  U	(       a,  U
(       a  [s        U UU[u        U5      5      u  nn[w        U5      n[         Rx                  (       a  SSK=J<n  U" UUUU5        [}        U5      n[}        U5      n[        USS9n[u        UR                  5      S:  a
  [        USS9nUU4$ s  snf )a  
Partitions the :attr:`joint_module` in a manner that closely resembles the
behavior observed in the original ``.forward()`` and ``.backward()`` of the
callable, i.e., the resulting forward graph contains those operators that
are executed in the original ``.forward()`` callable passed to
:func:`aot_function`.

The default partitioner collects the operators that are between the forward
inputs and the forward outputs. This helps in finding the tensors which have
to be stashed for the backward pass. These stashed tensors become the output
of the generated forward graph. The remaining operators are then placed in
the backward graph.

.. warning::
    This API is experimental and likely to change.

Args:
    joint_module(fx.GraphModule): The joint forward and backward graph. This
        is the result of AOT Autograd tracing.

Returns:
    Returns the generated forward and backward Fx graph modules.
Nzlast_node must not be Nonec              3  \   #    U  H"  oR                   S :w  d  M  UR                  v   M$     g7fr   Nr   r   r.  s     rK   rx   $default_partition.<locals>.<genexpr>  s"      $+dww(/B			m   ,,r   zxTrying to unsafely apply AC to a non-functional graph with the default partitioner. Falling back to min-cut partitioner.)r6  r  Tis_default_partitionc                    SU R                   ;   =(       d=    [        U R                   R                  S5      [        R                  R
                  5      $ )NrI  r   )r   r   r   r   _subclasses
FakeTensorr   s    rK   	is_tensor$default_partition.<locals>.is_tensor#  s=    		) 
ZIIMM% %"3"3">">.
 	
rN   c                x    [        S U R                   5       5      =(       a    [        U R                  5      S:  $ )Nc              3  Z   #    U  H!  oR                   [        R                  :H  v   M#     g 7frG   )r   r   r   rv   r  s     rK   rx   =default_partition.<locals>.is_multi_output.<locals>.<genexpr>*  s     GJDx///Js   )+r   )r  r  r   r   s    rK   is_multi_output*default_partition.<locals>.is_multi_output(  s.    GDJJGG $DJJ!#	
rN   c                   > U R                  SS9=(       a^    U R                  S;  =(       aH    T(       + =(       d:    U R                  [        R                  R
                  R                  R                  L$ )NF)impure_random)r   r   )	is_impurer   r   r   r   r   r  r   )rJ   r  s    rK   r0  $default_partition.<locals>.is_impure.  sc     NNN/ 		 (' U;;eii&@&@&L&L&T&TT	
rN   r   c              3  *   #    U  H	  u  pUv   M     g 7frG   r[   )rv   kr  s      rK   rx   r  C  s      3
6$!A6s   r   z.Trying to apply AC on a graph with impure op: z, r   z	Expected z to be a tensorc              3  8   #    U  H  n[        U5      v   M     g 7frG   r   ru   s     rK   rx   r  p  s     7!{1~~r0  r  r  r6  rr   )is_impure_noder&   enable_activation_offloadingFr  r\   )Ar   r   r  r5   r4   r   r6   r   r$   r   r   r0   warningswarn#min_cut_rematerialization_partitioncleanup_recompute_tagsr   (unsafe_allow_optimization_of_collectivesforce_save_collectivesforce_save_effectful_opsforce_save_bw_mutation_srcclassify_nodesr   r  r  r   r   named_modulesr   r   r   _assert_scalarr   profiler_record_function_enter_new_record_function_enter_record_function_exit_RecordFunctionr   r   r   r%   	MUST_SAVEr1   r  r  r   r   r  r  fromkeysr  _sync_decision_cross_ranksrr   r  r   r   functionalize_rng_opsr   #reordering_to_mimic_autograd_enginer9  ,_activation_offloading.activation_offloadingr:   thread_graphsafe_rng_from_hopsrm   )r5  _joint_inputsr6  r  rr   forward_nodes	last_noderJ   forward_node_namesgraph_has_recomputable_opsgraph_has_recomputable_rng_ops	node_infor>  r  r  r&  r,  r0  rw   backward_usages	fw_module	bw_moduler9  r  s                          @rK   default_partitionr\    sn   @ MI""((t$$
4(8(8<OPT<U<UI ) 9::""((4    &	 )
 $ $+$  "6l!C%=l%K"! 2 23A6B MML 7 /.K	  .lQUV::|,\*|,$,(*%_I LO++88:



$ ""((99..77j TYY 3
&4463
 &
 ;;IINN))11 II99AAII55==II44<<II44DD	
 	
 t ""4(4  99==%)9)C)CCd##"))$/##D)T??)$DTF"T[[MZ  d##"))$/##D)$%%d+477o#= 9TF/!BCC&*jjUjAFFBT4T1jU7777 ""?3d##%y )| l388:;L4==9>>@AOdmm,>?DDFG((1,2D2DlS"*&/&K&K#3'-'$?Iy OO''7H'IOO''7H'I!)#8iC4H$ Iy 8	B	 **	
 	%'		
 y)Iy)I.yeLI
9&&'!+29$O	iE Vs   Y(#Y(g    .Ac                    XR                   -  $ rG   )itemsize)rn  r{  s     rK   _tensor_nbytesr_    s    >>!!rN   c                  ^ SS jmSU R                   ;   a  U R                   S   n[        U[        5      (       a  g[        U[        [        45      (       a  [        U4S jU 5       5      $ [        U[        5      (       a#  [        U4S jUR                  5        5       5      $ [        U[        R                  5      (       a  T" U5      $ [        S[        U5       SU  35      eU R                  S:X  d;  U R                  [        R                  R                  R                   R"                  L a  g	[        S
U  S35      e)Nc                    [        U [        R                  5      (       d  g[        [	        U R                  5       SS9U R                  5      $ )Nr      fallback)r   r   rV  r_  r    rn  r{  r   s    rK   object_nbytes_size_of.<locals>.object_nbytes  s5    !U\\**/	DI177SSrN   r   r&   c              3  4   >#    U  H  nT" U5      v   M     g 7frG   r[   )rv   rw   rf  s     rK   rx   _size_of.<locals>.<genexpr>  s     5A}Q''   c              3  8   >#    U  H  u  pT" U5      v   M     g 7frG   r[   )rv   rK  rw   rf  s      rK   rx   ri    s     @KDA}Q''Ks   zUnknown metadata type z	 on node r   r   r   zO didn't have `val` metadata; we should always have `val` metadata on the nodes.)r   objectr^   r   )r   r   r   r  r   sumr  itemsr   rV  r   r   r   r   r   r   rE  r   )rJ   r   rf  s     @rK   _size_ofro    s    T
 		iic<(( dE]++5555T""@CIIK@@@U\\** %%3DI;ivNOOww*uyy~~/L/L/T/T T

vde rN   c           	     4   SSK Jn  U" [        5      nU R                   H5  nUR                  S:X  d  M  X#R
                  R                  ==   S-  ss'   M7     [        R                  S[        UR                  5       [        R                  " S5      SS95        g )Nr   )r   r   r&   %sTr~   reverse)collectionsr   r   r   r   r   r_   r=   r  r   rn  r   
itemgetter)r   r   cntrJ   s       rK   
_count_opsrw    sk    '%c*C77o%$$%*%  HHT6#))+8+>+>q+A4PQrN   c                    / n [        [        R                  R                  5       H  n[	        [        R                  R                  U5      n[        U[        R                  R                  5      (       d  MR  UR                  5        HJ  n[	        X#5      n[        R                  R                  UR                  ;   d  M8  U R                  U5          M     M     U $ rG   )dirr   r   r   r   r   r  OpOverloadPacket	overloadsr   	pointwiser   r   )r   	attr_nameopoverloadpacketoverloadop_overloads        rK   pointwise_opsr    s    -/C(	"599>>9=*EJJ,G,GHH(224H!"2=Kyy""k&6&66

+, 5 ) JrN   c                    U  Vs0 s H=  n[        U[        R                  R                  R                  5      (       d  M8  X!U   _M?     nn[        UR                  5       [        R                  " S5      SS9$ s  snf )Nr&   Trr  )	r   r   r   rJ   r   r   rn  r   ru  )r   	depth_maparg
arg_depthss       rK   sort_depthsr    sk     (,'+z#uxx}}?Q?Q/Rs^t   *""$(*=*=a*@$OOs   7A8 	A8c                  ^
^^ [         R                  " 5       m0 m
U R                  R                  SS9 H  nTR	                  UU
4S j5      T
U'   M     [        U R                  R                  5       VVs0 s H  u  p!X_M	     snnmSU
UU4S jjn[        [        [        U R                  R                  5      5      nSn[        R                  nU H(  nUR                   H  nTU   U:  d  M  TU   nUnM     M*     Uc  U $ [        U R                  R                  5      STU     HZ  nUR                  S:X  d  M  UR                  [        R                   R"                  R$                  R&                  L d  MR  U" U5        M\     [        U R                  R                  5      TU   S  H  nU" U5        M     [        R                   R)                  U T5      n	U	$ s  snnf )a|  
This pass finds the first bwd node in the graph (by looking at users of
tangents) and then reorders the graph by walking from this node to all the
way to the end of the graph. At each op in this traversal, we insert this op
in a new graph and try to bring only the relevant subgraph from the other
non-bwd edges relevant for this op. This closely mimics the behavior of
autograd engine.

Why is this pass required in the first place?

This is an artifact of how partitioners work today. The starting point of
partitioner is a joint graph, which is fwd and then bwd graph. In the case
of checkpointing, we keep portions of fwd graph in their original place in
the joint graph, while obtaining a bwd graph. As a result, the resulting bwd
graph has copies of recomputed fwd subgraphs followed by the original bwd
graph. If we run this naively, this leads to bad memory footprint, because
the fwd subgraphs are live for way longer duration than necessary. This pass
reorders the operations such that we prioritize the ops for the original bwd
graph while only realizing those ops from the fwd graph that are necessary
at any given point in the graph.
r   r1  c                   > TU    $ rG   r[   r   s    rK   r{   5reordering_to_mimic_autograd_engine.<locals>.<lambda>  s	    ArN   c                8  > U /n[        5       n[        U5      S:  aM  UR                  5       n X;   d  U T;   a  M,  UR                  U 5        XR                  -  n[        U5      S:  a  MM  [        UU4S jS9nU H  n TR                  U U4S j5      TU '   M     g )Nr   c                   > TU    $ rG   r[   )rw   orders    rK   r{   Sreordering_to_mimic_autograd_engine.<locals>.insert_node_in_graph.<locals>.<lambda>+  s	    %(rN   r}   c                   > TU    $ rG   r[   r   s    rK   r{   r  -  r   rN   )r$   r   r  r  all_input_nodesr   r   )rJ   	cur_nodesinsertable_nodesr   r  r  s      rK   insert_node_in_graphAreordering_to_mimic_autograd_engine.<locals>.insert_node_in_graph  s    F	0:)nq ==?D'43;  & ---I )nq  ""28JK$D!++D2BCCI %rN   Nr   rJ   r]   r^   None)r   r   r   r2  r   r  r   r  r  r6   mathinfr  r   r   r   r   r   r   r   GraphModule)gmrJ   rE  r  r  first_node_in_bwdminimum_ordertangentr  new_gmr   r  r  s             @@@rK   rO  rO    s   . 
I"$C ##}#5''.>?D	 6 )2"((..(AB(A93TY(ABED D& &bhhnn=>NHHM!MMDT{]* %d$(! " "  	 RXX^^$%?u->'?@77o%$++9M9M9U9U*U & A RXX^^$U+<%=%?@T" A XX!!"i0FM[ Cs   6Gc                   UR                   nUc  [        S5      eU R                  n	UR                  n
[        R                  R
                  R                  nU R                  R                  U5         U R                  R                  SU 35      n[        U5      UR                  S'   UnSSS5        UR                  R                  U5         UR                  R                  SU 35      n[        U5      UR                  S'   UnSSS5        [        UR                  5      nWUS'   U R                  R                  U5         U	R                  SUUR                  /UR                  Q7US9nSSS5        UR!                  W5        U	R#                  U5        [        UR                  5      nWUS'   U
R%                  U5         U
R                  SUUR                  /UR                  Q7US9nUR!                  U5        U
R#                  U5        SSS5        Xg4$ ! , (       d  f       GNx= f! , (       d  f       GN.= f! , (       d  f       N= f! , (       d  f       Xg4$ = f)	a  
Note [CUDA Graph Safe RNG Functionalization]

CUDA Graph capture doesn't work with get_rng_state and set_rng_state because these functions operate on CPU values,
while CUDA Graph RNG capture uses on-device CUDA tensors. To solve this, we use graphsafe_set_state with a
CUDA Generator registered to the CUDA Graph before capture begins. graphsafe_set_state updates the generator's pointer
to reference a different GeneratorImpl, ensuring subsequent calls are correctly forwarded to the desired generator
(and its cuda-tensor RNG state during graph capture).

For each RNG operation's forward/backward pair:

- We create two generators initialized with identical values
- Each forward and backward call advances its respective generator equally
- This keeps generators synchronized so forward and backward operations use matching RNG values

When forward is called multiple times before backward (causing desynchronization):

- We save the forward RNG state
- We update the backward Generator's state before executing backward

Before each CUDA Graph replay, replay_prologue updates captured RNG pointers with current states, ensuring backward Generator
changes are reflected during replay.

This function modifies both forward and backward computation graphs by:

Creating RNG state placeholders for both passes
Updating the forward node to use graph-safe RNG state
Updating the backward node to use graph-safe RNG state

For more details: https://github.com/pytorch/pytorch/issues/113541
Nzdevice_idx must not be Nonefwd_rng_state_r   bwd_rng_state_	rng_stater   r   r   )indexr   r   r   _prims	rng_primsgraphsafe_run_with_rng_staterM  r   r7   r   r  r   create_noder   r   r  r  inserting_before)rZ  r[  fw_nodebw_nodedevice	rng_countlast_fwd_inputlast_bwd_input
device_idxfw_graphbw_graphr  fwd_rng_statebwd_rng_state	fw_kwargsfunctional_fw_node
bwd_kwargs
rng_outputs                     rK   %apply_graphsafe_rng_functionalizationr  K  s   R J:;;HH#(<<#9#9#V#V  
	(	(	8!33nYK4PQ$?
$K5!& 
9 
	(	(	8!33nYK4PQ$?
$K5!&	 
9 W^^$I*Ik		(	(	1%11(..07<<0	 2 
 
2 !!"45  gnn%J+J{		"	"7	+))(..07<<0	 * 

 	%%j1G$ 
, ))M 
9	8 
9	8 
2	1 
,	+ ))s1   29H9H.+I AI
H+.
H= 
I
I!c                  ^' [         R                  " 5       nSS jnSS jm'SS jnU" U 5      nU" U5      nU" U5      n	0 n
U R                  R                   H  n[	        U5      (       d  M  [        UR                  S5      (       d  M2  [        R                  R                  UR                  R                  ;   d  Mf  UR                  U;  d  UR                  U	;  a  M  X{R                     nXR                     nXR                     nXS.X'   M     [        R                  R                  R                  n[        R                  R                  R                  nS nUR                  R!                  SS9 H  nSUR                  ;   d  M  Un  O   Uc  [#        S	5      e/ n[%        ['        UR                  R!                  SS95      5      n[%        ['        UR                  R!                  SS95      5      n[)        U'4S
 jU
R+                  5        5       5      nUR-                  [        R.                  " S5      5        [1        U5      S:  n[        R2                  R4                  n[4        R6                  =(       a<    U(       + =(       a.    UR8                  (       + =(       d    UR:                  R<                  n[?        U
R+                  5       5       GH   u  nnUS   nUS   nT'" U5      nUR                  nUR                  nU(       a*  Ub'  UR@                  S:X  a  [C        UUUUUUUU5      u  nnMb  URE                  U5         URG                  SUUR                  /URH                  Q7URJ                  S9nURG                  S[L        RN                  US40 S9nU" U5      URP                  S'   URG                  S[L        RN                  US40 S9n [R        RR                  " URP                  5      U l(        URU                  U 5        URW                  U5        URY                  U5        S S S 5        URE                  U5         S[%        U5       3n!UR[                  U!5      n"U" U5      U"RP                  S'   S S S 5        URE                  U5         URG                  SUW"UR                  /URH                  Q7URJ                  S9n URU                  U 5        URW                  U5        S S S 5        GM#     U(       a  [%        []        UR                  R!                  SS95      5      n#U#RH                  S   n$[1        U$5      U-
  n%U$S U% [_        U5      -   U$U%S  -   n&UR                  Ra                  U&5        UR                  RW                  U#5        URc                  5         URc                  5         X4$ ! , (       d  f       GN= f! , (       d  f       GNV= f! , (       d  f       GM  = f)Nc                &   0 nU R                   R                   Ht  nUR                  S:X  d  M  [        UR                  S5      (       d  M2  [
        R                  R                  UR                  R                  ;   d  Mf  X!UR                  '   Mv     U$ )Nr   r   )
r   r   r   r   r   r   r   r   r   r   )gmodrandom_nodesrJ   s      rK   get_rng_ops*functionalize_rng_ops.<locals>.get_rng_ops  sh    +-JJ$$D?*DKK00II559I9II*.TYY' % rN   c                B   SU R                   ;  a  gU R                   S   n[        U[        5      (       d  U4nU HL  n[        U[        R                  5      (       d  M$  UR
                  R                  S:X  d  M@  UR
                  s  $    [        R
                  " S5      $ )zF
Check the example value of the node outputs to find the device type.
r   Ncudacpu)r   r   r   r   rV  r  r   )rJ   
candidates	candidates      rK   
get_device)functionalize_rng_ops.<locals>.get_device  s     		!YYu%
*e,,$J#I)U\\22##((F2$+++ $
 ||E""rN   c                L   SSK Jn  U" 5       nUc  [        S5      eU   U bF  U R                  S:X  a6  UR	                  [
        R                  R                  5       5      sS S S 5        $ UR	                  [
        R                  " 5       5      sS S S 5        $ ! , (       d  f       g = f)Nr   )detect_fake_modezfake_mode must not be Noner  )torch._guardsr  r   r   from_tensorr   r  get_rng_state)r  r  	fake_modes      rK   get_sample_rng_state3functionalize_rng_ops.<locals>.get_sample_rng_state  sx    2$&	 !=>>!fkkV&; ,,UZZ-E-E-GH Y (()<)<)>? YYs   A B(#B
B#r   )fwdbwdr   r1  r  zaCouldn't find tangent node in graph inputs. This is unexpected, please file a bug if you see thisc              3  :   >#    U  H  nT" US    5      v   M     g7f)r  Nr[   )rv   	node_pairr  s     rK   rx   (functionalize_rng_ops.<locals>.<genexpr>  s"      6W
9U#$$6Ws   r  r&   r  r  r  r   r  r   r   rng_state_output_r   )r  fx.GraphModuler^   dict[str, fx.Node])rJ   r]   r^   torch.device | None)r  r  r^   torch.Tensor)2r  countr   r   r   r   r   r   r   r   r   r   r  r  run_and_save_rng_staterun_with_rng_stater2  r   r3  r  r$   valuesdiscardr  r   rx  r   graphsafe_rng_functionalizationfallback_randomtest_configs*graphsafe_rng_func_ignores_fallback_randomr  r   r  r  r  r   r   r   r   r   copyr  r  r   r   r4  r   r   	recompile)(r5  rZ  r[  num_sym_nodesuidr  r  joint_graph_rng_opsfw_graph_rng_opsbw_graph_rng_opsrecomputable_rng_ops_maprJ   	base_noder  r  run_and_save_rngr  bw_tangent_start_nodefw_rng_state_outputsr  r  devicesmulti_cuda_devices
ind_config'use_rng_graphsafe_rng_functionalizationr  r  r  r  r  r  stater  
state_namebw_rng_state_nodefw_output_node
fw_outputssym_node_start_idxr  r  s(                                          @rK   rN  rN    s   2 //
C	#$	@ &l3"9-"9-!""((4  V,,		11T[[5E5EE
 yy 00DIIEU4U+II6I&yy1G&yy1G:A2R$/ )  ||--DD//BB **m*<		!$(! = $o
 	
 +-(9??#=#=#=#OPQN(9??#=#=#=#OPQN 6N6U6U6W G OOELL'( W) ''J.. 	
""	
 *** R&&QQ , !**B*I*I*K L	9E"E"G$???? 4"v%-R	.*NN **73%-%9%9#$    #>> &: 	&" !,,#$$,a0	 -  %9$@

5!%11#$$*  2 
 #'))GLL"9
--j9##G,$++E2C 4H **+@A0c<
$,$8$8$D!0DV0L!&&u- B
 **73%11#& * !
 #>> 2 

 --j9##G, 43E !Mj d9??#=#=#=#JKL#((+
 _}<**+()*+,-. 	
 	w'"">2S 43H BA
 43s&   C&V65WAW6
W	
W	
W*	c                >   U R                   R                   H  n[        UR                  [        R
                  R                  5      (       d  M8  UR                  R                  S:X  d  MT  [        U5      (       a  Mf  [        R                  UR                  S'   M     g)z
By default, the partitioner is not allowed to recompute collectives
unless they come from a user-annotated AC region.
See Note [Recomputing collectives in the partitioner]
r   r   N)r   r   r   r   r   r  r  r   r   r%   rK  r   )r5  rJ   s     rK   r@  r@    sg     ""((t{{EJJ$9$9::%%);;"4((%5%?%?DIIk" )rN   c                   ^ SU4S jjmU R                   R                   HA  n[        U5      (       d  M  [        U5      (       a  M'  [	        U5      (       a  M9  T" U5        MC     g)a8  
Force save outputs from with_effects nodes wrapping effectful ops.

Effectful ops (registered via _register_effectful_op) should not be recomputed
because they may have arbitrary global side effects (I/O, RNG state, collectives,
etc.). We mark the tensor outputs of with_effects as MUST_SAVE to prevent
recomputation of the effectful op.

The with_effects node returns a tuple (token, result). We recursively find all
leaf outputs extracted via getitem and mark them as MUST_SAVE. Since these are
saved, the with_effects op doesn't need to be recomputed in backward.
c                   > U R                    H}  nUR                  [        R                  L d  M"  T" U5        [	        UR
                  R                  S5      [        [        45      (       a  M`  [        R                  UR
                  S'   M     g )Nr   r   )r  r   r   r   r   r   r   r   r  r%   rK  )rJ   r  mark_getitem_outputss     rK   r  6force_save_effectful_ops.<locals>.mark_getitem_outputs  s]    JJD{{h...$T*!$))--"6FF-=-G-GDIIk*	 rN   Nr  )r   r   r   r   r  )r5  rJ   r  s     @rK   rA  rA    sJ    H ""((D!!"4(((.. & )rN   c                   [        5       n[        U R                  R                  5       H  nUR                  S:X  a  M  UR
                  [        R                  R                  R                  R                  L nU(       a  [        U5      (       a  UR                  UR                  S   5        [        U5      (       aA  UR                  S   U;   a,  [        R                   UR                  S   R"                  S'   M  M  M    g    g )Nr   r   r&   r   )r$   r  r   r   r   r   r   r   r   r   r   r$  r  r   r!  r%   rK  r   )r5  has_mutation_in_bwrJ   is_copy_s       rK   rB  rB    s     5?L++11277h;;%)).."6"6">">>+D11"&&tyy|4*400TYYq\EW5W1A1K1K		!!!+. 6X0 ! 3rN   c                   U R                   [        R                  :w  a  gU R                  S   n[	        U5      [
        R                  La  [        S[	        U5       35      eSUR                  ;  =(       a    U R                  S:H  $ )NFr   z#expected parent to be fx.Node, got rI  r   )
r   r   r   r   r   r   r   r   r   r   )rJ   parents     rK   is_getitem_of_multi_outputr    sh    {{h&&&YYq\FF|277"B4<.QRR+J?0JJrN   c               \   U R                   R                   GH  n[        U5      (       a  UR                   Hx  n[        U5      (       d  M  SUR                  ;   d  M'  SUR                  ;   d  M9  UR                  S   UR                  S   :  d  M[  [
        R                  UR                  S'   Mz     UR                  R                  SS5      (       aB  [        S UR                   5       5      (       d  [
        R                  UR                  S'   M  M  GM  SUR                  ;  d  GM  [        S UR                   5       5      (       d  GM9  [        U5      (       a   SUR                  S   R                  ;   a  GMi  U(       d  GMs  [
        R                  UR                  S'   GM     U $ )z
If there are two consecutive checkpointed blocks with no operator in
between, we would still want to stash the tensor at the boundary of
checkpointed blocks. The following pass makes the last output node
non-recomputable to allow for that.
ac_graph_idr   has_backward_hookFc              3  8   #    U  H  n[        U5      v   M     g 7frG   r   r*  s     rK   rx   )cleanup_recompute_tags.<locals>.<genexpr>  s      E1;t$$r0  c              3  8   #    U  H  n[        U5      v   M     g 7frG   r  r*  s     rK   rx   r    s     @ZTN4((Zr0  r   )r   r   r   r  r   r%   rK  r   r   r  r   )r5  r"  rJ   r  s       rK   r>  r>    sH    ""(($

"4((%2%2		-0499]3KK-=-G-GDIIk* # yy}}0%88 E15E B B& *:)C)C		+&'B8* *@TZZ@@@ +400]diiPQlFWFW5W$$ &6%?%?DIIk"W )X rN   c                  ^^^^=^>^?^@^A^B^C^D^E^F^G^H^I^J^K^L Tc
  [        5       m[        5       mJ[        (       aQ  [        S U R                   5       5      nU[        S TJR                   5       5      -
  n[
        R                  SU5        SNS jm>SNS jm?SNU>U?UJ4S jjmB SS KmHSOUBUUJ4S	 jjmDSPUDUUJ4S
 jjnSOUB4S jjmC      SQUCUJ4S jjnTHR                  " 5       mI[        5       m=SRSSU=UUIUJ4S jjjn	U R                   GH  n
U
R                  S:X  a  M  U
TR                  ;   am  U
TR                  ;  a.  TIR                  U
R                  S-   S[         R"                  SS9  O/TIR                  U
R                  S-   S[         R"                  SS9  M  [%        U
5      (       a/  TIR                  U
R                  S-   S[         R"                  SS9  M  ['        U
5      (       a
  U	" U
S5        O[)        U
5      (       a	  U	" U
S5        U" U
5      nTR+                  U
5      (       a  U(       a  U	" X5        SU
R,                  ;  =(       a    SU
R,                  ;  =(       dB    SU
R,                  ;   =(       a,    [/        U
R,                  S   [0        R2                  5      (       + n[5        U
5      (       a  [7        [9        U
5      5      nS nOxU(       a\  [/        U
R,                  R;                  S5      [<        [>        45      (       d  [A        U
5      (       a  SnS nO([         R"                  nSnOU" U
TRB                  5      u  pU(       aN  U[         R"                  :X  d
  U[D        :X  a0  TIR                  U
R                  S-   U
R                  S-   USU 3S9  O+TIR                  U
R                  S-   U
R                  S-   US9  U
RF                   H=  nTIR                  U
R                  S-   UR                  S-   [         R"                  SS9  M?     GM     STUBU4S  jjnTRH                  (       GaR  TRJ                   GHA  nURF                   Vs/ s H,  nTR+                  U5      (       d  M  TRM                  U5      PM.     nnURF                   Vs/ s H  nTR+                  U5      (       d  M  UPM     nn[O        U5      S:  d  M  U" U[Q        U5      5      n[S        URF                  5       H  nTR+                  U5      (       d  M  TRM                  U5      U:  d  M2  TB" UU5      (       d  MB  UT=;   a  MJ  [
        R                  S!UTRM                  U5      UUTRM                  U5      5        U	" U5        M     GMD     TRT                  (       Ga  [        5       nU R                   GHi  nTR+                  U5      (       d  M  TRM                  U5      U4/nTRM                  U5      n[O        U5      S:  d  MR  [V        RX                  " U5      u  nnUU;   a  M2  UR[                  U5        TRM                  U5      US"-   :  aP  [O        U5      S:X  aA  [
        R                  S#UUTRM                  U5      TRM                  U5      5        U	" U5        M  URF                   H[  nTR+                  U5      (       d  M  TB" UU5      (       d  M+  UT=;  d  M3  [V        R\                  " UTRM                  U5      U45        M]     [O        U5      S:  a  GM  GMl      THR^                  " TIS$S5      u  nnUu  n5mG[        5       n6UI4SE jU5 5        H"  u  mLn7U6R                  UGUL4SF jU7 5       5        M$     [        5       n8U6 H?  u  n9n:U9S SG U:S SH :w  a  [        SIU9S SG  SJU:S SH  35      eU9S SG n-U8R[                  U-5        MA     [        U 5      mE[        U R                  5       V;V
s0 s H  u  n;oU;_M
     sn
n;mF[        UE4SK jU8 5       UF4SL jSM9n<U<T=4$ ! [         a  n[        S5      UeS nAff = fs  snf s  snf ! THR`                   Ga0  n[c        [d        Rf                  5      nS nS mAU Rh                  n  U (       a  U Rk                  S%S&S&S'9O
[m        U 5      mA[o        S(S) UA4S* jS+9  [q        S,S-5      n[s        US.5       n!U!Ru                  TA5        S S S 5        O.! , (       d  f       O = f! [v         a  nS/U S03n S nAOS nAff = fS1Ry                  THRz                  R|                  R                  TI5      5      m@[o        S(S2 U@4S3 jS+9  [        TI5      n"U"(       Ga  0 n#S$/n$SUS4 jn%U" H  u  n&n'n(U$R                  U'5        U&S$:X  a+  U%" U'5      n)U#R                  U)/ 5      R                  U(5        MI  U'S:X  a+  U%" U&5      n)U#R                  U)/ 5      R                  U(5        Mz  U%" U&5      U%" U'5      :X  a+  U%" U&5      n)U#R                  U)/ 5      R                  U(5        M  U%" U&5      n*U%" U'5      n+U#R                  U+/ 5      R                  S5U* 35        M     / n,U#R                  5        H8  u  n-n.U,R                  S6U- S735        U. H  n/U,R                  S8U/ 35        M     M:     S1Ry                  U,5      n0S9Ry                  U$5      n1[        TI5      u  n2mKTK(       a  [o        S(S: UK4S; jS+9  U(       a  S<U S13OSn3U2(       a	  U3S=U2 S13-  n3Sn4U(       a  S>n4[        S?U0 S@U1 S1U3 U4 35      Ue[
        R                  SA5        [
        R                  SB[        UHUI4SC j5      5        [        TI5        e S nAf[v         aF    [
        R                  SA5        [
        R                  SB[        UHUI4SD j5      5        [        TI5        e f = fs  sn
n;f )VNc              3     #    U  HS  nUR                   S :X  d  M  [        UR                  S5      (       d  M2  [        UR                  R                  5      v   MU     g7f)r   _overloadpacketN)r   r   r   r   r  r.  s     rK   rx    solve_min_cut.<locals>.<genexpr>  sK      &
)ww/) -.5dkkCT.U -C++,,)s   AA&Ac              3  8   #    U  H  n[        U5      v   M     g 7frG   )r   rv   rF  s     rK   rx   r  "  s      4
5qCFF5r0  z&Ops banned from re-materialization: %sc                X   UR                   [        R                  R                  R                  :w  a  gUR
                  S   n[        R                  R                  R                  U5      u  nnU H6  nUR                  U   nXL a    g[        U[        5      (       d  M/  X;   d  M6    g   gNFr   T)r   r   r   r  auto_functionalizedr   _higher_order_opsauto_functionalizeget_mutable_argsr   r   r  )ab
mutable_opmutable_arg_namesrK  r   r  s          rK   !can_fuse_into_auto_functionalized8solve_min_cut.<locals>.can_fuse_into_auto_functionalized'  s    88uyy--AAAVVAY
 ##66GG
	

 &D((4.Cx#t$$8 & rN   c                    UR                   [        R                  R                  R                  :w  a  gUR
                  S   nU H*  nUR
                  S   nUc  [        S5      eXC   nXL d  M*    g   g)NFtensors_to_cloner   zkwargs must not be NoneT)r   r   r   r  r  r   r   )r  r  r  r   r   r  s         rK   .can_fuse_into_triton_kernel_wrapper_functionalEsolve_min_cut.<locals>.can_fuse_into_triton_kernel_wrapper_functional;  sn    88uyy--NNNHH%78%D((8,F~$%>??,Cx & rN   c                  > [        U5      [        R                  :X  a  gT" X5      (       a  gT" X5      (       a  gU R                  [        R
                  L a?  U R                  S   R                  [        R                  R                  R                  L a  gTR                  U 5      =(       a    TR                  U5      $ )NTr   F)r9   r   catr   r   r   r   r   r   r  r  rL   )r  r  r  r  op_typess     rK   rL   !solve_min_cut.<locals>.is_fusibleH  s     1),Q229!??HH(((q	  yy%%FFG
 ""1%@(*=*=a*@@rN   r   zANeed networkx installed to perform smart recomputation heuristicsc                n  > TR                  U 5      (       a  g[        U /5      n[        U5      S:  a  UR                  5       nUR                   HQ  nTR                  U5      (       d  T" X#5      (       d    gTR                  U5      (       d  M@  UR                  U5        MS     [        U5      S:  a  M  gr  )rV   r$   r   r  r  r   r  )rJ   r  curr  rL   rX  r!  s       rK   is_materialized_backwards0solve_min_cut.<locals>.is_materialized_backwardsb  s    D!!v&	)nq --/C		 //55j>S>S##D))MM$'	 " )nq  rN   c                  > U R                   S:w  a  gU R                  [        R                  L a  gU R                  R                  SS5      [        R                  :X  a  g[        R                  (       a  TR                  U 5      (       a  gU R                  [        R                  R                  [        R                  R                  4;   a  gTR                  (       a  TR!                  U 5      (       d  gO?TR#                  U 5      (       a  gTR%                  U 5      (       a  g['        U 5      (       a  gTR(                  (       a8  T" U 5      (       a+  [*        R-                  S	U [/        U R0                  5      5        g
U R2                  S:  a  U R2                  [        R4                  :  a  gTR6                  (       a1  [9        S U R:                   5       5      n[=        U 5      nUS-  U:  a  gg)zRReturns reason string if node should be banned from recomputation, None otherwise.r   Nr   zmarked MUST_SAVEznot in recomputable allowlistz	random opzcompute intensive opznon-builtin opzmaterialized backwards: %s %szmaterialized in backwardi  ztoo far from backwardc              3  z   #    U  H1  n[        U[        R                  5      (       d  M$  [        U5      v   M3     g 7frG   )r   r   r   ro  r  s     rK   rx   Bsolve_min_cut.<locals>.should_ban_recomputation.<locals>.<genexpr>  s(      %%.*Q2HYs   #;;r   zreduction op)r   r   r   r   r   r   r%   rK  r   recompute_viewsrV   r   lift_fresh_copyr   
lift_freshr   rY   rS   rP   r  r   r=   r  r   r  dist_from_bwmax_dist_from_bwr   rm  r   ro  )rJ   input_tensors_sizeoutput_sizer%  min_cut_optionsr!  s      rK   should_ban_recomputation/solve_min_cut.<locals>.should_ban_recomputationp  s   77o%;;(***99==d+/?/I/II%!!h&6&6t&<&<;;4//779P9PQQ22++D116 2 !!$''",,T22-(..' 77<U=
 =
 II5tU4::=NO- $):):V=T=T)T* ++!$ %%)YY% " #4.KQ!33%rN   c                r   >^  T R                   S:X  a  g[        UU 4S jT R                   5       5      (       + $ )Nr   Tc              3  6   >#    U  H  nT" TU5      v   M     g 7frG   r[   )rv   r  rL   rJ   s     rK   rx   9solve_min_cut.<locals>.is_materialized.<locals>.<genexpr>  s     E*$z$--*s   )r   r  r  )rJ   rL   s   `rK   is_materialized&solve_min_cut.<locals>.is_materialized  s*    77m#E$**EEEErN   c           
       > [         R                  (       a  X;   a  g[        U 5      n[         R                  (       a(  TR	                  U 5      (       a  [
        R                  S4$ [        U R                  S   [        5      (       a4  [        U R                  S   [        R                  5      (       d  [        S4$ [        US[        [        U R                   S5      S5      -  -  5      nT" U 5      (       a  US4$ US	-  S4$ )
zReturns (weight, cannot_save_reason).

cannot_save_reason is None for finite weights, or a string explaining
why the node cannot be saved for infinite weights.
r  zview op (recompute_views=True)r   z$SymFloat (non-SymInt symbolic value)g?r  r&   N   )r    treat_parameters_as_free_to_savero  r*  rV   r  r  r   r   r   r   r   INT_INFr   rX  rY  r-  )rJ   rr   mem_szr7  r!  s      rK   get_node_weight&solve_min_cut.<locals>.get_node_weight  s     333$!!h&6&6t&<&< 88===dii&55dii.== FFF cST%6%6!<a@@A
 4  4<A:t##rN   r  c                F  > TR                  U 5      (       a  gU T;   ak  [        U R                  [        R                  R
                  5      =(       a    U R                  R                  S:H  n[        R                  (       d  U(       d  g[        U 5      (       a  gSU R                  ;   a-  [        U R                  S   [        R                  5      (       a  gTR                  U 5        TR                  SU R                  S-   [        R                   U(       a  SU 3OSS9  g	)
NFr   r   r   _inzcannot recompute: zcannot recomputecapacityreasonT)rV   r   r   r   r  r  r   r   r?  r   r   r   r  add_edger   r  r  )rJ   rD  is_collectivebanned_nodesdont_bannx_graphr!  s      rK   ban_recomputation_if_allowed3solve_min_cut.<locals>.ban_recomputation_if_allowed  s    D!!8 4;;

(=(=> @KK))-??  >>m $DII*TYYu-=u~~"N"N
 	IIXX4:'x0@R	 	 	
 rN   r   _outsinkz;must be available for backward: input required for gradientrB  rA  z3must be computed in backward: required for gradientz+must recompute: marked by checkpoint policyzprimal inputzforward RNG seedr   rI          znon-tensor outputzcannot save: )rC  zdata dependencyc                  > / nU  H,  n[         R                  " UT
R                  U5      US45        M.     [        U5      S:  a  [         R                  " U5      u  pEnU(       d  T
R                  U5      $ UR
                   Hi  nT
R                  U5      (       d  M  T
R                  U5      U:  a  M2  T
R                  U5      UT	" XW5      4nX;  d  MS  [         R                  " X(5        Mk     [        U5      S:  a  M  U$ )zl
Finds the first unfusible node in the chain of nodes starting from
`start_nodes` and returns its position.
Tr   )heapqheappushr   r   heappopr  r   )start_nodes	max_rangesorted_nodesrw   rK  rJ   node_is_fusibler  r   rL   rX  s            rK   find_first_unfusible+solve_min_cut.<locals>.find_first_unfusibleh	  s    
 9;ANN<)*@*@*CQ)MN  ,!#',}}\'B$A_" --d33

++D11 --d3i? !..t4"4.6C
 .|9 #	 ,!# rN   z1used above/below fusible %s:(%s) -> %s -> %s:(%s)r  ztoo long %s %s %s %sr   FTr  r  c                     SSS.$ )Nmin_cut_failed_fx_graphr  r  r[   r[   rN   rK   r{   solve_min_cut.<locals>.<lambda>	  s    5 (%rN   c                    > T $ rG   r[   )fx_graph_strs   rK   r{   r[  	  s    <rN   r  min_cut_failed_graphz.txtwz(failed to write: )
c                     SSS.$ )Nmin_cut_failed_edge_listr  r  r[   r[   rN   rK   r{   r[  	  s    2$!rN   c                    > T $ rG   r[   )edge_list_strs   rK   r{   r[  	  s    }rN   c                f    S H*  nU R                  U5      (       d  M  U S [        U5      *  s  $    U $ )N)rA  rL  )endswithr   )	node_namesuffixs     rK   get_base_name$solve_min_cut.<locals>.get_base_name
  s8    -F ))&11(CK<88 . ! rN   zdepends on z  :z    - z -> c                     SSS.$ )Nmin_cut_failed_svgr  r  r[   r[   rN   rK   r{   r[  9
  s     4$,)rN   c                    > T $ rG   r[   )svg_contents   rK   r{   r[  =
  s    {rN   zFX graph dump: zMin-cut graph visualization: z[Production debugging: Use tlparse to extract debug artifacts (min_cut_failed_fx_graph, min_cut_failed_edge_list, min_cut_failed_svg)]
a  AOT Autograd failed to partition the joint forward-backward graph.

The partitioner determines which intermediate values to save from the forward pass vs recompute in the backward pass. This error means a value is required for backward, but cannot be saved AND cannot be recomputed.

This is a bug in PyTorch. Please file an issue at https://github.com/pytorch/pytorch/issues

Nodes involved in the conflict:
z

[For PyTorch developers: one of the above constraints is wrong. Either the node should be recomputable, saveable, or not required for backward.]

[Debug: min-cut path] z-Failed to compute min-cut on following graph:rq  c                 l   > SR                  T R                  R                  R                  T5      5      $ Nra  join	readwriteedgelistgenerate_edgelistnxrI  s   rK   r{   r[  d
  #    		",,"7"7"I"I("STrN   c                 l   > SR                  T R                  R                  R                  T5      5      $ rr  rs  rx  s   rK   r{   r[  n
  rz  rN   c              3  0   >#    U  H  oTU   4v   M     g 7frG   r[   )rv   rw   rI  s     rK   rx   r  v
  s     8i$is   c              3  :   >#    U  H  oT;   d  M
  TU4v   M     g 7frG   r[   )rv   r  non_reachableus     rK   rx   r  w
  s     Ad=.@fq!fds   	znode_in[:-3]=z != node_out[:-4]=c              3  .   >#    U  H
  nTU   v   M     g 7frG   r[   rv   rJ   name_to_nodes     rK   rx   r  
  s     2	d		s   c                   > TU    $ rG   r[   )r   node_idxs    rK   r{   r[  
  s	    (1+rN   r}   )r  r]   r  r]   r^   r;   r\   )rJ   r]   r^   
str | None)rJ   r]   rr   rk   r^   ztuple[float, str | None])r  )rJ   r]   rD  r   r^   r;   )rS  ri   rT  r   r^   r   )rh  r   r^   r   )Kr$   get_default_op_listr<   r   rE   r=   r  networkxImportErrorr   DiGraphr   rm   rn   rE  r   r  r  r   r5   r4   r   r   r   r   rV  r   floatr   r   r   r   r1   rr   r<  r  r   r   r   r   rX  r   r   rP  rR  r  rQ  minimum_cutNetworkXUnboundedr;   r   handlersowning_moduler  r   r   _get_unique_pathopenwrite	Exceptionrt  ru  rv  rw  _find_infinite_capacity_pathr   
setdefaultrn  visualize_min_cut_graphr   r  r   get_name_to_noder  r   )Mr  rX  r1  rH  joint_module_opsops_ignoreder2  r>  rJ  rJ   
ban_reasonis_non_tensor_nodeweightcannot_save_reasonr  rW  	used_nodeordersfw_usersfirst_unfusible_usevisited
start_nodefusiblestart_orderrK  r$  	cut_value	partitionunbounded_excstructured_tracing_enabledfx_graph_filer5  finf_pathnode_constraintsraw_path_nodesrj  	from_nodeto_noderD  base	from_baseto_baseconstraint_linesrh  constraintscconstraints_strraw_path_strsvg_pathlocal_files_msgtlparse_msg	reachablecutsetnbrs	cut_nodesnode_innode_outrE  r>  rG  r  r  re  r]  rL   r7  r%  r  r  r~  ry  rI  r!  rp  r  sM    ```                                                         @@@@@@@@@@@@@@@@rK   solve_min_cutr    si    <"$H% &
#))&
 

 ' 4
$554
 *
 
 	9;G(A A& 9 9vF&$&$4G&$	!&$ &$P zz|H(2L @ !!77h9...9555!!II&!XXX	 "  !!II%!XXP	 "  $
 		E!D	   d(~> &&(/AB
 .d3
##D))j(: "E}DII'EUtyy SDIIe4Dell)S%S 	 t=./F!% 		e$}6F&G %%%)"%8")8i;;*&F
 6TXX#579J		E!		F"&'9&:;	   dii%/V1CfUJJD		F"		E!(	   g "X 4 ,,,"44I &OO+D++D1 -	&&t,+   "+!0I4L4LT4R   6{Q&:8S[&Q#!)//2D!0066%2248;NN&y$77</$O%%229=/ %2248 5T:! 3 5P 111'1|%++J++J77''
3Z@2G $00<Kg,"w/3'>C  **3/+2CCG)HH."!..s3!..z: 15IID!0066&sD11 4w1G1G1Mt0TU &) g,"" ,Fb!~~h&I	9F  )I}*4,F8i84AdAA 9 ",I#3B<8CR=( ~-?"O  CRL	i  $ $K0L+4[5F5F+GH+Gic4c	+GHH2	28ML %%W  O
	N	
R  V%))*<*<%=" %)#'"00	6
   ++!&tD ,  %   0 --CVLMmS)Q% *)) 	6015M	6 		",,"7"7"I"I("ST -	
 09 68&ZN! /7*	7F%%g. ((1D$//b9@@H&(3D$//b9@@H"9-w1GG(3D$//b9@@H !.i 8I+G4G$//<CC%i[1% /7. +-*:*@*@*B&	; ''"YKq(9:$A$++fQCL9 % +C
 #ii(89O!;;~6L %<H$E!Hk !  3 8E/-3"  %B8*B#OO K)a 
 4 ## $) *6b"#-! !!" 	@AT	
 	 ) 	@AT	
 	 )	6 Is   ` 	`+'`+`0*`0`5 o
`(`##`(5o)m10Ac';cc'm1
c$	 c'#m1$c''
d1c<7m1<dI0m11Aoc                   [        S/5      n[        S/ 4/5      nU(       a  UR                  5       u  p4U R                  U5       H  nXQ;   a  M
  X   U   nUR	                  SS5      nU[
        R                  :X  d  U[        :X  d  MC  UR	                  SS5      nX5U4n	XI/-   n
US:X  a  U
s  $ UR                  U5        UR                  XZ45        M     U(       a  M  g)zBFS from source to sink following only infinite-capacity edges.

Returns a list of (from_node, to_node, reason) tuples representing the path,
or None if no such path exists.
r   rC  r   rD  unknownrM  N)
r$   r   popleft
successorsr   r  r  r<  r  r   )rI  r  queuerJ   	edge_pathneighbor	edge_datarC  rD  new_edgenew_paths              rK   r  r  
  s     ($G <A8R.AQ;RE
--/ ++D1H" x0I }}Z3H488#x7':"x; F3$z1v%#OH%h12 2 %  rN   c                *   U  U 3n[         R                  R                  U5      (       d  U$ Sn[         R                  R                  U  SU U 35      (       a2  US-  n[         R                  R                  U  SU U 35      (       a  M2  U  SU U 3$ )zGet a unique file path, appending a counter if the file already exists.

For example, if "min_cut_failed.svg" exists, returns "min_cut_failed_1.svg".
r&   rK  )ospathexists)	base_name	extensionr  counters       rK   r  r  
  s    
 [$D77>>$G
''..I;ay<
=
=1 ''..I;ay<
=
=['9+..rN   c                   SSK n SSKnUR
                  R                  U 5      R                  5       nUR                  U5      S   nUR                  5        He  nXR                  5          UR                  5          S   nUR                  [        U5      5        U[        S5      :X  d  MT  UR                  S	5        Mg     UR!                  5       R#                  S
5      n[%        SS5      n['        US5       n	U	R)                  U5        SSS5        X4$ ! [         a    [        R	                  SSS9   gf = f! , (       d  f       X4$ = f)z~Visualize the min-cut graph to an SVG file.

Returns (path_to_svg, svg_content) tuple. Both are None if pydot is unavailable.
r   NzMInstall pydot to visualize the min-cut graph for debugging: pip install pydotT)exc_info)NNrC  r  redutf-8min_cut_failed.svgr_  )r  pydotr  r=   r  nx_pydotto_pydot	to_stringgraph_from_dot_data	get_edges
get_sourceget_destination	set_labelr   r  	set_color
create_svgdecoder  r  r  )
rI  ry  r  
dot_format	dot_graphedger  rp  r  r  s
             rK   r  r  
  s3     %%h/99;J))*5a8I##%//+,T-A-A-CDZPs6{#U5\!NN5! & &&(//8K   0&9H	h		 
   3  [ 	 	
 , 
	   s   D 7D8D54D58
Ec                 |   / [         R                  P[         R                  P[         R                  P[         R                  P[         R
                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                   P[         R"                  P[         R$                  P[         R&                  P[         R(                  P[         R*                  P[         R,                  P[         R.                  P[         R0                  P[         R2                  P[         R4                  P[         R6                  P[         R8                  P[         R:                  P[         R<                  P[         R>                  P[         R@                  P[         RB                  P[         RD                  P[         RF                  P[         RH                  P[         RJ                  P[         RL                  P[         RN                  P[         RP                  P[         RR                  P[         RT                  P[         RV                  P[         RX                  P[         RZ                  P[         R\                  P[         R^                  P[         R`                  P[         Rb                  P[         Rd                  P[         Rf                  P[         Rh                  P[         Rj                  P[         Rl                  P[         Rn                  P[         Rp                  P[         Rr                  P[         Rt                  P[         Rv                  P[         Rx                  P[         Rz                  P[         R|                  P[         R~                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[        R                  P[         R                  P[         R                  P[         R                  P[         R                  Pn [         R                  [         R                  [         R                  /nU[         R                  [         R                  [         R                  [        R                  [         R                  [         R                  [         R                  [         R                  [         R                  /	-  nUnU / [        R                  P[        R                  P[         R                  P[         R                  P[         R                  P[        R                  P[        R                  P[         R                  P[         R                  P[        R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[        R                  P[        R                  P-  n U [         R                  [         R                  /-  n X-  n U [        5       -  n U [         R                  /-  n U [         Vs/ s H  n[        U5      PM     sn-  n [        U 5      n[        [        S[        4      " [         R                  [         R                  [         R                  /5      n[         R                  [         R                  [         R                  [         GR                   [         GR                  [         GR                  [         GR                  [         GR                  [         GR
                  [         GR                  [         GR                  /nXE-  nG[        U[        U5      U[        U5      U5      $ s  snf )N.)r   r  r  r  atan2rU  rX  rY  pow	remainderfmod__and____or____xor__
__lshift__
__rshift__eqnegegtleltrN  bitwise_notceilfloorfracnegreluroundsilutruncr=   log10log1plog2lgammaexpexpm1erferfccosacoscoshsinasinsinhtanatantanhatanhsqrtrsqrtrT  sigmoidsoftplus	thresholdthreshold_backwardclampwherelerpaddcmulgelugelu_backwardrm  mean_grad_sum_to_sizesum_to_sizerO  totype_asr   r   squeeze	unsqueezersub_to_copyaliasr   slicetrP  broadcast_in_dimexpand
as_stridedpermuteselectrz  rQ  clone	full_likevarstd_unsafe_viewr   broadcast_tensorsscalar_tensorones	new_zerosr+  arangetriuvar_meanisinfr   fullzerosempty
empty_likeargmaxmaximumiota'_low_memory_max_pool_offsets_to_indicesr  gatherr  
zeros_liker   r   r$   r   r	   native_dropout	rand_like
randn_likemmconvolutionconvolution_backwardbmmaddmm#_scaled_dot_product_flash_attention'_scaled_dot_product_efficient_attention_flash_attention_forward_efficient_attention_forwardupsample_bilinear2d
_scaled_mmr?   )default_recomputable_opsrecomputable_view_opsrD   mrE   rC   rB   rA   s           rK   r  r  
  s   L:L:L: 	L: 	

	L:
 	L: 	L: 	L: 	L: 	L: 			L: 	L: 	L: 	L: 	L: 	L:  	!L:" 	#L:$ 	%L:& 	'L:( 	)L:* 	+L:, 	-L:. 	/L:0 			1L:2 	

3L:4 			5L:6 	7L:8 			9L:: 	

;L:< 			=L:> 	

?L:@ 	AL:B 	

CL:D 	

EL:F 			GL:H 	IL:J 	KL:L 	

ML:N 	OL:P 			QL:R 	SL:T 			UL:V 			WL:X 	YL:Z 			[L:\ 			]L:^ 	_L:` 			aL:b 			cL:d 	

eL:f 			gL:h 	

iL:j 	kL:l 	mL:n 	oL:p 	qL:r 	sL:t 	

uL:v 	

wL:x 			yL:z 	{L:| 			}L:~ 	L:@ 	AL:B 			CL:D 	EL:F 	GL:H 			IL:J 	KL:L 	ML:N 	OL:P 	QL:R 	SL:T 			UL:V 	WL:Z "\\4>>4::F		




 
 %H $!		$!""$! 	

$! 		$!
 	$! 			$! 			$! 	$! 	$! 	$! 	$! 	$! 			$! 	$! 	

$!  	!$!" 	#$!$ 	%$!& 			'$!( 	)$!* 	+$!, 	-$!. 			/$!0 	1$!2 	

3$!4 	5$!6 			7$!8 	9$!: 	

;$!< 	

=$!> 	?$!@ 	A$!B 	C$!D 	

E$!F 	55G$! $L T[[ 99(/!   N1!3A!6 NN!":;HS#X./			dnndoo>J 	!!

0044%%))   #/K()8 + !Os   6d9c                L    0 nU R                    H  nX!UR                  '   M     U$ rG   )r   r   )r   r  rJ   s      rK   r  r    s'    ')L"&TYY rN   c           	        [         R                  nUS:X  a  [        XU5      $ US:X  a  [        XU5      $ US:X  a  [	        XU5      $ US:X  a  [        XU5      $ US:X  aR  [        R                  S5        [        R                  " U UUUS9n[	        UU[        US9R                  [        US	95      $ [        U[        5      (       a  U" XX4U5      u  pS
X4$ [        SU 35      e)Ngreedyilpdpr)   dynamic_memory_budget_dpzdynamic_memory_budget_dp is an experimental solver. It does not guarantee performance improvements. Additionally, it is not guaranteed to be stable.)r  all_recomputable_banned_nodes recorded_knapsack_input_memories recorded_knapsack_input_runtimes)graph_info_provider)knapsack_algomax_mem_budgetrN  z,Not aware of memory budget knapsack solver: )r   activation_memory_budget_solverr*   r+   r(   r)   r=   warningr'   inialize_from_graphr,   get_knee_point_memory_budgetr   r   r   )
r  memoryruntimes
max_memoryrX  ra  SOLVERrd  saved_node_idxrecomp_node_idxs
             rK   #_optimize_runtime_with_given_memoryrq    s    33Fv<<	5Fj99	46Z88	3	3-f
KK	-	-?	

 0CC#*G-3-5	
 $7**)) + 	
 		
 
F0	1	1*08U+
' ^55I&RSSrN   no_dispatchc                   ^ [        U R                  5      nSU4S jjnU Vs/ s H
  oC" U5      PM     nnU R                  5        Vs/ s H
  oC" U5      PM     nnU R                  X%S9$ s  snf s  snf )Nc                   > [        U TS9$ )Nrc  )r    )drd  s    rK   realize_symbol8_remove_symbols_without_guarding.<locals>.realize_symbol  s     X66rN   )stride)rv  ztorch.SymInt | intr^   r   )r  shapery  new_empty_strided)r   rd  rz  rw  r  ry  s    `    rK    _remove_symbols_without_guardingr|    sk    ME7 )..1^AE.)*4AnQF4u44 /4s   A*A/c                  ^ ^^	 [         R                  nSS jnUS:X  a  gUS:X  ac  [        5          SSKJn  [
        R                  " UT R                  T R                  45      u  mm	UR                  UU	U 4S j5      nUsS S S 5        $ US:X  as  SS	K
Jn  [
        R                  " UT R                  T R                  45      u  mm	U" S
S9 nT R                  " T0 T	D6  S S S 5        WR                  5       n[        US5      $ [        U[         5      (       a  U" T 5      $ [#        SU 35      e! , (       d  f       g = f! , (       d  f       Nf= f)Nc                   [        U [        R                  5      (       aC  [        U R                  S   [        R
                  5      (       a  [        U R                  S   SS9$ [        U [        R                  5      (       aC  [        U R                  S   [        R                  5      (       a  [        U R                  S   SS9$ [        U [        R                  5      (       a-  [        U R                  S   [        R                  5      (       a  g[        U [        R                  5      (       a-  [        U R                  S   [        R                  5      (       a  gU $ )Nr   rb  rc        ?T)r   r   r   r   r   rV  r|  r   r    r   r   re  s    rK   materialize_arg)estimate_runtime.<locals>.materialize_arg  s    a!!j&M&M3AFF5MDQQ277##
166%=%,,(O(O$QVVE]TBB277##
166%=%..(Q(Q277##
166%=%--(P(PHrN   testingr&   profiler   )benchmarkerc                 (   > TR                   " T 0 TD6$ rG   )r   )r   r   rJ   s   rK   r{   "estimate_runtime.<locals>.<lambda>  s    4;;3O3OrN   flops)FlopCounterModeF)displayz Not aware of runtime estimator: )r   r	   r^   r	   )r   *activation_memory_budget_runtime_estimatorrs  $torch._inductor.runtime.benchmarkingr  r   tree_mapr   r   benchmark_gputorch.utils.flop_counterr  r   get_total_flopsrX  r   r   r   )
rJ   RUNTIME_MODEr  r  msr  modecounted_flopsr   r   s
   `       @@rK   estimate_runtimer    s   DDL
 y 		"]H!???TYY<TULD&**+OPB ] 
	 <DKK8PQfU+tKK(( , ,,.=!$$	L"8	9	9D!! =l^LMM/ ] ,+s   AD-D>-
D;>
Ec           	       ^ ^^^ ^!^"^#^$^%^&^'^(^)^* US:  d  US:  a  [        SU 35      e[        [        R                  [        R                  [        R
                  [        R                  [        R                  S9n[        R                  (       a  [        USSSSS9nUS:X  a  TR                  $ [        T TU5      u  pEUS:X  a  U$ S-S jm!T!" TR                  5      m(T!" U5      m&T&T(::  a  U$ S.U&U(4S jjnS/U!U&U(4S	 jjm"[        USSSS
9n[        T TU5      u  pT"" U5      U:  a  U$ [        USS9m[        T TT5      u  pT"" U	5      U:  a  U	$ SSKJm#  [        U#4S jTR                   5       5      m%    S0U#U%4S jjnU" U
5      nU Vs/ s H5  nUR                  R!                  SS5      ["        R$                  :X  d  M3  UPM7     nnU Vs/ s H  oU;  d  M
  UPM     nn['        U[(        SS9m [+        T 5      S:X  a  TR                  U-   $ T  Vs/ s H  o" [)        U5      5      PM     snm'T  Vs/ s H  n[-        U5      PM     snm*SSKJm)          S1UU U'U)U*4S jjm$[        R2                  (       Ga  S2U"U$U UU*4S jjnU" S5      U" S5      /nUS   SS  US   SS  :w  a  US   US   4/nU(       a  UR5                  5       u  nnUS   US   -
  S:  a$  UR7                  U5        UR7                  U5        MM  U" US   US   -   S-  5      nUSS  USS  :w  a  UR7                  UU45        USS  USS  :w  a  UR7                  UU45        U(       a  M  UR9                  5         SS KJn  U Vs/ s H  nUS   PM
     nnU Vs/ s H  nUS   PM
     nnUR?                  SS9  URA                  UUSS9  [C        U5       H   u  nnURE                  US UUU   4SSS S!9  M"     URG                  S"5        URI                  S#5        URK                  S$5        URM                  S5        URO                  5       nURQ                  5         [R        RT                  " 5       n[        RV                  b%  [        RV                  n[R        RX                  " USS%9  S&n[Z        R\                  R_                  5       (       aD  [Z        R\                  Ra                  5       (       a!  S'[Z        R\                  Rc                  5        3n[R        Rd                  Rg                  US(U S)[i        5        S*35      nURk                  U5        [l        Ro                  S+U5        T$" UTT S,9S   $ s  snf s  snf s  snf s  snf s  snf s  snf )3Nr&   r   zJThe valid ranges for memory budget are 0 <= m <= 1. The provided value is )r   r   r   r   r   F)r   r   r   r   c                :    [        [        [        U 5      5      S-  $ N    eA)rm  mapro  )r>  s    rK   estimate_activations_size:choose_saved_values_set.<locals>.estimate_activations_size"  s    3x./#55rN   c                   > U S-  TT-
  -  $ r  r[   )szmax_act_sizemin_act_sizes    rK   get_normalized_size4choose_saved_values_set.<locals>.get_normalized_size+  s    S\L899rN   c                &   > T" U 5      T-
  TT-
  -  $ rG   r[   )activationsr  r  r  s    rK   get_mem_ratio.choose_saved_values_set.<locals>.get_mem_ratio.  s"    )+6E<'
 	
rN   )r   r   r   )r   )get_node_storagec              3  4   >#    U  H  nT" U5      v   M     g 7frG   r[   )rv   rJ   r  s     rK   rx   *choose_saved_values_set.<locals>.<genexpr>L  s     TCS4 0 6 6CSrj  c                   > U  Vs/ s H>  nUR                   [        S5      :  d  M  T" U5      T;  d  [        U5      (       d  M<  UPM@     sn$ s  snf r  )r-  r   r  )rG  rF  r  input_storagess     rK   get_recomputable_banned_nodes>choose_saved_values_set.<locals>.get_recomputable_banned_nodesN  sX    
 "
! S)	  %Q'~=03 !
 	
 
s   AAAr   Trr  rr  c                  > T" 5          [        UTT[        U S5      UT5      u  nnnS S S 5        [        5       nW H  n UR                  TU   5        M     UR                  T5      (       d  [        S5      e[        UUTU5      u  p[        (       a+  [        UTWUWT V
s/ s H  n
[        U
5      PM     sn
TTUS9	  UW4$ ! , (       d  f       N= f! [         a     M  f = fs  sn
f )Nr   z:dont_ban must be a subset of all_recomputable_banned_nodes)	r  ra  saved_node_idxsrecomputable_node_idxsexpected_runtimememories_banned_nodes normalized_memories_banned_nodesruntimes_banned_nodesmin_cut_saved_values)rq  rX  r$   r  BaseExceptionissubsetr   r  r<   r   ro  )memory_budgetrX  r  r  r  r  rH  rE  r>  rK  rF  aggressive_optionsra  r  rs  r  s              rK   get_saved_values_knapsack:choose_saved_values_set.<locals>.get_saved_values_knapsacky  s    ]
 4%%M1%-	 &	  )3)C:3?@ *   !>?? L  (	
 ! 4'.K /'=!1)F')FAHQK)F' 2G&;%1 ---] ]$ ! *'s#   C C"C%
C
C"!C"c                F   > T" U TTS9u  pU [        T5      U-
  T" U5      4$ )N)rX  r  )rm  )r  r>  r  r  r  r  rX  r  s      rK   estimate_for_budget4choose_saved_values_set.<locals>.estimate_for_budget  s=    -FYK.*L )*-==l+ rN   rN  r  gMbP?r:  )
      )figsizeo)markerz.4fzoffset points)r   r  center)
textcoordsxytexthazMemory Budgetz Runtime of Recomputed Componentsz:Pareto Frontier of Memory Budget vs. Recomputation Runtime)exist_okr  _rank_memory_budget_paretorK  r  z%Generated Pareto frontier curve at %s)r  rX  r  )r>  ri   r^   r  )r  r  r^   r  )r  ri   r^   r  )rG  rk   r^   ri   )r  r  rX  rg   r  fx.Graphr^   ztuple[list[fx.Node], float])r  r  r^   ztuple[float, float, float])8r   r   r   ban_recompute_used_far_apart!ban_recompute_long_fusible_chains#ban_recompute_materialized_backwardban_recompute_not_in_allowlistban_recompute_reductionsaggressive_recomputationr   rj   r  torch._inductor.fx_utilsr  r$   r   r   r%   rK  r   ro  r   r  torch.utils._mode_utilsrs  visualize_memory_budget_paretor  r   sortmatplotlib.pyplotpyplotfigureplotr  annotatexlabelylabeltitlegridgcfshowr  getcwdmemory_budget_pareto_dirmakedirsr   r  r  is_initializedget_rankr  rt  r2   savefigr=   rh  )+r  rX  r  r1  runtime_optimized_saved_valuesrK  r  more_aggressive_optionsmore_aggressive_saved_values%aggressive_recomputation_saved_valuesrG  r  recomputable_banned_nodesrF  must_save_nodesrJ   r  optionsbisectslhsrhsmidpltitemx_valuesy_valuestxtfigfig_dirrank_suffixfig_namer  ra  r  r  r  r  r  r  r  r  rs  r  s+   ``                             @@@@@@@@@@@@rK   choose_saved_values_setr    s   
 qMA-XYfXgh
 	
 $$AA#)#K#K%+%O%O & E E88O &&!"'',).$)
 (5)%" --6 -Y-=-=>L,-KLL|#--: :
 

 &##(%*	 '4Y 7'#  12]B++  % ;HY 2;7) :;mK449T9CSCSTTN
)
	
 
  !>l K +*A66::k5)-=-G-GG 	
*   -!,a0H,  ! %+!x%! ()Q./112O2OQHQK(2O ,I+H4+H 41.1.)11.@H1.	$1. 1.f ,,,	 	 's+-@-EF1:ab>WQZ^+
GAJ/0G";;=Sq6CF?T)NN3'NN3')3q6CF?a*?@qr7c!"g%NNC:.qr7c!"g%NNC:. ' 	'(/0DG0(/0DG0 	

7
#8C0  )FAsLLs)hqk"*   * 	

?#

56		NOggi
))+**655GKK$/))++0A0A0P0P0R0R"5#4#4#=#=#?"@AK77<<+K=:L:N9OtT
 	H;XF %#yk	 	m
!n 10s0   2U)U)	U.U.U38U8;U=Vc                  ^^ SSK Jm  SS jnSU4S jjn[        R                  R	                  5       (       Gaz  [        R                  R                  5       (       GaV  [        R                  R                  5       S:  Ga3  U" U 5      (       Ga%  U" U 5      (       Ga  [        5          T" 5          U Vs/ s H  oDR                  PM     sn/n[        [        R                  R                  5       5       Vs/ s H  n/ PM     nn[        R                  R                  XuS   5        [        U 5      n/ n	0 n
[        U5       H|  u  pU Vs/ s H  oU   PM	     nnSnU HF  n[        U5      nUU-  nU[        R                  R                  5       :X  d  M7  UU
UR                  '   MH     XS'   U	R                  U5        M~     [        R                   " U	[        R                  R"                  R%                  5       S9n[        R                  R'                  U[        R                  R"                  R(                  R*                  S9  [-        [        R.                  " U5      R1                  5       5      nS	U S
U
 3m[3        SS U4S jS9  UU    Vs/ s H  nUU   PM
     nnS S S 5        S S S 5        U$ U$ s  snf s  snf s  snf s  snf ! , (       d  f       N.= f! , (       d  f       U$ = f)Nr   )unset_fake_temporarilyc                    U R                    HT  n[        UR                  [        R                  R
                  5      (       d  M8  UR                  R                  S;   d  MT    g   g)N>   c10d_functionalr   TF)r   r   r   r   r  r  r   )r  rJ   s     rK   has_collectives3_sync_decision_cross_ranks.<locals>.has_collectives   sM    %%DUZZ22 ++''+RR	 &
 rN   c                  >^ SR                  S U R                   5       5      n[        R                  " UR	                  S5      5      R                  5       n[        [        R                  R                  5       5       Vs/ s H  nS PM     snm[        5          T" 5          [        R                  R                  TU5        S S S 5        S S S 5        [        U4S jT 5       5      $ s  snf ! , (       d  f       N0= f! , (       d  f       N9= f)N/c              3  8   #    U  H  oR                   v   M     g 7frG   r  )rv   r   s     rK   rx   E_sync_decision_cross_ranks.<locals>.has_same_nodes.<locals>.<genexpr>  s     >,=qFF,=r0  r  c              3  4   >#    U  H  nTS    U:H  v   M     g7fr  r[   )rv   r   
all_inputss     rK   rx   r    s     :z!:a=A%zrj  )rt  r   hashlibsha256encode	hexdigestrC  r   r  get_world_sizers  all_gather_objectr  )r  node_strrj   rK  r  r  s       @rK   has_same_nodes2_sync_decision_cross_ranks.<locals>.has_same_nodes  s    
 88>K,=,=>> 89CCE$)%*;*;*J*J*L$MN$Mqd$MN
]24//
FC 5] :z:::	 O44]]s*   C(C>"!C-C>-
C;	7C>>
Dr&   z
total size)r  r1  zpicked_rank_idx=z, saved_nodes of current rank=r  c                     SSS.$ )N)aot_joint_graph_sync_decision_cross_ranksr  r  r[   r[   rN   rK   r{   ,_sync_decision_cross_ranks.<locals>.<lambda>=  s    G (%rN   c                    > T $ rG   r[   )sync_decision_cross_ranks_strs   rK   r{   r  A  s    #@rN   r  )r  torch.fx.Graphr^   r;   )torch._subclasses.fake_tensorr  r   r  r  r  r  rs  r   rC  r  r  r  ro  r  r   rp  distributed_c10d_get_object_coll_device
all_reduceReduceOpMAXr   argminr  r   )r  r>  r  r  r   objectsrK  saved_ops_names_all_ranksr  saved_sizessaved_ops_with_sizesrE  saved_ops_namesop_namesaved_nodes
saved_sizerJ   size_of_nodesaved_sizes_tensorpicked_rank_idxrw   r  r  s                        @@rK   rM  rM    s    E; 	&&((,,..,,.2K((;'']24(45156G!%"3"3"B"B"DE:EqE & : //0IST:V+K8L%'K35 (12K(L$DSTOG4OT
'D#+D>L,.Je//88:::F,TYY7	 (
 6@\2"":. )M "'((99QQS" (("u'8'8'I'I'R'R'V'V )  "%,,/A"B"G"G"IJO.>>OOm  oC  nD  -E) A *C?)S)SAQ)S  Q 5]X <W 6: U:Q 54]]X sb   'K/K
4J6	-K
6J;AK
	K 
9K
C=K
K K
"K6K


K	K
K*c                2   SnU(       a  SOSn[        [        U R                  R                  SS95      5      nU R                  R                  S[        R
                  R                  R                  S9 GH  n[        XR                  S   R                  5      n[        U[        R                  5      (       d  MG  / nUR                  R                  SS9 H  nX8R                  ;   d  M  U R                  R                  U5         U R                  R!                  U SU 35      n	US	-  nUR"                  S
   U	R"                  S
'   U	nUR%                  U	5        SSS5        M     U(       d  M  U R                  R                  U5         U R                  R'                  S[        R
                  R                  R                  / UR                  QUQ70 5      n
UR)                  U
SS9  SSS5        UR"                  R+                  S5      nU(       a9  Uu  p/ UQU Vs/ s H  oR"                  S
   PM     snQ7nUU4W
R"                  S'   U R                  R-                  U5        GM     U $ ! , (       d  f       GM  = f! , (       d  f       N= fs  snf )u  
Graph-safe RNG lets torch.compile use CUDA Graphs for graphs with RNG ops.
For graphs without HOPs, the partitioner adds placeholder nodes
fwd_rng_state_* and bw_rng_state_* to the forward and backward graphs. At
runtime, the AOTDispatcher retrieves these RNG states and passes them to the
compiled graphs.

This works well for no-HOP graphs. With HOPs, the partitioner runs
recursively: it first partitions the HOP (producing forward/backward HOP
subgraphs) and then stitches them back into the outer joint graph. For HOPs
that contain RNG ops, the outer joint graph now includes HOP subgraph
modules with extra RNG placeholders. We must thread these placeholders
through the outer module partitioned forward and backward graphs—this
function does exactly that. It collects the RNG placeholder nodes from the
HOPs and creates corresponding placeholders in the outer forward and
backward graphs.

There is a catch: for a short period, the joint graph is in a “bad” state.
The HOP subgraphs expect additional inputs (because of the new
placeholders), but the outer graph call sites don't yet provide them. We
can't fix this in the joint graph because the joint graph's input signature
is fixed (primals, tangents). As a compromise, we keep the joint graph in
somewhat of a bad state for some time and, once the outer forward and
backward graphs are partitioned, insert the corresponding RNG placeholders
and wire up the calls.
r   r  r  r   r1  r   )r   r   rK  r&   r   NT)propagate_metaeager_input_vals)r3  r  r   r2  r   r   r  invoke_subgraphr   r   r   r   r   r  r   rM  r   r   r   r  r  r   r  )moduler  r  
rng_string
last_inputhop_noder  new_rng_inputsplaceholder_noder  new_hop_node_with_fixed_args
eager_vals
eager_argseager_kwargsinpnew_eager_argss                   rK   rQ  rQ  K  sO   < I$/_Jhv||66-6HIJJLL++599#9#9#I#I ,  6==#3#:#:;h//,.N$,NN$=$=$=$O !6!66  55jA$*LL$<$<)l!I;7%	 "Q	0@0E0Ee0L	u-%.
&--i8 BA	 %P ~\\11(;39<<3K3K'		..>>9(--9.9	40 224T 3  < &]]../AB
/9,J&#&5CD^c((5/^D&N
 '$M0556HI ''1Y\ MI BA <;" Es    AI0A J J0
J 
J	c           	       ^^^^ [        U R                  5      m[        5       mU R                  R                   Hy  nUR                  S:X  a"  SUR
                  ;   a  TR                  U5        O![        U5      (       a  TR                  U5        UT;   d  M^  TR                  UR                  5        M{     [        [        [        U R                  R                  5      5      n[        [        [        U R                  R                  5      5      nXE-   n[        XS9u  pxpTR                  5       nTR                  S U 5       5        [!        U R                  XgU	S5      n[        U4S jUR                   5       5      m[        UU4S jU R                  R                   5       5      n[        U4S j[#        U5       5       5      nS	n0 nU R                  R                   H  nUT;   d  M  UUU'   US
-  nM     [%        UTTUUUU5      $ )Nr   tangentsr  c              3  R   #    U  H  oc  M  UR                   S:w  d  M  Uv   M     g 7f)Nr   r1  )rv   r  s     rK   rx   !classify_nodes.<locals>.<genexpr>  s"      aADDH4D;s   ''	'r   c              3  f   >#    U  H&  nUR                   S :w  d  M  TUR                     v   M(     g7fr  r  r  s     rK   rx   r;    s/      8,D77h 	 TYY,s   11c              3  H   >#    U  H  nUT;  d  M  UT;  d  M  Uv   M     g 7frG   r[   )rv   rJ   rm   r   s     rK   rx   r;    s1      6,D(( 	-19J-J 	,s   
""	"c              3  <   >#    U  H  u  pUT;   d  M  Uv   M     g 7frG   r[   )rv   rF  pr  s      rK   rx   r;    s!      -.da!7T2T.s   	r   r&   )r  r   r$   r   r   r   r  r   r  r  r  r  r5   r4   r;  r  r  r  rg   )r5  r  r6  rJ   r  r	  rj   r7  r8  r9  r:  rn   forward_only_graphro   rr   fw_cntrq   r  rm   r   s    `               @@@rK   rC  rC    s   
 $L$6$67L-7\""((77m#
dkk(A!!$'!$''!!$'$$$$TZZ0 ) 
L,>,>,D,DEFM!&)<l>P>P>V>V"WX3F O CK/ )--/   <F1BI .8 8&,,8 .
 ,6 6 &&,,6 ,O
 #- -.- # FH""(($$#HTNaKF ) # rN   )r  c          	     
  ^ U R                   R                  5         U R                  5         U R                   n[        R                  (       a  [        U5      nX`l         U R                   n[        U 5      n[        U 5      n	U(       a
  [        U SS9n [        R                  (       d  [        U 5        [        U 5        [        U 5        Uc  / n[        XU5      n
[        U
R                  5      S:X  a  [!        U UUUU
R"                  S9$ [%        U R                   R&                  5       H  nUR(                  S:X  a  [+        S5      Ul        M%  U
R/                  U5      (       d	  SUl        MD  [+        S5      Ul        UR0                   H+  n[3        UR,                  UR,                  S-   5      Ul        M-     M     [        R4                  nUR&                   HC  n[7        UR8                  R;                  S	S5      [<        5      (       d  M4  UR8                  S	   n  O   [?        UU
US
9n[        R@                  (       a  [A        X~5      nSS jm[C        [E        U4S jU5      5      n[C        [E        [F        U5      5      n[C        [E        S U5      5      n[I        U UUUUU
R"                  S9u  nnU(       a!  U	(       a  [K        U UU[        U5      5      u  nn[M        U5      n[        RN                  (       a  SSK(J'n  U" UUUU
R"                  5        [S        U5      n[S        U5      n[U        USS9n[U        USS9n[V        (       Ga  [Y        U Vs/ s H  n[[        U5      []        U5      4PM     sn5      n[_        S U 5       5      S-  n[`        Rc                  SU5        [`        Rc                  SU5        [e        S UR                   R&                   5       5      n[e        S UR                   R&                   5       5      nUU-  n[g        [*        5      nUR                   R&                   H\  nURh                  U;   d  M  [k        URl                  S5      (       d  M2  U[]        URl                  Rn                  5      ==   S-  ss'   M^     [`        Rc                  S[        U5      [        U5      [        U5      5        [Y        URq                  5       [r        Rt                  " S5      SS9n[`        Rc                  SU5        UU4$ s  snf )a(  
Partitions the joint graph such that the backward recomputes the forward.
Recomputing helps in trading off memory bandwidth with computation.

To create the fwd and bwd graph, we copy the joint graph, manually set the
outputs to just original forward or backward outputs. And then we run the
resulting graphs through dead code elimination.

.. warning::
    This API is experimental and likely to change.

Args:
    joint_module(fx.GraphModule): The joint forward and backward graph. This
        is the result of AOT Autograd tracing.
    _joint_inputs: The inputs to the joint graph. This is unused.
    compiler: This option determines the default set of recomputable ops.
        Currently, there are two options: ``nvfuser`` and ``inductor``.
    recomputable_ops: This is an optional set of recomputable ops. If this
        is not None, then this set of ops will be used instead of the
        default set of ops.
    num_fwd_outputs: The number of outputs from the forward graph.

Returns:
    Returns the generated forward and backward Fx graph modules.
Fr!  Nr   )r6  r  rr   r   r  r&   r  )r  c                    [        U R                  R                  S5      [        R                  5      =(       a;    [        U R                  5      S:  =(       a    [        S U R                   5       5      $ )Nr   r   c              3     #    U  H>  oR                   [        R                  R                  R                  R
                  L v   M@     g 7frG   )r   r   r   r   rE  r   )rv   r  s     rK   rx   Wmin_cut_rematerialization_partition.<locals>._is_assert_only_symbool.<locals>.<genexpr>G  s+     Ww!HH		 = = E EEws   AA)r   r   r   r   r   r   r  r  rw   s    rK   _is_assert_only_symboolDmin_cut_rematerialization_partition.<locals>._is_assert_only_symboolC  sP    qvvzz%(%--8 XAGGq XWqwwWW	
rN   c                B   > [        U 5      =(       a    T" U 5      (       + $ rG   r5  )rw   rG  s    rK   r{   5min_cut_rematerialization_partition.<locals>.<lambda>L  s    k!nG-DQ-G)GGrN   c                P    [        U 5      (       + =(       a    [        U 5      (       + $ rG   )r   r1   rF  s    rK   r{   rJ  Q  s    [^+EN14E0EErN   r6  r8  r:  Tc              3  8   #    U  H  n[        U5      v   M     g 7frG   )ro  r  s     rK   rx   6min_cut_rematerialization_partition.<locals>.<genexpr>  s     'J\\r0  z'Theoretical Activations Stored: %.2f GBz,Theoretical Per Activation Storage Sizes: %sc              3  \   #    U  H"  oR                   S :X  d  M  UR                  v   M$     g7fr   Nr  r.  s     rK   rx   rM    #      %
"7$77o;UIDII"7r   c              3  \   #    U  H"  oR                   S :X  d  M  UR                  v   M$     g7frO  r  r.  s     rK   rx   rM    rP  r   r  z# remat/fw/bw: %d/%d/%drr  zCount of Ops Rematerialized: %sr   );r   r   r  r   cser8   r   r   r>  r?  r@  rA  rB  rC  r   rm   r\  rr   r  r   r   r   r-  r   r  rY  activation_memory_budgetr   r   r   r  r  rM  r  r  r1   r  rN  rO  r9  rP  r:   rQ  r<   r   ro  r   rm  r=   r  r$   r   r   r   r   r  rn  r   ru  )r5  rR  compilerr6  r  r   	cse_graphr  rV  rW  rX  rJ   r  r  r>  r  r  rZ  r[  r9  rF  sorted_sizestotal_activations_size_gbfw_module_nodesbw_module_nodesremat_nodescountsrematerialized_opsrG  s                               @rK   r=  r=    sc   D **,D zz &	&$$K!5l!C%=l%K"!-lQVW::|,\*|,$,(*%_I 9&&'1, +*G(1(M(M
 	
 ++11277h #CD))$// !D #CD

$'(9(94;L;Lq;P$Q! # 3 33M!!diimmOT:EBB IIo6M " +#L ((1+L
 G	
O
 f^\BCE|TL
 4'-'$-$I$IIy ")#8iC4H$ Iy 4I>I **	
 	%11		
 y)Iy)I.yeLI.ydKIlKlSV4lKL %('J\'J$JS$P!:<UV 	?N$ %
"+//"7"7%
 
 % %
"+//"7"7%
 
 &7!,S!1OO))DyyK'GDKKAR,S,Ss4;;6678A=8 * 	%  		
 $LLN 3 3A 6
 	24FGi= Ls    T
c                "   U(       aZ  [         R                  " U R                  5      n[        R                  " X5      n U R                  R
                   H
  n0 Ul        M     [        R                  R                  U5      u  pU
(       d  S[        R                  -   n
[        R                  SX5        [        R                  " U UUUS9nUR!                  5       n[#        USU
R%                  S5      -   5      nU	 U
 3nUc	  U" U5        g U" XS9  g )Nrw  zWriting FX graph to file: %s%s)parse_stack_tracedot_graph_shapewrite_)prog)r  deepcopyr   r   r  r   r   r  r  splitextr   torch_compile_graph_formatr=   r  r#   FxGraphDrawerget_main_dot_graphr   lstrip)tracedfnamefigname
clear_metara  r^  r_  r  rJ   r  extgr   write_methods                 rK   
draw_graphro    s     MM&,,/	2LL&&DDI '  'IDF555HH-t9""+'		A 	
A1hC89LfSENE|UU&rN   r\   )r   r  r^   r;   )rJ   r]   r^   r   )r   r]   r^   fx.Node | None)rJ   r]   r^   rp  )rJ   r]   r   zdict[fx.Node, Any]r^   rp  )NF)r  r  rj   ri   r  ri   r  zlist[AOTOutput]r  r  r  r;   r^   r  )r5  r  r6  r   r^   zEtuple[list[fx.Node], list[fx.Node], list[AOTOutput], list[AOTOutput]])r>  ri   r   r   r^   r  )rD  z#list[fx.Node] | tuple[fx.Node, ...]r^   r   )g      @r  r   )r   r  rJ   torch.fx.NoderX  r  rY  r  rZ  r   r^   rq  )r   r  rJ   rq  ra  rq  rf  torch.dtyperS  r  re  r  rZ  r   r^   rq  )rp  r  r^   r  )r^   zlist[torch.dtype])rJ   rq  r^   r;   )r^   rr  )r{  rr  r^   ztuple[float, float])r   )r   r  r6  r   r^   r  )r   r  r^   r  )
r  r  r  r  r  r  r6  r   r^   r  )Nr   )r>  ri   r  r  r  r  rr   OrderedSet[fx.Node] | Noner6  r   r^   r  rG   )r5  r  r>  ri   r  ri   r  zlist[fx.Node] | Noner6  r   rr   rs  r  r;   r  r;   r^   %tuple[fx.GraphModule, fx.GraphModule])r5  r  rR  r	   r6  r   r  list[int] | Nonerr   rs  r^   rt  )rn  r   r{  rr  r^   r   )r   r  r^   r  )r^   z!list[torch._ops.OpOverloadPacket])r   ztuple[Any, ...]r  rp   r^   zlist[tuple[fx.Node, int]])r  r  r^   r  )rZ  torch.fx.GraphModuler[  rv  r  rq  r  rq  r  ztorch.devicer  r   r  rq  r  rq  r^   z#tuple[torch.fx.Node, torch.fx.Node])
r5  r  rZ  r  r[  r  r  r   r^   rt  )r5  r  r^   r  )r5  r  r"  r;   r^   r  )
r  r  rX  rg   r1  r   rH  rs  r^   z)tuple[list[fx.Node], OrderedSet[fx.Node]])rI  nx.DiGraph[str, dict[str, Any]]r^   z!list[tuple[str, str, str]] | None)r  r   r  r   r^   r   )rI  rw  r^   ztuple[str | None, str | None])r^   r?   )r   r  r^   r  )r  r  rk  list[float]rl  rx  rm  r  rX  rg   ra  ri   r^   z"tuple[float, list[int], list[int]])r   r  rd  r   r^   r  )rJ   r]   r^   r  )r&   )r  r  rX  rg   r  r  r^   ri   )r  r  r>  list[torch.fx.Node]r^   ry  )r,  r  r  r;   r^   r  )r5  r  r  z	list[int]r6  r   r^   rg   )r  )r5  r  rR  r	   rT  r   r6  r   r  ru  r^   rt  )fx_graphTNFN)rh  rv  ri  r   rj  r   rk  r;   ra  zstr | list[str] | Noner^  r;   r_  r  r^   r  )
__future__r   r  r   r  rP  r  loggingr  r   r  os.pathr  r;  rt  r   r   collections.abcr   dataclassesr   r   typingr	   r
   r   torch._inductor.inductor_primstorch.distributedtorch.fxr   torch.utils._pytreeutils_pytreer   torch._dynamo.utilsr   r   ;torch._functorch._activation_checkpointing.ac_logging_utilsr   $torch._functorch._aot_autograd.utilsr   torch._inductorr   r  !torch._inductor.custom_graph_passr   r   "torch._library.fake_class_registryr   torch._library.utilsr   torch._loggingr   r   torch._logging._internalr   r  r   %torch.fx.experimental._backward_stater   "torch.fx.experimental.proxy_tensorr   r   torch.fx.experimental.sym_noder   r   %torch.fx.experimental.symbolic_shapesr   r   r   r    r!   r"   torch.fx.passesr#   torch.utils._ordered_setr$   torch.utils.checkpointr%   r  -_activation_checkpointing.graph_info_providerr'   "_activation_checkpointing.knapsackr(   r)   r*   r+   ,_activation_checkpointing.knapsack_evaluatorr,   _aot_autograd.descriptorsr-   r.   r/   _aot_autograd.functional_utilsr0   _aot_autograd.graph_compiler1   _aot_autograd.logging_utilsr2   _aot_autograd.utilsr3   r4   r5   r6   r7   compile_utilsr8   r9   r:   r  ry  sympydebug_partitionerr<   rd   	getLoggerr_   r=   r   r   rP  r?   rg   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r!  r$  r   r   r;  r@  rG  rb  rl  rr  r|  r  r  r  r  r  r  r  r  r\  r   r<  r_  ro  rw  cacher  r  rO  r  rN  r@  rA  rB  r  r>  r  r  r  r  r  r  rq  r  rs  r|  r  r  rM  rQ  rC  r=  ro  r[   rN   rK   <module>r     s
   "         	  	  * $ * %  %   $ $ < A 5 @ + 7 . A ? H L  ) / 3  L  L 
 A 7 ;  I H  %66 t 6''1^ 1yy~~		 > > >2      B    I 2-
	 H  $)rrr r #	r
 r "r rjXCBJK5J J69JJJ$; KK
K 
K 
	K
 K K\C!C!
C! C! 	C!
 C! C! C! C!L9"G45	FGRMGh 	XXX *X 	X
 
X~ ?C'
'
'
 '
 "<	'

 '
 
'
\ 04	k" ?C$)&+k" k"k" #k" -	k" k" "<k" "k"  $k" +k"f 7;>Bb  b b  	b 
 $4b  "<b  +b J c(":R  "P
P&8PPK\[*#[*#[* [* 	[*
 [* [* "[* "[* )[*|a  a a  a  	a 
 +a H@'<.K5 5;?55x ,0	v	&v	&v	& #v	& )	v	&
 /v	&r-&B/$!-$!"$!NeP-T-T-T -T 	-T
 -T $1-T (-T` 05*N` z	z	z	 z	 	z	zNN/BNNbOO)-OOd> >#,> > 	>H |  7;|  | |  | 
 |  $4|  +| D #'#"&' '' ' 	'
 !' '  ' 
'rN   