
    3jHq                        S SK r S SKrS SKJs  Jr  S SKJr  S SKJ	r	  SSK
JrJr  SSKJr  \ R                  " \5      rS\R$                  S\R&                  S	\4S
 jrS\R$                  S\R&                  SSS	S4S jrS\R$                  S	\R.                  4S jr\	\\\4      " 5       rS\R.                  S	S4S jrS\R.                  S	\4S jrS\R$                  S\SS4S jrS\R$                  4S jr S r!S r"g)    N)is_symbolic)
OrderedSet   )configir)Vxcomm_buffer_typereturnc                    [        U 5      n[        U[        R                  5      (       a  g[        U[        R                  5      (       a  UR                  5       (       d  gUR                  5       n[        U[        R                  5      (       a  g[        U[        R                  5      (       a  g[        U[        R                  5      (       a  [        UR                  5       5      (       d  gg)zY
Check if an input can be realized as a comm buffer of the specified
`comm_buffer_type`.
TF)	_get_data
isinstancer   LoopsBuffershould_allocateget_output_specCommBufferLayoutFixedLayoutFlexibleLayoutr   	get_numel)r	   r
   datalayouts       W/home/wildlama/miniconda3/lib/python3.13/site-packages/torch/_inductor/comm_lowering.pycan_realize_as_comm_bufferr   7   s     Q<D$!! $		""4+?+?+A+A!!#F&"--..&"..))&"++,,[AQ5R5R    
group_name,torch.distributed.distributed_c10d.GroupNamec                    U R                  5         [        U 5      n[        U[        R                  5      (       d   eUR                  5       n[        U[        R                  5      (       a  g[        U[        R                  [        R                  45      (       d  [        SU S35      e[        UR                  5       5      (       a  [        SU S35      e[        R                  " UUUS9Ul        g)z
Realize an input as a comm buffer of the specified `comm_buffer_type`.

Specifically, this realizes the underlying buffer if it's still unrealized
and changes the layout of the buffer to `ir.CommBufferLayout`.
Nz`A buffer can only be realized as a comm buffer if it has `FlexibleLayout` or `FixedLayout` (got ).zGA buffer with symbolic shape cannot be converted to a comm buffer (got )r   r
   r   )realizer   r   r   r   r   r   r   r   AssertionErrorr   r   r   )r	   r
   r   bufferr   s        r   realize_as_comm_bufferr#   U   s     IIKq\Ffbii((((##%F&"--.. fr00"..ABB::@E
 	

 6##%&&""(-
 	

 '')FMr   c                    [        U R                  [        R                  5      (       aW  U R                  R	                  5       n[        U[        R                  [        R
                  45      (       d   eUR                  $ [        U R                  [        R                  5      (       a  U R                  R                  $ [        SU R                   S35      e)Nz\Expect the data attr of a `TensorBox` to be either an `ir.BaseView` or `ir.StorageBox` (got r   )r   r   r   BaseViewunwrap_view
MutableBox
StorageBoxr!   )r	   nodes     r   r   r   }   s    !&&"++&&vv!!#$bmm <====yy	AFFBMM	*	*vv{{889xrC
 	
r   c                 |    [         R                  [        [        R                  5      U R                  5       45        g)z
If a non-blocking collective is lowered as a blocking collective, the wait
node in the original graph becomes useless and we can skip the lowering it.
N)_bufs_to_skip_waitaddidr   graphget_namer	   s    r   mark_as_skip_waitr1      s%    
 BqwwK67r   c                 b    [        [        R                  5      U R                  5       4[        ;   $ N)r-   r   r.   r/   r+   r0   s    r   should_skip_waitr4      s!    qwwK&*<<<r   inp	reduce_opc                 l   SSK Jn  U R                  5       U R                  5       R                  -  n[
        R                  R                  =(       ac    U" U5      =(       aT    [        U [        R                  R                  5      =(       a)    US:H  =(       a    U[
        R                  R                  :*  $ )Nr   )is_symm_mem_enabled_for_groupsum)#torch.distributed._symmetric_memoryr8   r   	get_dtypeitemsizer   _collectiveauto_selectr   r   CommBufferTypeSYMM_MEM#one_shot_all_reduce_threshold_bytes)r5   r6   r   r8   inp_sizes        r   $_should_lower_as_one_shot_all_reducerC      s    
 R}}!9!99H&& 	O)*5	O&sB,=,=,F,FG	O 	O **NNNr   c           	      B   [        U [        R                  R                  U5        [        R
                  " [        R                  R                  [        R                  R                  [        R                  R                  R                  R                  U UU5      5      $ r3   )r#   r   r?   r@   pytreetree_map	TensorBoxcreateFallbackKerneltorchopssymm_memone_shot_all_reducedefault)r5   r6   r   s      r   _one_shot_all_reducerO      sn    3 1 1 : :JG??

  II22::		
 r   c            	        ^^^^^^^  [         R                  R                  R                    SSKJmJ	mJ
mJmJm  UUU4S jn [         R                  R                  mU " TR                  5      S[        R                  S[        SS	S
[        R                  4UU4S jj5       nU " TR                   5      S[        R                  S[        SS	S
[        R                  4UU4S jj5       nU " TR"                  5      UU4S j5       nU " TR$                  5      U4S j5       nS
[        R&                  4S jmU " TR(                  5      UU4S j5       nU " TR*                  5      U4S j5       nU " TR,                  5      U4S j5       nU " TR.                  5      UU4S j5       nU " TR0                  5      U4S j5       n	U " TR2                  5      U4S j5       n
U " TR4                  5      UU4S j5       nU " TR6                  5      UU4S j5       nU " TR8                  5      U4S j5       nU " [         R                  R:                  R<                  5      U4S j5       nU " TR>                  5      U4S j5       nU " TR@                  5      UU4S j5       nU " TRB                  5      U4S j5       nU " TRD                  5      U4S j5       ng! [         a    [
        R                  S5         gf = f)z,
Register lowerings for the comm subsystem.
zRInductor support for distributed collectives depends on building torch.distributedNr   )add_layout_constraintcloneconstrain_to_fx_stridescopy_register_loweringc                 &   > T" U T5        T" U 5      $ r3    )fnrQ   rS   rU   s    r   register_comm_lowering7register_comm_lowerings.<locals>.register_comm_lowering   s    b"9: $$r   r5   r6   r   r   r   c                   > [        XU5      (       a  [        XU5      $ T" U 5      n [        R                  (       aG  U R	                  5         [
        R                  R                  R                  U R                  5       5        [        R                  R                  U 5      n [        R                  R                  TR                  R                   U UU5        U $ r3   )rC   rO   r    reorder_for_compute_comm_overlapr    r   r.   no_fuse_buffer_namesr,   r/   r   ExternKernelrequire_contiguous_AllReduce_Kernelcreate_inplaceall_reduce_rN   )r5   r6   r   c10drR   s      r   _all_reduce,register_comm_lowerings.<locals>._all_reduce   s     0
KK'
CC Cj22 KKMGG((,,S\\^<oo005 	++$$		
 
r   c                   > [        XU5      (       a   T" U [        XU5      5      n[        U5        U $ [        R                  R                  U 5      n [        R                  R                  TR                  R                  U UU5        U $ r3   )
rC   rO   r1   r   r^   r_   r`   ra   rb   rN   )r5   r6   r   retrc   rT   s       r   _all_reduce_-register_comm_lowerings.<locals>._all_reduce_   s     0
KK$SZ@C c"J oo005
++$$		
 
r   c                    > U  Vs/ s H  nT" U5      PM     n n[         R                  R                  TR                  R                  U UU5        U $ s  snf r3   r   _CollectiveKernelra   all_reduce_coalesced_rN   )inputsr6   r   r5   rc   rR   s       r   _all_reduce_coalesced6register_comm_lowerings.<locals>._all_reduce_coalesced  sS    (./%*/
++&&..		
  0s   Ac                 t   > [         R                  R                  TR                  R                  U UU5        U $ r3   rk   )rn   r6   r   rc   s      r   _all_reduce_coalesced_7register_comm_lowerings.<locals>._all_reduce_coalesced_  s6    
++&&..		
 r   c                     [         R                  R                  " X/UQ76 n[        U[         R                  5      (       d   e[         R
                  R                  U5      $ r3   )r   rl   create_out_of_placer   IRNoderG   rH   )kernelrn   argsr)   s       r   _create_out_of_place5register_comm_lowerings.<locals>._create_out_of_place"  sI    ##77NN$		****||""4((r   c                 B   > T" TR                   R                  U UU5      $ r3   )all_gather_into_tensorrN   )r5   
group_sizer   ry   rc   s      r   _all_gather_into_tensor8register_comm_lowerings.<locals>._all_gather_into_tensor'  s(    #''//	
 	
r   c           	         > [         R                  " [        R                  R                  [        R
                  R                  TR                  R                  U UU5      5      $ r3   )	rE   rF   r   rG   rH   rl   ru    all_gather_into_tensor_coalescedrN   )rn   r}   r   rc   s      r   !_all_gather_into_tensor_coalescedBregister_comm_lowerings.<locals>._all_gather_into_tensor_coalesced0  sM    LL  4455==	
 	
r   c                r   > [         R                  R                  TR                  R                  U UUUS9  U$ N)out)r   rl   ra   all_gather_into_tensor_outrN   )r5   r}   r   r   rc   s       r   _all_gather_into_tensor_out<register_comm_lowerings.<locals>._all_gather_into_tensor_out<  s>    
++++33 	, 	
 
r   c                 D   > T" TR                   R                  U UUU5      $ r3   )reduce_scatter_tensorrN   )r5   r6   r}   r   ry   rc   s       r   _reduce_scatter_tensor7register_comm_lowerings.<locals>._reduce_scatter_tensorG  s+    #&&..
 	
r   c          	      t   > [         R                  R                  TR                  R                  U UUUUS9  U$ r   )r   rl   ra   reduce_scatter_tensor_outrN   )r5   r6   r}   r   r   rc   s        r   _reduce_scatter_tensor_out;register_comm_lowerings.<locals>._reduce_scatter_tensor_outQ  sA    
++**22 	, 	
 
r   c           
         > [         R                  " [        R                  R                  [        R
                  R                  TR                  R                  U UUU5      5      $ r3   )	rE   rF   r   rG   rH   rl   ru   reduce_scatter_tensor_coalescedrN   )rn   r6   r}   r   rc   s       r    _reduce_scatter_tensor_coalescedAregister_comm_lowerings.<locals>._reduce_scatter_tensor_coalesced]  sP    LL  4444<<	
 		
r   c                 D   > T" TR                   R                  U UUU5      $ r3   )all_to_all_singlerN   )r5   output_split_sizesinput_split_sizesr   ry   rc   s       r   _all_to_all_single3register_comm_lowerings.<locals>._all_to_all_singlej  s+    #""**
 	
r   c                    > T" U 5      n [         R                  R                  TR                  R                  XU5        U $ r3   r   rl   ra   
broadcast_rN   )r5   srcr   rc   rR   s      r   
_broadcast+register_comm_lowerings.<locals>._broadcastt  s8    Cj
++OO##Sz	
 
r   c                 r   > [         R                  R                  TR                  R                  XU5        U $ r3   r   )r5   r   r   rc   s      r   _broadcast_,register_comm_lowerings.<locals>._broadcast_|  s/    
++OO##Sz	
 
r   c                 t   > T" [         R                  R                  R                  R                  U UUU5      $ r3   )rJ   rK   _dtensorshard_dim_alltoallrN   )r5   
gather_dim	shard_dimr   ry   s       r   _shard_dim_alltoall4register_comm_lowerings.<locals>._shard_dim_alltoall  s5    #II1199
 	
r   c                    > [        U 5      (       a  U $ [        R                  R                  TR                  R
                  U 5        U $ r3   )r4   r   _WaitKernelcreate_waitwait_tensorrN   )r5   rc   s    r   _wait_tensor-register_comm_lowerings.<locals>._wait_tensor  s8    C  J
""4#3#3#;#;SA
r   c                 ~   > [         R                  R                  U 5      n T" TR                  R                  XX#5      $ r3   )r   r^   r_   isendrN   )r5   dsttagr   ry   rc   s       r   _isend'register_comm_lowerings.<locals>._isend  s0    oo005#DJJ$6$6#RRr   c                    > [         R                  R                  U 5      n [         R                  R	                  TR
                  R                  XX#5        U $ r3   )r   r^   r_   rl   ra   irecvrN   )r5   r   r   r   rc   s       r   _irecv'register_comm_lowerings.<locals>._irecv  sC    oo005
++JJ#	
 
r   c           	        > U Vs/ s H"  n[         R                  R                  U5      PM$     nnTR                  R                  n[
        R                  R                     [         R                  R                  UU UUUU5      u  nnn	n
nS S S 5        W(       a   U SU 35       e[        U W5       HJ  u  pUR                  5         US:X  d  M  [
        R                  R                  UR                  5       5        ML     US   R                  5       n[         R                  " [         R                  " US9UUW	W
5      n/ n[!        [        XW5      5       H  u  nu  pnUS:X  a~  UR"                  R%                  [         R&                  " [         R(                  " US9X_5      5        UR*                  R%                  UR                  5       5        UR%                  U5        M  [         R,                  " [         R                  R/                  U5      U[0        U4/5      nUR%                  [         R2                  R5                  U5      5        M     U$ s  snf ! , (       d  f       GN= f)N r   r   )device)r   r^   r_   batch_p2p_opsrN   r   r.   	fake_moderl   process_kernelzipr    mark_buffer_mutatedr/   
get_deviceMultiOutputLayout	enumeratemutation_outputsappendMutationOutput
NoneLayoutalias_namesMultiOutputtensor_to_layoutlistrG   rH   )op_list	peer_listtag_listtensorsr   trw   example_outputtensor_argsnon_tensor_argsunflatten_argsunbacked_bindingsop
tensor_argr   packedresultsiex_outplaceholderrc   s                       r   _batch_p2p_ops/register_comm_lowerings.<locals>._batch_p2p_ops  s   BIJ'Q2??55a8'J##++WW $$33!  %E2C1D&EE$!';7NB W}++J,?,?,AB 8
 Q**,%%  /
 "+C.,Q"RAvW}''..%%bmm6&BAN ""))!**,7q! !nn((99&AAYK
 r||22;?@ #S a Ks   )I!+I
I()#rJ   rK   _c10d_functional
all_reduceAttributeErrorloginfoloweringrQ   rR   rS   rT   rU   r   rG   strrb   all_reduce_coalescedrm   rv   r|   r   r   r   r   r   r   	broadcastr   r   r   r   r   r   r   )rY   rd   rh   ro   rr   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   ry   rQ   rc   rR   rS   rT   rU   s                      @@@@@@@r   register_comm_loweringsr      s   		""-- % 99%%DDOO,\\ C 
	 -< D,,-\\ C 
	 .0 D556 7 D667 8)ryy )
 D778
 9
 DAAB	
 C	
 D;;< = D667
 8
 D::;	 <	 D@@A

 B

 D223
 4
 DNN+ , DOO, - EII..AAB
 C
 D,,- . DJJ'S (S DJJ' ( D../1 01G   	
 	s   $K K/.K/c                  V  ^^^  [         R                  R                  mTR                    SSKJn   U " TR                  R                  TR                  R                  5        U " TR                  R                  TR                  R                  5        SSKJn  S[        R                   S[        R"                  S	S
S[        R                   4S jmS[        R                   S	[$        S[        R                   4U4S jjmU" TR                  5      S[        R                   S[$        S	[$        4UU4S jj5       nU" TR                  5      S[        R                   S[$        S	[$        S[        R                   4UU4S jj5       nU" TR                  5      S[        R                   S[        R                   S[$        S	[$        4UU4S jj5       nU" TR                  5      S[        R                   S[        R                   S[$        S	[$        S[        R                   4
UU4S jj5       nU" TR&                  5      S[        R                   S[$        S	[$        4UU4S jj5       nU" TR(                  5      S[        R                   S[$        S	[$        S[        R                   4UU4S jj5       nU" TR*                  5      S[        R                   S[$        S	[$        4UU4S jj5       nU" TR,                  5      S[        R                   S[$        S	[$        4UU4S jj5       n	U" TR.                  5      S[        R                   S[$        S	[$        S[        R                   4UU4S jj5       n
U" TR0                  5      S[        R                   S[$        S[2        S	[$        S[        R                   4
UU4S jj5       nU" TR4                  5      S[        R                   S	[$        S[        R                   4UU4S jj5       nU" TR6                  5      S[        R                   S	[$        S[8        S[        R                   4UU4S  jj5       nU" TR:                  5      S[        R                   S[        R                   S![        R                   S"[        R                   S	[$        4
UU4S# jj5       nU" TR<                  5       S+S[        R                   S[        R                   S![        R                   S"[        R                   S	[$        4
UU4S$ jjj5       nU" TR>                  5      S[        R                   S[        R                   S%[        R                   S"[        R                   S	[$        4
UU4S& jj5       nU" TR@                  5       S,S'[        R                   S([        R                   S[2        S	[$        S[$        4
UU4S) jjj5       nU" TRB                  5       S,S([        R                   S	[$        S[$        4UU4S* jjj5       ng! [         a    [
        R                  S5         gf = f)-z@
Register lowerings for symmetric memory (symm_mem) operations.
z7symm_mem ops not available, skipping symm_mem loweringsNr   )register_out_variantr   )rU   r5   r
   r   r   r   c                     U R                  5         [        R                  R                  U R	                  5       U R                  5       U R                  5       U R                  5       S9n[        X1U5        U$ )z
Fallback: insert a Pointwise identity copy allocated in P2P via
CommBufferLayout.  Used when we don't control the input's allocation.
)r   dtypeinner_fnranges)	r    r   	PointwiserH   r   r;   make_loaderget_sizer#   )r5   r
   r   copys       r   _copy_input_to_comm_buffer?register_symm_mem_lowerings.<locals>._copy_input_to_comm_buffer  s]     	||"">>#--/__&<<>	 # 
 	tzBr   c                    > [        U [        R                  R                  5      (       a'  [	        U [        R                  R                  U5        U $ T" U [        R                  R                  U5      $ )a3  
Ensure inp is in P2P memory for a symm_mem collective.

If inductor controls the buffer's allocation (ComputedBuffer,
or any buffer with FlexibleLayout/FixedLayout), switch its
layout to CommBufferLayout in-place, zero-copy.

If inductor does not control allocation (e.g. InputBuffer),
insert a Pointwise identity copy into a new CommBufferLayout buffer.
This adds an extra Triton kernel. Returns the possibly new TensorBox.

TODO(tianrengao): eliminate the extra kernel for static-shape
InputBuffers by pre-allocating P2P memory in the wrapper and DMA .copy_()
)r   r   r?   r@   r#   )r5   r   r   s     r   _maybe_realize_symm_mem<register_symm_mem_lowerings.<locals>._maybe_realize_symm_mem  s]    $ &c2+<+<+E+EFF"3(9(9(B(BJOJ-!!** r   r6   c           	         > T" X5      n [         R                  " [        R                  R                  [        R
                  R	                  TR                  R                  U UU5      5      $ r3   )rE   rF   r   rG   rH   rI   rM   rN   r5   r6   r   r   rL   s      r   _symm_mem_one_shot_all_reduceBregister_symm_mem_lowerings.<locals>._symm_mem_one_shot_all_reduce#  sY     &c6LL$$,,44	
 	
r   r   c           
         > T" X5      n [         R                  " [        R                  R                  [        R
                  R	                  TR                  R                  U UUU5      5      $ r3   )rE   rF   r   rG   rH   rI   one_shot_all_reduce_outrN   r5   r6   r   r   r   rL   s       r   !_symm_mem_one_shot_all_reduce_outFregister_symm_mem_lowerings.<locals>._symm_mem_one_shot_all_reduce_out4  s\     &c6LL$$0088	
 		
r   symm_bufferlocal_inputc           
         > T" X5      n [         R                  " [        R                  R                  [        R
                  R	                  TR                  R                  U UUU5      5      $ r3   )rE   rF   r   rG   rH   rI   one_shot_all_reduce_copyrN   )r  r  r6   r   r   rL   s       r   "_symm_mem_one_shot_all_reduce_copyGregister_symm_mem_lowerings.<locals>._symm_mem_one_shot_all_reduce_copyG  s\     .kFLL$$1199	
 		
r   c                    > T" X5      n [         R                  " [        R                  R                  [        R
                  R	                  TR                  R                  U UUUU5      5      $ r3   )rE   rF   r   rG   rH   rI   one_shot_all_reduce_copy_outrN   )r  r  r6   r   r   r   rL   s        r   &_symm_mem_one_shot_all_reduce_copy_outKregister_symm_mem_lowerings.<locals>._symm_mem_one_shot_all_reduce_copy_outZ  s_     .kFLL$$55==

 
	
r   c                    > T" X5      n [         R                  R                  TR                  R                  U UU5        U $ r3   )r   rI   rH   two_shot_all_reduce_rN   r   s      r   _symm_mem_two_shot_all_reduce_Cregister_symm_mem_lowerings.<locals>._symm_mem_two_shot_all_reduce_o  B     &c6
  ))11		
 
r   outputc           
         > T" X5      n [         R                  " [        R                  R                  [        R
                  R	                  TR                  R                  U UUU5      5      $ r3   )rE   rF   r   rG   rH   rI   two_shot_all_reduce_outrN   )r5   r6   r   r  r   rL   s       r   !_symm_mem_two_shot_all_reduce_outFregister_symm_mem_lowerings.<locals>._symm_mem_two_shot_all_reduce_out~  s\     &c6LL$$0088	
 		
r   c                    > T" X5      n [         R                  R                  TR                  R                  U UU5        U $ r3   )r   rI   rH   multimem_all_reduce_rN   r   s      r   _symm_mem_multimem_all_reduce_Cregister_symm_mem_lowerings.<locals>._symm_mem_multimem_all_reduce_  r  r   c           	         > T" X5      n [         R                  " [        R                  R                  [        R
                  R	                  TR                  R                  U UU5      5      $ r3   )rE   rF   r   rG   rH   rI   multimem_one_shot_all_reducerN   r   s      r   &_symm_mem_multimem_one_shot_all_reduceKregister_symm_mem_lowerings.<locals>._symm_mem_multimem_one_shot_all_reduce  sY     &c6LL$$55==	
 	
r   c           
         > T" X5      n [         R                  " [        R                  R                  [        R
                  R	                  TR                  R                  U UUU5      5      $ r3   )rE   rF   r   rG   rH   rI    multimem_one_shot_all_reduce_outrN   r  s       r   *_symm_mem_multimem_one_shot_all_reduce_outOregister_symm_mem_lowerings.<locals>._symm_mem_multimem_one_shot_all_reduce_out  s\     &c6LL$$99AA	
 		
r   rootc                    > T" X5      n [         R                  " [        R                  R                  [        R
                  R	                  TR                  R                  U UUUU5      5      $ r3   )rE   rF   r   rG   rH   rI   multimem_one_shot_reduce_outrN   )r5   r6   r$  r   r   r   rL   s        r   &_symm_mem_multimem_one_shot_reduce_outKregister_symm_mem_lowerings.<locals>._symm_mem_multimem_one_shot_reduce_out  s_     &c6LL$$55==

 
	
r   c           	         > T" X5      n [         R                  " [        R                  R                  [        R
                  R	                  TR                  R                  U UU5      5      $ r3   )rE   rF   r   rG   rH   rI   multimem_all_gather_outrN   )r5   r   r   r   rL   s      r   !_symm_mem_multimem_all_gather_outFregister_symm_mem_lowerings.<locals>._symm_mem_multimem_all_gather_out  sY     &c6LL$$0088	
 	
r   split_last_dimc           
         > T" X5      n [         R                  " [        R                  R                  [        R
                  R	                  TR                  R                  U UUU5      5      $ r3   )rE   rF   r   rG   rH   rI   reduce_scatter_outrN   )r5   r   r-  r  r   rL   s       r   _symm_mem_reduce_scatter_outAregister_symm_mem_lowerings.<locals>._symm_mem_reduce_scatter_out  s\     &c6LL$$++33	
 		
r   	in_splitsout_splits_offsetsc                    > T" X5      n T" X5      n[         R                  R                  TR                  R                  U UUUU5        g r3   )r   rI   rH   all_to_all_vdevrN   )r5   r   r2  r3  r   r   rL   s        r   _symm_mem_all_to_all_vdev>register_symm_mem_lowerings.<locals>._symm_mem_all_to_all_vdev  sP     &c6%c6
  $$,,	
 r   c           	         > T" X5      n T" X5      n[         R                  R                  TR                  R                  U UUUUU5        g r3   )r   rI   rH   all_to_all_vdev_2drN   )r5   r   r2  r3  r   major_alignr   rL   s         r   _symm_mem_all_to_all_vdev_2dAregister_symm_mem_lowerings.<locals>._symm_mem_all_to_all_vdev_2d  sS     &c6%c6
  ''//	
 r   in_splits_offsetsc                    > T" X5      n T" X5      n[         R                  R                  TR                  R                  U UUUU5        g r3   )r   rI   rH   all_to_all_vdev_2d_offsetrN   )r5   r   r=  r3  r   r   rL   s        r   #_symm_mem_all_to_all_vdev_2d_offsetHregister_symm_mem_lowerings.<locals>._symm_mem_all_to_all_vdev_2d_offset'  sP     &c6%c6
  ..66	
 r   in_tileout_tilec                    > T" X5      n T" X5      n[         R                  R                  TR                  R                  U UUUU5        g r3   )r   rI   rH   tile_reducerN   )rB  rC  r$  r   r6   r   rL   s        r   _symm_mem_tile_reduce:register_symm_mem_lowerings.<locals>._symm_mem_tile_reduce;  sP     *'>*8@
    ((	
 r   c                    > [        U 5       H  u  pVT" Xc5      X'   M     T" X5      n[        R                  R                  TR                  R
                  U UUUU5        g r3   )r   r   rI   rH   multi_root_tile_reducerN   )	in_tilesrC  rootsr   r6   r   rB  r   rL   s	          r    _symm_mem_multi_root_tile_reduceEregister_symm_mem_lowerings.<locals>._symm_mem_multi_root_tile_reduceO  sc     $H-JA1'FHK .*8@
  ++33	
 r   r3   )r9   )"rJ   rK   rL   rM   r   r   r   torch._library._out_variantr   rN   r   r  r  r   rU   r   rG   r?   r   r  r  r  r  r!  r&  intr*  r/  boolr5  r9  r?  rE  rI  )r   rU   r   r  r  r  r  r  r  r  r"  r'  r+  r0  r6  r;  r@  rF  rL  r   r   rL   s                      @@@r   register_symm_mem_loweringsrQ    s"   	99%%
 	$$
 A $$,,((00 ))11--55
 ,\\++ C 
	&\\ 
8 x334
\\

 
 5
  x778
\\

 
 \\	
 9
$ x889
\\
\\
 
 	
 :
$ x<<=
\\
\\
 
 	

 \\
 >
( x445\\  6 x778
\\

 
 	
 9
$ x445\\  6 x<<=
\\

 
 >
  x@@A
\\

 
 \\	
 B
$ x<<=
\\

 
 	

 \\
 >
( x778
\\

 \\
 9
  x223
\\

 
 	
 4
$ x//0\\\\ << LL	
  1& x223 \\\\ << LL	
   4* x99:\\\\ << LL	
  ;& x++, ,,  	
   -& x667 ,, 	
   8]  JKs   &X X('X()#loggingrJ   torch.utils._pytreeutils_pytreerE   torch._inductor.utilsr   torch.utils._ordered_setr    r   r   virtualizedr   	getLogger__name__r   rG   r?   rP  r   r#   rv   r   tuplerO  r   r+   r1   r4   rC   rO   r   rQ  rW   r   r   <module>r]     s,     $ $ - /   !T	||')'8'8	<%	||%''% ?% 
	%P
 
")) 
   c3h02 8 8t 8=		 =d =	 ?"
bll 
ZzKr   