
    3j~                    .   % S SK Jr  S SKrS SKrS SKJrJr  S SKJr  S SK	J
r
JrJrJrJrJr  S SKrS SKJr  S SKJr  S SKJr  S S	KJrJr  \(       a  S S
KJr  S SKJr  \R:                  " \5      r " S S\5      r \!\!\"S4   \S-  4   r#S\$S'    " S S\%5      r&\" SSS9 " S S5      5       r'\" SSS9 " S S\'5      5       r(\'\(-  r)S\$S'   \" SS9 " S S5      5       r*\" SSS9 " S S5      5       r+\" SSS9 " S S 5      5       r,      S<S! jr-    S=S" jr. " S# S$5      r/ " S% S&\5      r0S'S(.S>S) jjr1S* r2 S?       S@S+ jjr3 S?       SAS, jjr4\" SS9 " S- S.5      5       r5SBS/ jr6\SS0.     SCS1 jj5       r7\      SDS2 j5       r7S'S0.     SES3 jjr7SFSGS4 jjr8\ SH     SIS5 jj5       r9\ SH     SJS6 jj5       r9 SF     SKS7 jjr9S'S'S8.           SLS9 jjr:SS'S8.           SMS: jjr;          SNS; jr<g)O    )annotationsN)	dataclassfield)Enum)castLiteraloverloadProtocolTYPE_CHECKING	TypeAlias)fx)_MeshLayout)DTensor)tree_flattentree_unflatten)
DeviceMesh)	Placementc                  .    \ rS rSrSr      SS jrSrg)GetMeshCallback   zGCallback to create/retrieve a DeviceMesh from its cache key components.c                    g N )selfmesh_dim_namesmesh_layouts      ]/home/wildlama/miniconda3/lib/python3.13/site-packages/torch/distributed/pipelining/_utils.py__call__GetMeshCallback.__call__   s         r   N)r   tuple[str, ...]r   _MeshLayout | Nonereturnr   )__name__
__module____qualname____firstlineno____doc__r   __static_attributes__r   r    r   r   r      s&    Q' ( 
	r    r   .r   MeshCacheKeyc                      \ rS rSrSrSrg)PipeliningMetadataError,   z<Raised on metadata mismatches during pipeline communication.r   N)r$   r%   r&   r'   r(   r)   r   r    r   r,   r,   ,   s    Fr    r,   T)frozenslotsc                  j    \ rS rSr% SrS\S'   S\S'   S\S'   S	\S
'   \SS j5       rSS jrSS jr	Sr
g)_TensorMeta0   zTensor metadata for recv buffer allocation and validation.

For plain tensors, these are the tensor's actual attributes.
For DTensors, these are LOCAL shard attributes; global attributes
are stored in :class:`_DTensorMeta`.

torch.Sizeshapetuple[int, ...]strideztorch.dtypedtypeboolrequires_gradc                    [        U [        5      (       a  [        S5      e[        U R                  U R                  5       U R                  U R                  S9$ )zCreate metadata from a plain tensor.

Args:
    tensor: A plain ``torch.Tensor`` (not DTensor).

Returns:
    Metadata capturing shape, stride, dtype, and requires_grad.

Raises:
    TypeError: If ``tensor`` is a DTensor.
zJExpected plain tensor, got DTensor. Use _DTensorMeta.from_dtensor instead.r4   r6   r7   r9   )
isinstancer   r,   r1   r4   r6   r7   r9   tensors    r   from_tensor_TensorMeta.from_tensor>   sP     fg&&)\  ,,==?,, ..	
 	
r    c                R    [        X5      nUR                  U R                  5        U$ )zReconstruct a tensor on ``device`` from this metadata.

Args:
    device: Target device for the tensor.

Returns:
    An empty strided tensor on ``device``.
)_make_tensor_from_metarequires_grad_r9   )r   devicets      r   	to_tensor_TensorMeta.to_tensorV   s'     #40	++,r    c                   X:X  a  / $ / nU R                   UR                   :w  a+  UR                  SU R                    SUR                    35        U R                  UR                  :w  a+  UR                  SU R                   SUR                   35        U R                  UR                  :w  a+  UR                  SU R                   SUR                   35        U$ )zReturn field-by-field differences with ``other``.

Args:
    other: Metadata to compare against.

Returns:
    List of human-readable difference strings (empty if equal).
zshape mismatch:  vs zstride mismatch: zdtype mismatch: )r4   appendr6   r7   r   otherdiffss      r   get_diff_TensorMeta.get_diffc   s     =I::$LL+DJJ<tEKK=IJ;;%,,&LL,T[[Mell^LM::$LL+DJJ<tEKK=IJ r    r   N)r>   torch.Tensorr#   r1   )rD   torch.device | strr#   rP   rL   r1   r#   	list[str])r$   r%   r&   r'   r(   __annotations__staticmethodr?   rF   rN   r)   r   r    r   r1   r1   0   s=     
 
.r    r1   c                      \ rS rSr% Sr\" S S9rS\S'   \" SS9rS	\S
'   \" SS9r	S\S'   \" SS9r
S\S'   \" SS9rS\S'   \SS j5       r\SS j5       rSS jrSS jrSrg)_DTensorMeta{   a  DTensor metadata extending :class:`_TensorMeta` with distribution info.

Inherited fields (shape, stride, etc.) are LOCAL shard attributes.
Additional fields capture global shape and placement information
needed to reconstruct a :class:`DTensor` via ``DTensor.from_local()``.

The :class:`DeviceMesh` is **not** stored (not serializable for P2P);
it is looked up from :class:`_MeshCache` using
``(mesh_dim_names, mesh_layout)`` as the key.
c                 .    [         R                  " / 5      $ r   )torchSizer   r    r   <lambda>_DTensorMeta.<lambda>   s    UZZ^r    )default_factoryr3   global_shaper   )defaultr5   global_strideztuple[Placement, ...]
placementsr!   r   Nr"   r   c                t   U R                   n[        U R                  R                  U R                  R	                  5       U R
                  U R                  U R                  U R	                  5       U R                  R                  UR                  (       a  [        UR                  5      OSUR                  S9	$ )zCreate metadata from a DTensor.

Args:
    dtensor: The DTensor to extract metadata from.

Returns:
    Metadata capturing both local and global attributes.
r   )	r4   r6   r7   r9   r_   ra   rb   r   r   )device_meshrW   _local_tensorr4   r6   r7   r9   _specrb   r   tuple_layout)dtensorrd   s     r   from_dtensor_DTensorMeta.from_dtensor   s     ))''--((//1--!// !..*}}//5@5O5Ok001UW#++
 	
r    c                2    U R                   U R                  4$ )z<Cache key ``(mesh_dim_names, mesh_layout)`` for mesh lookup.)r   r   r   s    r   mesh_cache_key_DTensorMeta.mesh_cache_key   s     ##T%5%566r    c                    [        X5      n[        [        [        R                  " UUU R                  U R
                  U R                  SS9R                  U R                  5      5      $ )zReconstruct a DTensor on ``device`` with placements.

Args:
    device: Target device for the local tensor.
    mesh: The ``DeviceMesh`` to attach.

Returns:
    A DTensor on ``device``.
F)rd   rb   r4   r6   	run_check)	rB   r   r   
from_localrb   r_   ra   rC   r9   )r   rD   meshlocal_tensors       r   
to_dtensor_DTensorMeta.to_dtensor   sb     .d;  ??'')) nT//0

 
	
r    c                B   X:X  a  / $ [         R                  X5      n[        U[        5      (       Ga[  U R                  UR                  :w  a+  UR                  SU R                   SUR                   35        U R                  UR                  :w  a+  UR                  SU R                   SUR                   35        U R                  UR                  :w  a+  UR                  SU R                   SUR                   35        U R                  UR                  :w  a+  UR                  SU R                   SUR                   35        U R                  UR                  :w  a+  UR                  SU R                   SUR                   35        U$ UR                  S5        U$ )zReturn field-by-field differences, including DTensor-specific fields.

Args:
    other: Metadata to compare against.

Returns:
    List of human-readable difference strings (empty if equal).
zglobal_shape mismatch: rI   zglobal_stride mismatch: zplacements mismatch: zmesh_dim_names mismatch: zmesh_layout mismatch: z!type: _DTensorMeta vs _TensorMeta)
r1   rN   r<   rW   r_   rJ   ra   rb   r   r   rK   s      r   rN   _DTensorMeta.get_diff   s    =I
 $$T1 e\**  E$6$66-d.?.?-@UEWEWDXY !!U%8%88.t/A/A.B$uGZGZF[\ %"2"22+DOO+<DAQAQ@RS ""e&:&::/0C0C/DDI]I]H^_ 5#4#44,T-=-=,>d5CTCTBUV  LL<=r    )ri   r   r#   rW   )r#   r*   )rD   rQ   rs   r   r#   r   rR   )r$   r%   r&   r'   r(   r   r_   rT   ra   rb   r   r   rU   rj   propertyrn   ru   rN   r)   r   r    r   rW   rW   {   s    	  %5KLL*L%*2%6M?6 ).)J% 
 ',B&7NO7&+'K#  
 
8 7 7
2*r    rW   
TensorMeta)r/   c                  p    \ rS rSr% SrSrS\S'   SrS\S'   SrS\S'   Sr	S\S	'   SS
 jr
SS jrSS jrSrg)
_StageMetai  zPConsolidated tensor metadata for a pipeline stage's forward and backward passes.Ntuple[TensorMeta, ...] | Noneinputsoutputs$tuple[TensorMeta | None, ...] | Noneinput_gradsoutput_gradsc                ~    [        S U R                  U R                  U R                  U R                  4 5       5      $ )z)Check if any metadata field is populated.c              3  *   #    U  H	  nUS Lv   M     g 7fr   r   ).0vs     r   	<genexpr>%_StageMeta.has_any.<locals>.<genexpr>  s      
U TMUs   )anyr~   r   r   r   rm   s    r   has_any_StageMeta.has_any  s9     
kk4<<1A1A4CTCTU
 
 	
r    c                    U R                   U R                  4 H%  nU(       d  M  [        S U 5       5      (       d  M%    g   g)z3Check if any input/output metadata is DTensor type.c              3  R   #    U  H  o(       d  M  [        U[        5      v   M     g 7fr   )r<   rW   r   ms     r   r   *_StageMeta.has_dtensors.<locals>.<genexpr>  s     M%Q18Z<88%s   
''TF)r~   r   r   )r   metass     r   has_dtensors_StageMeta.has_dtensors  s6    kk4<<0EuM%MMM 1 r    c                H    U R                   SL=(       a    U R                  SL$ )z-Check if forward metadata is fully populated.N)r~   r   rm   s    r   is_complete_for_forward"_StageMeta.is_complete_for_forward  s    {{$&C4<<t+CCr    r   )r#   r8   )r$   r%   r&   r'   r(   r~   rT   r   r   r   r   r   r   r)   r   r    r   r|   r|     sA    Z,0F)0-1G*18<K5<9=L6=
Dr    r|   c                  $    \ rS rSr% SrS\S'   Srg)_StageForwardMetai!  zLForward metadata transmitted from stage *i* to stage *i+1* during inference.tuple[TensorMeta, ...]forward_metasr   Nr$   r%   r&   r'   r(   rT   r)   r   r    r   r   r   !  s    V))r    r   c                  $    \ rS rSr% SrS\S'   Srg)_StageBackwardMetai(  u   Backward metadata transmitted from stage *i* to stage *i-1* during inference.

Gradient placements may differ from forward activations
(e.g., ``Replicate`` → ``Partial``).
tuple[TensorMeta | None, ...]backward_metasr   Nr   r   r    r   r   r   (  s     r    r   c                l    [         R                  " U R                  U R                  U R                  US9$ )zCreate a tensor from metadata.

Args:
    meta: Metadata with shape, stride, and dtype.
    device: Target device for the tensor.

Returns:
    Empty tensor preserving the exact memory layout.
)sizer6   r7   rD   )rZ   empty_stridedr4   r6   r7   )metarD   s     r   rB   rB   5  s/     ZZ{{jj	 r    c                &    [        S U  5       5      $ )zDerive gradient metadata from tensor metadata.

Returns metadata with the same shape/stride/dtype but ``requires_grad=False``.
Entries where the source has ``requires_grad=False`` become ``None``.
c              3     #    U  HB  nUR                   (       a*  [        UR                  UR                  UR                  S S9OSv   MD     g7f)Fr;   N)r9   r1   r4   r6   r7   r   s     r   r   %_derive_grad_metas.<locals>.<genexpr>R  sD       A ?? 	!''!((!''QVW	 s   A
A)rg   )tensor_metass    r   _derive_grad_metasr   J  s        	  r    c                  X    \ rS rSrSrSSS jjrSS jrSS jrSS jrSS jr	SS	 jr
S
rg)
_MeshCacheiZ  zCache for :class:`DeviceMesh` objects keyed by ``(mesh_dim_names, mesh_layout)``.

Assumes all pipeline stages share the same rank tensor (true for
TorchTitan-style frameworks where meshes derive from a common world).
Nc                    0 U l         Xl        g r   _cache_get_mesh_cb)r   get_mesh_cbs     r   __init___MeshCache.__init__a  s    68'r    c                    XR                   ;   a  U R                   U   $ Uu  p#U R                  c  [        SU SU S35      eU R                  X#5      nUc  [        SU SU S35      eX@R                   U'   U$ )zReturn a cached mesh, or create one via the callback.

Args:
    key: Cache key ``(mesh_dim_names, mesh_layout)``.

Returns:
    The ``DeviceMesh``.

Raises:
    PipeliningMetadataError: If not cached and no callback provided.
z+Mesh not found in cache for mesh_dim_names=z, mesh_layout=z`, and no get_mesh callback provided. Provide a get_mesh callback or use DTensors in static mode.z>Mesh lookup failed: callback returned None for mesh_dim_names=z6. Ensure all stages use meshes from the same universe.)r   r   r,   )r   keyr   r   rs   s        r   get_mesh_MeshCache.get_meshe  s     ++;;s##&)#$)=n=M N*m ,NO    =<)""0!1} MGH 
  Cr    c                     X R                   U'   g)zAdd a mesh to the cache.Nr   )r   r   rs   s      r   put_MeshCache.put  s    Cr    c                   U H|  n[        U[        5      (       d  M  UR                  nUR                  (       a  [	        UR                  5      OSnUR
                  nXE4nX`R                  ;  d  Mn  X0R                  U'   M~     g)zJExtract and cache meshes from any :class:`DTensor` instances in *tensors*.r   N)r<   r   rd   r   rg   rh   r   )r   tensorsr>   rs   	dim_namesr   r   s          r   update_from_tensors_MeshCache.update_from_tensors  sf    F&'**)):>:M:ME$"5"56SU	"ll .kk)'+KK$ r    c                    XR                   ;   $ r   r   )r   r   s     r   __contains___MeshCache.__contains__  s    kk!!r    c                ,    [        U R                  5      $ r   )lenr   rm   s    r   __len___MeshCache.__len__  s    4;;r    r   r   )r   zGetMeshCallback | Noner#   None)r   r*   r#   r   )r   r*   rs   r   r#   r   )r   tuple[torch.Tensor | None, ...]r#   r   )r   r*   r#   r8   )r#   int)r$   r%   r&   r'   r(   r   r   r   r   r   r   r)   r   r    r   r   r   Z  s&    ( D 	," r    r   c                  4    \ rS rSrSrSrSr\SS j5       rSr	g)	InferenceModei  a  Pipeline-level metadata inference mode, determined collectively across all PP ranks.

The mode is set by the schedule (not individual stages) because
``has_backward`` is only known at schedule creation time and all
stages must agree to avoid P2P hangs.

.. attribute:: STATIC

    All stages have sufficient metadata; runtime inference is skipped.

.. attribute:: DYNAMIC

    At least one stage requires runtime metadata inference.
staticdynamicc                    UR                  5       (       d  gUR                  5       (       d  gU(       d  gUR                  b  UR                  c  gg)zDetermine whether dynamic metadata inference is needed for a stage.

Args:
    meta: Stage metadata from user-provided args.
    stage_has_backward: Whether a backward pass will be performed.

Returns:
    ``True`` if dynamic inference is needed.
TF)r   r   r   r   )clsr   stage_has_backwards      r   needs_dynamicInferenceMode.needs_dynamic  sS     ++--   "" " #t'8'8'@ r    r   N)r   r|   r   r8   r#   r8   )
r$   r%   r&   r'   r(   STATICDYNAMICclassmethodr   r)   r   r    r   r   r     s%     FG r    r   Fdetachc                  [        U 5      u  p#U(       ah  U Vs/ s HM  n[        U[        R                  5      (       a)  UR	                  5       R                  UR                  5      OUPMO     nn[        XS5      nXe4$ U$ s  snf )a  Flatten ``args`` into a list, optionally detaching tensors.

Args:
    args: Nested arguments to flatten.
    detach: If ``True``, detach tensors while preserving ``requires_grad``.

Returns:
    ``(new_args, flat_detached_args)`` when ``detach=True``;
    ``flat_args`` list otherwise.
)r   r<   rZ   Tensorr   rC   r9   r   )argsr   	flat_argstreespecaflat_detachednew_argss          r   flatten_argsr     s     't,I
 	
  !U\\** HHJ%%aoo6 	 	 
 "-:&&
s   AA?c                    [        U SS9$ )zHFlatten and detach. Deprecated: use ``flatten_args(args, detach=True)``.Tr   )r   )r   s    r   flatten_args_detachr     s    T**r    c                "   0 nUS:X  a  [        U5       H	  nX@-  X4'   M     U$ US:X  aY  X-  S:w  a  [        SU SU  S35      eSn[        U5       H,  nXSU'   US-   U -  S:X  a  M  X@-  S-  S:X  a  US-  nM'  US-  nM.     U$ [        S	U S
35      e)z
Compute the stage id to rank mapping for either a looped or V-style schedule.

Most commonly num_stages == pp_size * 2, but this function can be used to
compute the mapping for any number of stages per rank.
loopr   r   znum_stages z% must be evenly divisible by pp_size z for V schedules      zStyle z is not supported.)range
ValueError)pp_size
num_stagesstylemappingstage_index
rank_indexs         r   generate_stage_to_rank_mappingr     s     G ,K#.#8G  -( N% 
#1$j\)NwiWgh  
 ,K#-K a7*a/&!+q0a
a
 - N 6%(:;<<r    c                    [        XU5      n0 nUR                  5        H!  u  pVXd;  a  / XF'   XF   R                  U5        M#     UR                  5        H  nUR	                  5         M     U$ )a  
Compute the rank to stage id mapping for either a looped or V-style schedule.

This function inverts the stage_to_rank_mapping to get which stages are assigned to each rank.

Returns a dictionary mapping rank -> list of stage indices assigned to that rank.
)r   itemsrJ   valuessort)r   r   r   stage_to_rankrank_to_stagesstage_idrankstagess           r   generate_rank_to_stage_mappingr     sq     37NM ,.N'--/%#%N ##H- 0 !'') * r    c                  8    \ rS rSr% SrS\S'   S\S'   S\S'   S	rg
)PipeInfoi6  z6
Captures information for a pipeline (`Pipe` object).
zfx.Graphgraphr   r   r8   has_loss_and_backwardr   Nr   r   r    r   r   r   6  s     OOr    r   c                    [        U [        5      (       a  [        R                  U 5      $ [        R                  U 5      $ )a  Extract metadata from a tensor.

Handles both plain Tensor and DTensor correctly: DTensors are
dispatched to ``_DTensorMeta.from_dtensor`` which captures local
shard attributes plus global shape/placement info, while plain
tensors use ``_TensorMeta.from_tensor``.

Args:
    tensor: A plain tensor or DTensor.

Returns:
    ``_TensorMeta`` for plain tensors, ``_DTensorMeta`` for DTensors.
)r<   r   rW   rj   r1   r?   r=   s    r   extract_tensor_metar   F  s3     &'""((00&&v..r    )
allow_nonec                   g r   r   r   r   s     r   extract_tensor_metasr  Z  s    
 %(r    c                   g r   r   r  s     r   r  r  b  s    
 ,/r    c                  U c  g/ nSnU  HQ  n[        U[        R                  5      (       a  UR                  [	        U5      5        M>  SnUR                  S5        MS     U(       d  U(       a  [        S5      e[        U5      $ )aj  Extract metadata from a tuple of tensors.

Args:
    tensors: Tuple of tensors (may include ``None`` when ``allow_none=True``).
    allow_none: If ``True``, preserve ``None`` elements (for gradients).

Returns:
    Tuple of ``TensorMeta``, or ``None`` if ``tensors`` is ``None``.

Raises:
    PipeliningMetadataError: If ``None`` found and ``allow_none=False``.
NFTz_None values are not allowed in tensor metadata tuples. Use allow_none=True for optional values.)r<   rZ   r   rJ   r   r,   rg   )r   r   metas_with_nonehas_nonerE   s        r   r  r  j  s~    " /1OHa&&""#6q#9:H""4(  (%7
 	
 !!r    c                    U(       a  U R                  5       OU n[        U[        5      (       a  UR                  5       $ U$ )u  Convert a DTensor to its local shard, or return a plain tensor as-is.

When ``detach=True``, the tensor is detached before conversion —
this applies to both DTensors and plain tensors.

Args:
    tensor: A tensor that may be a DTensor.
    detach: If ``True``, detach before ``to_local()`` to avoid
        redistribution during backward.

Returns:
    The local tensor component.
)r   r<   r   to_local)r>   r   maybe_detached_tensors      r   to_local_if_dtensorr
    s7     06FMMO6'11$--//  r    c                    g r   r   r   r   s     r   validate_and_normalize_to_tupler    s     '*r    c                    g r   r   r  s     r   r  r    s     .1r    c           	        U c  g[        U [        R                  5      (       a  U 4$ [        U [        [        45      (       a  [        U 5       Hd  u  p#Uc  U(       d  [        SU S35      eM   [        U[        R                  5      (       a  MA  [        SU S[        U5      R                   S35      e   [        U [        5      (       a  [        U 5      $ U $ [        S[        U 5      R                   S35      e)a  Normalize ``args`` to a tuple and validate that all elements are tensors.

Args:
    args: A single tensor, tuple/list of tensors, or ``None``.
    allow_none: If ``True``, permit ``None`` elements (for gradients).

Returns:
    Tuple of tensors, or ``None`` if ``args`` is ``None``.

Raises:
    PipeliningMetadataError: On non-tensor values
        (or ``None`` when ``allow_none=False``).
Nz
Stage arg[zF] is None. Stage args must be tensors. Use kwargs for optional values.z] has type zC. All stage args must be tensors. Use kwargs for non-tensor inputs.z<Stage args must be a tensor, tuple, or list of tensors, got .)	r<   rZ   r   rg   list	enumerater,   typer$   )r   r   iargs       r   r  r    s    , |	D%,,	'	'w	D5$-	(	(oFA{!1$QC (V W  c5<<00- ;tCy/A/A.B CX Y  & )t44uT{>$>%J4PT:K^K^J__`a
 	
r    raise_on_mismatchwarn_on_mismatchc               `   [        U[        R                  5      (       a  [        U5      nOUn[	        U5      [	        U5      Lau  S[	        U5      R
                   S[	        U5      R
                   3/nU(       a  [        U  SUS    35      eU(       a#  [        R                  " U  SUS    S3[        SS9  U$ UR                  U5      nU(       a\  U(       a  [        U  SS	R                  U5       35      eU(       a/  [        R                  " U  S
S	R                  U5       S3[        SS9  U$ )a,  
Compare expected metadata against actual tensor or metadata.

This is the unified validation/comparison function that uses get_diff() from
metadata classes. Works with both plain tensors and DTensors.

For plain tensors: compares shape/stride/dtype/requires_grad.
For DTensors: compares all properties including global shape and placements.

Args:
    desc: Description for error/warning messages.
    expected: Expected tensor metadata (_TensorMeta or _DTensorMeta).
    actual: Actual tensor or metadata to compare against.
    raise_on_mismatch: If True, raise PipeliningMetadataError on mismatch.
    warn_on_mismatch: If True, issue a warning on mismatch.

Returns:
    List of differences (empty if metadata matches).

Raises:
    PipeliningMetadataError: If raise_on_mismatch=True and differences exist.
ztype: expected , got : r   z: Metadata type mismatch. z.. Using dynamically inferred metadata instead.r   
stacklevelz; z: Metadata mismatch. )r<   rZ   r   r   r  r$   r,   warningswarnUserWarningrN   join)descexpectedactualr  r  actual_meta	type_diffrM   s           r   validate_metadatar'    s3   > &%,,'')&1 H~T+..d8n556fT+=N=W=W<XY
	 )TF"Yq\N*CDDMM&29Q<. A? @	  k*E)TF"TYYu5E4F*GHHMM&-dii.>-? @? @	 Lr    c          	     :   [        U5      [        U5      :w  aR  U  S[        U5       S[        U5       3nU(       a  [        U5      eU(       a  [        R                  " U[        SS9  U/$ / n[        [        XSS95       H  u  nu  pUc  U	c  M  Ub  U	c]  U  SU SUc  S	OS
 SU	c  S	OS
 3nU(       a  [        U5      eU(       a  [        R                  " U[        SS9  UR                  U5        Ms  [        U  SU S3UU	UUS9n
UR                  U
5        M     U$ )a  Validate metadata for a tuple of tensors element-wise.

Args:
    desc: Description prefix for error/warning messages.
    expected: Tuple of expected metadata (may include ``None`` for grads).
    actual: Tuple of actual tensors or metadata to compare against.
    raise_on_mismatch: If ``True``, raise on the first mismatch.
    warn_on_mismatch: If ``True``, issue warnings for mismatches.

Returns:
    Aggregated list of difference strings.

Raises:
    PipeliningMetadataError: If lengths differ or on mismatch.
z: expected z tensors, got r   r  Tstrict[z]: expected r   metadatar  ]r  )
r   r,   r  r  r   r  ziprJ   r'  extend)r"  r#  r$  r  r  msg	all_diffsr  expactrM   s              r   validate_tensors_metadatar4  -  s6   . 8}F#k#h-s6{mL)#..MM#{q9uI"3x#EF:C;3;;#+&!L3;J(O P!$v*=?  !-c22c;1=S!!fAaSN/-
 	+ G, r    c                   U(       a  SOSnU S3nU S3n[        U5      [        U5      :w  a-  [        SU  SU S[        U5       SU S[        U5       S	35      e[        [        XS
S95       GH   u  nu  pUR                  (       d4  U	b1  [        SU  SU SU SU SU S[        U	5      R                   S35      eUR                  (       a-  U	c*  [        R                  " SU  SU SU SU SU S3[        SS9  [        U[        5      (       d  M  UR                  (       d  M  U	c  M  [        U	[        5      (       a  M  [        SU  SU SU SU SU S[        U	5      R                   S35      e   g)u  
Validate the args↔grads contract for static mode.

Enforces four rules for each (arg, grad) pair:
  1. len(args) must equal len(grads).
  2. If arg.requires_grad is False, grad must be None.
  3. If arg.requires_grad is True and grad is None, emit a warning
     (this is legal at pipeline boundaries but may indicate a bug).
  4. If arg is a DTensor with requires_grad=True and grad is not None,
     grad must also be a DTensor.

Args:
    stage_index: The stage index for error messages.
    args: Tuple of forward tensors.
    grads: Tuple of gradient tensors (can include None).
    is_input: True for input_args/input_grads, False for output_args/output_grads.

Raises:
    PipeliningMetadataError: If any hard rule (1, 2, or 4) is violated.
inputoutput_args_gradszStage r  z	 length (z) does not match zo). Each forward tensor must have a corresponding gradient entry (use None for tensors that don't require grad).Tr)  Nr+  z] has requires_grad=False, but z] is not None (zE). Non-differentiable tensors must have None as their gradient entry.z] has requires_grad=True, but zT] is None. This is legal at pipeline boundaries but may indicate a missing gradient.r   r  z,] is a DTensor with requires_grad=True, but z] is za, expected DTensor or None. DTensor gradients may have different placements than forward tensors.)r   r,   r  r.  r9   r  r$   r  r  r   r<   r   )
r   r   gradsis_inputkind	args_name
grads_namer  r  grads
             r   'validate_static_arg_grad_correspondencer@  f  s   4 7HD&I6J 4yCJ%[MJ<yUDUk3t9+ .[\
 	
 $CD$AB;C  T%5)R	{!A3 7!l!A3od4j6I6I5J KUV  MMR	{!A3 7!l!A3 '78  sG$$!!! tW--)R	{!A3 7!l!A3eDJ,?,?+@ AXY 5 Cr    )r   r1   rD   rQ   r#   rP   )r   r   r#   ztuple[_TensorMeta | None, ...])r   r8   )r   )r   r   r   r   r   strr#   zdict[int, int])r   r   r   r   r   rA  r#   zdict[int, list[int]])r>   rP   r#   rz   )r   tuple[torch.Tensor, ...] | Noner   Literal[False]r#   r}   )r   &tuple[torch.Tensor | None, ...] | Noner   Literal[True]r#   r   )r   Atuple[torch.Tensor | None, ...] | tuple[torch.Tensor, ...] | Noner   r8   r#   r   )F)r>   rP   r   r8   r#   rP   ).)r   zCtorch.Tensor | tuple[torch.Tensor, ...] | list[torch.Tensor] | Noner   rC  r#   rB  )r   zQtorch.Tensor | tuple[torch.Tensor | None, ...] | list[torch.Tensor | None] | Noner   rE  r#   rD  )r   ztorch.Tensor | tuple[torch.Tensor, ...] | tuple[torch.Tensor | None, ...] | list[torch.Tensor] | list[torch.Tensor | None] | Noner   r8   r#   rF  )r"  rA  r#  rz   r$  ztorch.Tensor | TensorMetar  r8   r  r8   r#   rS   )r"  rA  r#  r   r$  z,tuple[torch.Tensor | TensorMeta | None, ...]r  r8   r  r8   r#   rS   )
r   r   r   ztuple[torch.Tensor, ...]r:  r   r;  r8   r#   r   )=
__future__r   loggingr  dataclassesr   r   enumr   typingr   r   r	   r
   r   r   rZ   r   torch.distributed._mesh_layoutr   torch.distributed.tensorr   torch.utils._pytreer   r   torch.distributed.device_meshr   (torch.distributed.tensor.placement_typesr   	getLoggerr$   loggerr   rg   rA  r*   rT   RuntimeErrorr,   r1   rW   rz   r|   r   r   rB   r   r   r   r   r   r   r   r   r   r  r
  r  r'  r4  r@  r   r    r   <module>rT     s   #   (  N N   6 , < 8B 
		8	$h   c3ht1C CDi DGl G $d#G G $GT $d#@; @ $@H $l2
I 2
 D D D6 $d#* * $* $d#	 	 $	
 *(# @  @ P0D 0p */ 6+ 17!*-F 17!*-4      /( 
 "%(,( ( #	( 
( 
/3/ / *	/ 
/ !"N!" !" *	!"H!( 
 "%*
M** %* 
* 
 !$11
 1 ,1 
1  -
-
 -
 G-
t $"B
BB &B
 B B BT #"6
6+6 96
 6 6 6rDD
"D +D 	D
 
Dr    