
    3jV                       % S SK Jr  S SKrS SKJr  S SKJrJrJr  S SKJ	r	J
r
Jr  S SKrS SKJr  \(       a  S SKJr  SS	KJr  / S
Qr\" S5      r\	" S5      r\" \R,                  S5      (       d]  \" S5      \R,                  R.                  S'   \" S5      \R,                  R.                  S'   \" S5      \R,                  R.                  S'   S SKJrJrJr  SS jrSS jr " S S\5      r " S S5      r\R@                  RB                  \S\"4   -  r#S\$S'   \   S            S!S jj5       r%\   S            S"S jj5       r%   S            S#S jjr%g)$    )annotationsN)Callable)overloadTYPE_CHECKING	TypeAlias)	ParamSpecSelfTypeVar)Tensor)_POOL_HANDLE   )_dummy_type)is_current_stream_capturinggraph_pool_handleXPUGraphgraphmake_graphed_callables_R_P_XpuStreamBase	_XPUGraph_xpu_graph_pool_handle_xpu_isCurrentStreamCapturing)r   r   r   c                     [        5       $ )zReturn True if XPU graph capture is underway on the current XPU stream, False otherwise.

If a XPU context does not exist on the current device, returns False without initializing the context.
)r        J/home/wildlama/miniconda3/lib/python3.13/site-packages/torch/xpu/graphs.pyr   r   (   s    
 )**r   c                 P    [         R                  R                  [        5       5      $ )zBReturn an opaque token representing the id of a graph memory pool.)torchxpur   r   r   r   r   r   r   0   s    99!!"8":;;r   c                     ^  \ rS rSrSrSSU 4S jjjrSSU 4S jjjrSU 4S jjrSU 4S jjrSU 4S jjr	SU 4S jjr
SU 4S	 jjrSU 4S
 jjrSU 4S jjrSU 4S jjrSU 4S jjrSrU =r$ )r   5   a  Wrapper around a XPU graph.

Arguments:
    keep_graph (bool, optional): If ``keep_graph=False``, the
        executable command graph will be instantiated on GPU at the end of
        ``capture_end`` and the underlying modifiable command graph will be
        destroyed. Note that the executable command graph will not be
        instantiated at the end of ``capture_end`` in this
        case. Instead, it will be instantiated via an explicit called
        to ``instantiate`` or automatically on the first call to
        ``replay`` if ``instantiate`` was not already called. Calling
        ``instantiate`` manually before ``replay`` is recommended to
        prevent increased latency on the first call to ``replay``.

c                "   > [         TU ]  X5      $ N)super__new__)cls
keep_graph	__class__s     r   r&   XPUGraph.__new__F   s    ws//r   c                    > [         TU ]  US9  g)a  Begin capturing XPU work on the current xpu stream.

Typically, you shouldn't call ``capture_begin`` yourself.
Use :class:`~torch.xpu.graph`, which call ``capture_begin`` internally.

Arguments:
    pool (optional): Token (returned by :func:`~torch.xpu.graph_pool_handle` or
        :meth:`other_Graph_instance.pool()<torch.xpu.XPUGraph.pool>`) that hints this graph may share memory
        with the indicated pool.
poolN)r%   capture_begin)selfr-   r)   s     r   r.   XPUGraph.capture_beginI   s     	4(r   c                "   > [         TU ]  5         g)zEnd XPU graph capture on the current stream.

After ``capture_end``, ``replay`` may be called on this instance.

Typically, you shouldn't call ``capture_end`` yourself.
Use :class:`~torch.xpu.graph`, which call ``capture_end`` internally.
N)r%   capture_endr/   r)   s    r   r2   XPUGraph.capture_endV   s     	r   c                "   > [         TU ]  5         g)a  Instantiate the XPU graph. Will be called by
``capture_end`` if ``keep_graph=False``, or by ``replay`` if
``keep_graph=True`` and ``instantiate`` has not already been
explicitly called. Does not destroy the xpu modify command graph returned
by ``raw_xpu_graph``.
N)r%   instantiater3   s    r   r6   XPUGraph.instantiate`   s     	r   c                "   > [         TU ]  5         g)z+Replay the XPU work captured by this graph.N)r%   replayr3   s    r   r9   XPUGraph.replayi   s    r   c                "   > [         TU ]  5         g)z1Delete the graph currently held by this instance.N)r%   resetr3   s    r   r<   XPUGraph.resetm   s    r   c                    > [         TU ]  5       $ )zReturn an opaque token representing the id of this graph's memory pool.

This id can optionally be passed to another graph's ``capture_begin``,
which hints the other graph may share the same memory pool.
)r%   r-   r3   s    r   r-   XPUGraph.poolq   s     w|~r   c                    > [         TU ]  5       $ )z.Enable debugging mode for XPUGraph.debug_dump.)r%   enable_debug_moder3   s    r   rA   XPUGraph.enable_debug_modey   s    w(**r   c                "   > [         TU ]  U5      $ )z
Arguments:
    debug_path (required): Path to dump the graph to.

Calls a debugging function to dump the graph if the debugging is
enabled via XPUGraph.enable_debug_mode()
)r%   
debug_dump)r/   
debug_pathr)   s     r   rD   XPUGraph.debug_dump}   s     w!*--r   c                    > [         TU ]  5       $ )zuReturns the underlying xpuGraph_t. ``keep_graph`` must be True.

XPU doesn't provide APIs to manipulate this object.
)r%   raw_xpu_graphr3   s    r   rH   XPUGraph.raw_xpu_graph   s    
 w$&&r   c                    > [         TU ]  5       $ )a  Returns the underlying xpuGraphExec_t. ``instantiate`` must have been called if ``keep_graph`` is True, or ``capture_end`` must have been called if ``keep_graph`` is False. If you call ``instantiate()`` after ``raw_xpu_graph_exec()``, the previously returned xpuGraphExec_t will be destroyed. It is your responsibility not to use this object after destruction.

XPU doesn't provide APIs to manipulate this object.
)r%   raw_xpu_graph_execr3   s    r   rK   XPUGraph.raw_xpu_graph_exec   s    
 w)++r   r   )F)r(   boolreturnr	   r$   )r-   _POOL_HANDLE | NonerN   NonerN   rP   rN   r   )rE   strrN   rP   )rN   int)__name__
__module____qualname____firstlineno____doc__r&   r.   r2   r6   r9   r<   r-   rA   rD   rH   rK   __static_attributes____classcell__)r)   s   @r   r   r   5   sN     0 0) )+.', ,r   r   c                  X    \ rS rSr% SrSrS\S'     S
     SS jjrSS jrSS jr	S	r
g)r      a^  Context-manager that captures XPU work into a :class:`torch.xpu.XPUGraph` object for later replay.

Arguments:
    xpu_graph (torch.xpu.XPUGraph): Graph object used for capture.
    pool (optional): Opaque token (returned by a call to :func:`~torch.xpu.graph_pool_handle()` or
        :meth:`other_Graph_instance.pool()<torch.xpu.XPUGraph.pool>`) hinting this graph's capture
        may share memory from the specified pool.
    stream (torch.xpu.Stream, optional): If supplied, will be set as the current stream in the context.
        If not supplied, ``graph`` sets its own internal side stream as the current stream in the context.

.. note::
    For effective memory sharing, if you pass a ``pool`` used by a previous capture and the previous capture
    used an explicit ``stream`` argument, you should pass the same ``stream`` argument to this capture.

Ntorch.xpu.Stream | Nonedefault_capture_streamc                D   U R                   R                  c-  [        R                  R	                  5       U R                   l        Uc  SOU4U l        Ub  UOU R                   R                  U l        U R                  c  [        S5      eU R                  U l        Xl	        g )Nr   zcapture_stream must not be None)
r)   r_   r   r    Streamr-   capture_streamAssertionError
stream_ctx	xpu_graph)r/   re   r-   streams       r   __init__graph.__init__   s     >>00849II4D4D4FDNN1;?<RdW	(Fdnn.S.S 	 & !BCC--"r   c                    [         R                  R                  5         [         R                  R                  5         U R                  R                  5         U R                  R                  " U R                  6   g r$   )	r   r    synchronizeempty_cacherd   	__enter__re   r.   r-   )r/   s    r   rl   graph.__enter__   sH    				!!#$$dii0r   c                j    U R                   R                  5         U R                  R                  " U6   g r$   )re   r2   rd   __exit__)r/   argss     r   ro   graph.__exit__   s$    ""$  $'r   )rb   r-   rd   re   )NN)re   r   r-   rO   rf   r^   rQ   )rp   objectrN   rP   )rU   rV   rW   rX   rY   r_   __annotations__rg   rl   ro   rZ   r   r   r   r   r      sH      7;3:
 %)*.	## "# (	#*1(r   r   .r   _ModuleOrCallablec                    g r$   r   	callablessample_argsnum_warmup_itersallow_unused_inputr-   s        r   r   r      s     r   c                    g r$   r   rv   s        r   r   r      s     %(r   c                4   [         R                  " 5       (       a%  [         R                  " 5       (       a  [        S5      eSn[	        U [
        5      (       d+  SnU 4n [        R                  " [
        [        S4   U5      4nO-[        R                  " [
        [
        [        S4   S4   U5      n/ n[        X5       GH  u  p[	        U[         R                  R                  5      (       a  [        UR                  5      S:X  a2  [        UR                  5      S:X  a  [        UR                  5      S:X  d  [        S5      e[!        S UR#                  5        5       5      (       d  [        S5      e[         R$                  R&                  R(                  " U	6 n
UR+                  [        U
5      5        [!        S	 U
 5       5      (       a  GM  [-        S
5      e   U V	s/ s H  n	[        U	5      PM     nn	U  Vs/ s HG  n[	        U[         R                  R                  5      (       a  [        UR/                  5       5      OSPMI     nn[1        [        U 5      5       Vs/ s H  nX}   X   -   PM     nn[1        [        U 5      5       Vs/ s H!  n[         R2                  R5                  5       PM#     nn[1        [        U 5      5       Vs/ s H!  n[         R2                  R5                  5       PM#     nnUc
  [7        5       OUn[         R2                  R9                  5         [         R2                  R;                  [         R2                  R=                  5       5         [        XU5       H  u  nn	nSu  nnn[1        U5       H  n[         R$                  R&                  R?                  U" U	6 5      n[        S U 5       5      n[        U5      S:  d  MR  [         R@                  RC                  U[        S U 5       5      [        S U 5       5      SUS9nM     UUU4 H  nAM     M     SSS5        [         R2                  R9                  5         / n/ n[        XU5       H  u  nn	n[         R2                  RE                  UUS9   U" U	6 nSSS5        [         R$                  R&                  RG                  W5      u  nnUR+                  [        U5      5        UR+                  U5        M     / n/ n [        [I        U5      [I        U5      [I        U5      5       GH  u  nn!n"[        S U! 5       5      n#[        S U! 5       5      nSn[        U5      S:  ah  [         R2                  RE                  U"US9   [         R@                  RC                  U[        S U 5       5      [        S U# 5       5      SUS9nSSS5        / n$Sn%U HC  n&U&RJ                  (       a  Ub  U$R+                  UU%   5        U%S-  n%M2  U$R+                  S5        ME     [        U$5      n$UR+                  U#5        U R+                  U$5        GM"     URM                  5         U RM                  5                             SS jn'/ n([O        U 5       H  u  nnU'" UU   UU   X   X   UU   X   UU   UU   U U   5	      n)[	        U[         R                  R                  5      (       aF            SS jn*U*" UURP                  U)URR                  5      Ul)        U(R+                  U5        M  U(R+                  U)5        M     U(       a  U(S   $ [        U(5      $ s  sn	f s  snf s  snf s  snf s  snf ! , (       d  f       GNC= f! , (       d  f       GN= f! , (       d  f       GN= f)a  Accept callables (functions or :class:`nn.Module<torch.nn.Module>`\ s) and returns graphed versions.

Each graphed callable's forward pass runs its source callable's
forward XPU work as a XPU graph inside a single autograd node.

The graphed callable's forward pass also appends
a backward node to the autograd graph. During backward, this node runs the
callable's backward work as a XPU graph.

Therefore, each graphed callable should be a drop-in replacement for its source callable
in an autograd-enabled training loop.

See :ref:`Partial-network capture<partial-network-capture>` for detailed use and constraints.

If you pass a tuple of several callables, their captures will use the same memory pool.

Arguments:
    callables (torch.nn.Module or Python function, or tuple of these): Callable or callables to graph.
        If you pass a tuple of callables, their order in the tuple must be the same order they'll run
        in the live workload.
    sample_args (tuple of Tensors, or tuple of tuples of Tensors): Samples args for each callable.
        If a single callable was passed, ``sample_args`` must be a single tuple of argument Tensors.
        If a tuple of callables was passed, ``sample_args`` must be tuple of tuples of argument Tensors.
    num_warmup_iters (int): The number of warmup iterations. Currently, ``DataDistributedParallel`` needs
        11 iterations for warm up. Default: ``3``.
    allow_unused_input (bool): If False, specifying inputs that were not used when computing outputs
        (and therefore their grad is always zero) is an error. Defaults to False.
    pool (optional): Token (returned by :func:`~torch.xpu.graph_pool_handle` or
        :meth:`other_Graph_instance.pool()<torch.xpu.XPUGraph.pool>`) that hints this graph may share memory
        with the indicated pool.
.. note::
    The ``requires_grad`` state of each Tensor in ``sample_args`` must match the state
    that's expected for the corresponding real input in the training loop.

.. warning::
    This API is in beta and may change in future releases.

.. warning::
    ``sample_args`` for each callable must contain only Tensors. Other types are not allowed.

.. warning::
    Returned callables do not support higher order differentiation (e.g., double backward).

.. warning::
    In any :class:`~torch.nn.Module` passed to :func:`~make_graphed_callables`, only parameters
    may be trainable. Buffers must have ``requires_grad=False``.

.. warning::
    After you pass a :class:`torch.nn.Module` through :func:`~make_graphed_callables`,
    you may not add or remove any of that Module's parameters or buffers.

.. warning::
    :class:`torch.nn.Module`\s passed to :func:`~torch.xpu.make_graphed_callables` must not have module hooks
    registered on them at the time they are passed. However, registering hooks on modules *after* passing them
    through :func:`~torch.xpu.make_graphed_callables` is allowed.

.. warning::
    When running a graphed callable, you must pass its arguments in the same order and format
    they appeared in that callable's ``sample_args``.

.. warning::
    The automatic mixed precision is supported in :func:`~torch.xpu.make_graphed_callables` only with disabled
    caching. The context manager `torch.amp.autocast()` must have `cache_enabled=False`.
z_make_graphed_callables does not support the autocast caching. Please set `cache_enabled=False`.FT.r   zModules must not have hooks registered at the time they are passed. However, registering hooks on modules after passing them through make_graphed_callables is allowed.c              3  <   #    U  H  oR                   S L v   M     g7f)FNrequires_grad.0bs     r   	<genexpr>)make_graphed_callables.<locals>.<genexpr>F  s     EA%/s   zIn any :class:`~torch.nn.Module` passed to :func:`~make_graphed_callables`, only parameters may be trainable. All buffers must have ``requires_grad=False``.c              3  V   #    U  H  n[        U[        R                  5      v   M!     g 7fr$   )
isinstancer   r   )r   args     r   r   r   N  s     HKS:c5<<00Ks   ')zfIn the beta API, sample_args for each callable must contain only Tensors. Other types are not allowed.r   N)NNNc              3  J   #    U  H  oR                   (       d  M  Uv   M     g 7fr$   r~   r   os     r   r   r   n  s     $K1??QQ   #	#c              3  J   #    U  H  oR                   (       d  M  Uv   M     g 7fr$   r~   r   is     r   r   r   r  s      %';!AA';r   c              3  r   #    U  H-  oR                   (       d  M  [        R                  " U5      v   M/     g 7fr$   r   r   
empty_liker   s     r   r   r   u  s(      +9@AOO/E,,Q//s   77)outputsinputsgrad_outputsonly_inputsallow_unusedr,   c              3  r   #    U  H-  oR                   (       a  [        R                  " U5      OS v   M/     g 7fr$   r   r   s     r   r   r     s'      $
FT??EQ<ns   57c              3  J   #    U  H  oR                   (       d  M  Uv   M     g 7fr$   r~   r   s     r   r   r     s     J1//QQr   c              3  J   #    U  H  oR                   (       d  M  Uv   M     g 7fr$   r~   r   s     r   r   r     s      T,@qOO,@r   c              3  .   #    U  H  oc  M  Uv   M     g 7fr$   r   r   s     r   r   r     s     &W2EQqq2Es   	   c	           	        ^ ^^^^^^^^^
  " UU UUUUU4S jS[         R                  R                  5      m
SU
UU4S jjn	U	$ )Nc                     > \ rS rSr\SUUUU4S jj5       r\\R                  R                  R                  SU UU4S jj5       5       r
Srg)Omake_graphed_callables.<locals>.make_graphed_autograd_function.<locals>.Graphedi  c                *  > [        T5       HB  nTU   R                  5       X   R                  5       :w  d  M,  TU   R                  X   5        MD     TR                  5         [	        T[
        5      (       d  [        S5      e[        S T 5       5      $ )Nzstatic_outputs must be a tuplec              3  @   #    U  H  oR                  5       v   M     g 7fr$   detachr   s     r   r   jmake_graphed_callables.<locals>.make_graphed_autograd_function.<locals>.Graphed.forward.<locals>.<genexpr>  s     @AXXZZs   )rangedata_ptrcopy_r9   r   tupleRuntimeError)ctxr   r   	fwd_graphlen_user_argsstatic_input_surfacestatic_outputss      r   forwardWmake_graphed_callables.<locals>.make_graphed_autograd_function.<locals>.Graphed.forward  s     }-A+A.779VY=O=O=QQ,Q/55fi@ .   "!.%88&'GHH@@@@r   c                  > [        U5      [        T5      :w  a#  [        S[        T5       S[        U5       35      e[        TU5       H?  u  p#Uc  M
  UR                  5       UR                  5       :w  d  M.  UR	                  U5        MA     TR                  5         [        T[        5      (       d  [        S5      e[        S T 5       5      $ )Nz	Expected z gradients but got z"static_grad_inputs must be a tuplec              3  J   #    U  H  ob  UR                  5       OUv   M     g 7fr$   r   r   s     r   r   kmake_graphed_callables.<locals>.make_graphed_autograd_function.<locals>.Graphed.backward.<locals>.<genexpr>  s!      @R1-AHHJQ6@Rs   !#)lenr   zipr   r   r9   r   r   )r   gradsggrad	bwd_graphstatic_grad_inputsstatic_grad_outputss       r   backwardXmake_graphed_callables.<locals>.make_graphed_autograd_function.<locals>.Graphed.backward  s     u:%8!99&#C(;$<#==PQTUZQ[P\]   ##6>GA}::<4==?:GGDM  ?   "!"4e<<&'KLL @R  r   r   N)r   rr   r   r   rN   tuple[Tensor, ...])r   rr   r   r   rN   r   )rU   rV   rW   rX   staticmethodr   r   autogradfunctiononce_differentiabler   rZ   )r   r   r   r   r   r   r   s   r   Graphedr     sN    A A A ^^$$88  9 r   r   c                    > [         R                  R                  R                  " U 6 nTR                  " [        U5      T-   6 n[         R                  R                  R                  UT5      $ r$   )r   utils_pytreearg_tree_leavesapplyr   tree_unflatten)	user_argsflatten_user_argsoutr   module_paramsoutput_unflatten_specs      r   functionalizedVmake_graphed_callables.<locals>.make_graphed_autograd_function.<locals>.functionalized  sU     % 3 3 C CY O--%(9":]"JLC;;&&55c;PQQr   )r   rr   rN   rr   )r   r   Function)r   r   r   r   r   r   r   r   r   r   r   s   ````````` @r   make_graphed_autograd_function>make_graphed_callables.<locals>.make_graphed_autograd_function  s4    	 	enn-- 	B	R 	R r   c                $   ^ ^^^ SU UUU4S jjnU$ )Nc                 D   > TR                   T:X  a  T" U 0 UD6$ T" U 0 UD6$ r$   )training)r   user_kwargsfuncgraph_training_stategraphedorig_fwds     r   new_fwdEmake_graphed_callables.<locals>.make_graphed_forward.<locals>.new_fwd  s0    }}(<<&	A[AA'BkBBr   )r   z_P.argsr   z	_P.kwargsrN   r   r   )r   r   r   r   r   s   ```` r   make_graphed_forward4make_graphed_callables.<locals>.make_graphed_forward  s    C C r   )r   r   r   r   r   ztuple[torch.nn.Parameter, ...]r   rT   r   ztorch.utils._pytree.TreeSpecr   r   r   r   r   ztuple[Tensor | None, ...]r   r   rN   zCallable[..., object])
r   ztorch.nn.Moduler   rM   r   Callable[_P, _R]r   r   rN   r   )*r   is_autocast_enabledis_autocast_cache_enabledr   r   r   typingcastr   r   nnModuler   _backward_hooks_forward_hooks_forward_pre_hooksallbuffersr   r   r   append	TypeError
parametersr   r    r   r   rj   rf   ra   tree_leavesr   r   r   tree_flattenreversedr   reverse	enumerater   r   )+rw   rx   ry   rz   r-   just_one_callable_sample_argsflatten_sample_argscrp   flatten_argper_callable_len_user_argsper_callable_module_paramsr   "per_callable_static_input_surfaces_
fwd_graphs
bwd_graphsmempoolr   r   grad_inputsr   outputs_gradvper_callable_static_outputs"per_callable_output_unflatten_specr   func_outputsflatten_outputsspec per_callable_static_grad_outputsper_callable_static_grad_inputsr   r   r   r   grad_idxr   r   retr   r   s+                                              r   r   r      sH   N   ""u'F'F'H'Hm
 	
  i'' L	E&#+$6DF{{5vs{);S)@#A;Oy/a))A%%&!+(()Q.,,-2"a  EEEE"1 
 kk))994@""5#56HKHHH^ ) 06 9L!L8K#d)8K!L "A ",Auxx!?!?allnRG  " s9~&*&A 	!;!>>& ' *
 16c)n0EF0E1%))$$&0EJF05c)n0EF0E1%))$$&0EJF%)\!tG 
II			%))**,	-03%G1
,D$, 2B.K,+,++--99$+F$$K$KK|$q("'.."5"5 ,$ %';%   &+ +9@+ & %)%7 #6 
#K	 - |[9 :'1
 
.. 
II #%)+&!$Yj!IdIYY__YW_5;L 6 !& 3 3 @ @ N#**5+AB*11$7 "J (*$&(#;>34,-<7ni
 $ $
FT$
 
 JJJ|q 9#nn11(  T,@ TT!&&W2E&W!W $!3 2  :  'C  [%<"))+h*?@A"))$/ ( ##56(//0CD'../ABA<F %,,.#++-222 62 	2
  <2 12 +2 72 /2 
2h $&CY'40qMqM&)&).q1.1'*,Q/+A.

 dEHHOO,,%&* * +	
 " 0dmmWdllDL JJtJJwE (H 1v:w "M"*
 GF 
.	-< 65, :9sL   ]8A]]	(]	(]A3]$A]$]63A^$
]36
^	
^	)rN   rM   rR   )   FN)rw   rt   rx   r   ry   rT   rz   rM   r-   rO   rN   rt   )rw   tuple[_ModuleOrCallable, ...]rx   ztuple[tuple[Tensor, ...], ...]ry   rT   rz   rM   r-   rO   rN   r  )rw   1_ModuleOrCallable | tuple[_ModuleOrCallable, ...]rx   z3tuple[Tensor, ...] | tuple[tuple[Tensor, ...], ...]ry   rT   rz   rM   r-   rO   rN   r  )&
__future__r   r   collections.abcr   r   r   r   typing_extensionsr   r	   r
   r   r   	torch.xpur   _utilsr   __all__r   r   hasattr_C__dict__torch._Cr   r   r   r   r   r   r   r   r   rr   rt   rs   r   r   r   r   <module>r     s   "  $ 5 5 6 6   &   T]t_uxx)**%0%=EHHk"2=>V2WEHH./9D':EHH56 V U+<
^,y ^,B3( 3(l  %xx#v+1FF 9 F 
 $ $ #  	
   
 
 $ $(,(/( ( 	(
 ( #( 
( $ $n@nDn n 	n
 n 7nr   