
    "
3j;S                        S SK r S SKrS SKJr  S SKJr  S SKJr  S SKrS SK	r	S SK
Jr  S SKJrJr  \\\\\   -  4   r\R(                  " \5      r " S S5      r " S	 S
5      r " S S5      r " S S\5      r " S S5      rg)    N)OrderedDict)Mapping)Any)TensorProto)InferenceSession
RunOptionsc                      \ rS rSr\S\S\S\4S j5       r\S\S\4S j5       r\S\4S j5       r	\S\4S	 j5       r
\S\S\\\4   4S
 j5       r\S\4S j5       r\S\R                   4S j5       r\S\R                   4S j5       r\S\S\\\R                   4   4S j5       r\S\S\\\R                   4   4S j5       rSrg)
TypeHelper   ort_sessionnamereturnc                     [        U R                  5       5       H#  u  p#UR                  U:X  d  M  UR                  s  $    [	        SU S35      e)Nzinput name 
 not found)	enumerate
get_inputsr   type
ValueError)r   r   _iinputs       d/home/wildlama/miniconda3/lib/python3.13/site-packages/onnxruntime/transformers/io_binding_helper.pyget_input_typeTypeHelper.get_input_type   sH    ";#9#9#;<IBzzT!zz! = ;tfJ788    c                     [        U R                  5       5       H#  u  p#UR                  U:X  d  M  UR                  s  $    [	        SU S35      e)Nzoutput name r   )r   get_outputsr   r   r   )r   r   r   outputs       r   get_output_typeTypeHelper.get_output_type   sH    #K$;$;$=>JB{{d"{{" ? <vZ899r   ort_typec                    [         R                  [         R                  [         R                  [         R                  [
        [         R                  [         R                  [         R                  [         R                  [         R                  [         R                  [         R                  [         R                  [         R                  S.nX;  a  [        U  S35      eX   $ )N)tensor(int64)tensor(int32)tensor(float)tensor(float16)tensor(bool)tensor(uint8)tensor(int8)tensor(double)tensor(int16)tensor(uint16)tensor(uint32)tensor(uint64)tensor(complex64)tensor(complex128) not found in map)numpyint64int32float32float16booluint8int8float64int16uint16uint32uint64	complex64
complex128r   )r    ort_type_to_numpy_type_maps     r   ort_type_to_numpy_type!TypeHelper.ort_type_to_numpy_type#   s     #[["[["]]$}} "[[!JJ#mm"[[#ll#ll#ll!&"'"2"2&
"  5z):;<<)33r   c                    0 S[         R                  _S[         R                  _S[         R                  _S[         R                  _S[         R
                  _S[         R                  _S[         R                  _S[         R                  _S	[         R                  _S
[         R                  _S[         R                  _S[         R                  _S[         R                  _S[         R                  _S[         R                  _S[         R                   _S[         R"                  _[         R$                  [         R&                  [         R(                  [         R*                  S.EnX;  a  [-        U  S35      eX   $ )Nr"   r#   r$   r%   tensor(bfloat16)r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   tensor(float8e4m3fn)tensor(float8e4m3fnuz))tensor(float8e5m2)tensor(float8e5m2fnuz)tensor(int4)tensor(uint4)r0   )torchr2   r3   r4   r5   bfloat16r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   float8_e4m3fnfloat8_e4m3fnuzfloat8_e5m2float8_e5m2fnuzint4uint4r   )r    ort_type_to_torch_type_maps     r   ort_type_to_torch_type!TypeHelper.ort_type_to_torch_type:   sg   &
U[[&
U[[&
 U]]&
 u}}	&

 &
 EJJ&
 U[[&
 EJJ&
 emm&
 U[[&
 ell&
 ell&
 ell&
  &
 !%"2"2&
  #E$7$7!&
" %e&;&;#&
$ #("3"3&+&;&;!JJ"[[+&
". 5z):;<<)33r   c                    0 nU R                  5        H.  n[        R                  UR                  5      XR                  '   M0     U R                  5        H.  n[        R                  UR                  5      XR                  '   M0     U$ )z9Create a mapping from input/output name to onnx data type)r   r
   ort_type_to_onnx_typer   r   r   )r   name_to_onnx_typer   r   s       r   get_io_onnx_type_mapTypeHelper.get_io_onnx_type_mapX   sp      ++-E,6,L,LUZZ,Xjj) . "--/F-7-M-Mfkk-Zkk* 0  r   c                 0   0 S[         R                  _S[         R                  _S[         R                  _S[         R                  _S[         R
                  _S[         R                  _S[         R                  _S[         R                  _S	[         R                  _S
[         R                  _S[         R                  _S[         R                  _S[         R                  _S[         R                  _S[         R                  _S[         R                   _S[         R"                  _[         R$                  [         R&                  [         R(                  [         R*                  [         R,                  [         R.                  S.EnX;  a  [1        U  S35      eX   $ )Nr"   r#   r$   r%   rD   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   rE   rF   )rG   rH   ztensor(float4e2m1)rI   rJ   ztensor(string)r0   )r   INT64INT32FLOATFLOAT16BFLOAT16BOOLUINT8INT8DOUBLEINT16UINT16UINT32UINT64	COMPLEX64
COMPLEX128FLOAT8E4M3FNFLOAT8E4M3FNUZ
FLOAT8E5M2FLOAT8E5M2FNUZ
FLOAT4E2M1INT4UINT4STRINGr   )r    ort_type_to_onnx_type_maps     r   rW    TypeHelper.ort_type_to_onnx_typec   s   %
[..%
[..%
 [..%
 {22	%

  4 4%
 K,,%
 [..%
 K,,%
 k00%
 [..%
 k00%
 k00%
 k00%
  !6!6%
 !+"8"8%
  #K$<$<!%
" %k&@&@#%
$ #."8"8&1&@&@"-"8"8',,(..)00/%
!2 4z):;<<(22r   
numpy_typec                 h   [         R                  [        R                  [         R                  [        R                  [         R                  [        R                  [         R
                  [        R
                  [        [        R                  [         R                  [        R                  [         R                  [        R                  [         R                  [        R                  [         R                  [        R                  [         R                  [        R                  [         R                  [        R                  [         R                  [        R                  [         R                  [        R                  [         R                  [        R                  0nX;  a  [!        U  S35      eX   $ Nr0   )r1   r2   rK   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r   )ru   numpy_type_to_torch_type_maps     r   numpy_type_to_torch_type#TypeHelper.numpy_type_to_torch_type   s     KKKKMM5==MM5==%**KKJJ

MM5==KKLL%,,LL%,,LL%,,OOU__e..(
$" 9
|+<=>>+77r   
torch_typec                 h   [         R                  [        R                  [         R                  [        R                  [         R                  [        R                  [         R
                  [        R
                  [         R                  [        [         R                  [        R                  [         R                  [        R                  [         R                  [        R                  [         R                  [        R                  [         R                  [        R                  [         R                  [        R                  [         R                  [        R                  [         R                  [        R                  [         R                  [        R                  0nX;  a  [!        U  S35      eX   $ rw   )rK   r2   r1   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r   )r{   torch_type_to_numpy_type_maps     r   torch_type_to_numpy_type#TypeHelper.torch_type_to_numpy_type   s     KKKKMM5==MM5==JJKKJJ

MM5==KKLL%,,LL%,,LL%,,OOU__e..(
$" 9
|+<=>>+77r   c                    0 nU R                  5        H.  n[        R                  UR                  5      XR                  '   M0     U R                  5        H.  n[        R                  UR                  5      XR                  '   M0     U$ )z:Create a mapping from input/output name to numpy data type)r   r
   rA   r   r   r   )r   name_to_numpy_typer   r   s       r   get_io_numpy_type_map TypeHelper.get_io_numpy_type_map   s       ++-E-7-N-Nuzz-Zzz* . "--/F.8.O.OPVP[P[.\{{+ 0!!r   c                    0 nU R                  5        H.  n[        R                  UR                  5      XR                  '   M0     U R                  5        H.  n[        R                  UR                  5      XR                  '   M0     U$ )z:Create a mapping from input/output name to torch data type)r   r
   rT   r   r   r   )r   name_to_torch_typer   r   s       r   get_io_torch_type_map TypeHelper.get_io_torch_type_map   r   r    N)__name__
__module____qualname____firstlineno__staticmethodr   strr   r   rA   rT   dictintrY   rW   r1   dtypery   rK   r~   r   r   __static_attributes__r   r   r   r
   r
      sh   9$4 9C 9C 9 9 :3 :3 : : 4 4 4, 4 4 4: !*: !tCH~ ! ! 3 3 3> 8U[[ 8 8. 8U[[ 8 8. "+; "S%++EU@V " " "+; "S%++EU@V " "r   r
   c            
           \ rS rSr\S\4S j5       r\S\R                  S\R                  S\R                  S\	\R                     4S j5       r
\SS	 j5       rS
rg)IOBindingHelper   r   c                     0 nUR                  5        HZ  u  pE[        R                  X5      n[        R                  U5      n[        R
                  " [        R                  " U5      XrS9X4'   M\     U$ )zpReturns a dictionary of output name as key, and 1D tensor as value. The tensor has enough space for given shape.)r   device)itemsr
   r   rT   rK   emptyr1   prod)r   output_shapesr   output_buffersr   shaper    r{   s           r   get_output_buffers"IOBindingHelper.get_output_buffers   sb     (..0KD!11+DH#::8DJ#(;;uzz%/@
#bN  1 r   	input_idsposition_idsattention_maskpastc                 d   [         R                  U 5      nU R                  5       nUR                  5       (       d   eUR	                  SUR
                  R                  SUS   [        UR                  5       5      UR                  5       5        Ub  [        U5       H  u  pU
R                  5       (       d   eU
R                  5       nUS:X  a  UR                  5       nUR	                  SU	 3U
R
                  R                  SUSU	 3   [        U
R                  5       5      U5        M     Ubi  UR                  5       (       d   eUR	                  SUR
                  R                  SUS   [        UR                  5       5      UR                  5       5        Ubi  UR                  5       (       d   eUR	                  SUR
                  R                  SUS   [        UR                  5       5      UR                  5       5        U R                  5        H  nUR                  nX]   n[        R                  U SUR
                  R                   S[        UR                  5       5       35        UR                  UUR
                  R                  SX}   Xm   UR                  5       5        M     U$ )zdIO binding for a session: bind inputs (input_ids, position_ids, attention_mask, past_*) and outputs.r   r   past_r   r   z device type=z shape=)r
   rY   
io_bindingis_contiguous
bind_inputr   r   listsizedata_ptrr   r   r   loggerdebugbind_output)r   r   r   r   r   r   r   rX   r   ipast_ir   r   output_nameoutput_buffers                  r   prepare_io_binding"IOBindingHelper.prepare_io_binding   s    ';;KH !++-
 &&((((!!k*!" 	
 &t_	++----!??,q=  )113H%%A3KMM&&%aSk2' -$ %!//1111!! %%**!"23^((*+'') #--////!!##((!.1\&&()%%' "--/F ++K*7MLLK=m6J6J6O6O5PPWX\]j]o]o]qXrWstu""$$))!.*&&(	 0 r   c                 v   / nU R                  5        H  nUR                  nX   nX&   nUS[        R                  " U5       R	                  U5      R                  5       R                  5       n	U(       a/  UR                  U	R                  5       R                  5       5        M  UR                  U	5        M     U$ )z3Copy results to cpu. Returns a list of numpy array.r   )	r   r   r1   r   reshapeclonedetachappendcpu)
r   r   r   return_numpyort_outputsr   r   bufferr   copy_tensors
             r   "get_outputs_from_io_binding_buffer2IOBindingHelper.get_outputs_from_io_binding_buffer*  s     !--/F ++K#0F!.E UZZ%67??FLLNUUWK"";??#4#:#:#<="";/ 0 r   r   N)T)r   r   r   r   r   r   r   rK   Tensorr   r   r   r   r   r   r   r   r      s    (8   R<<R llR 	R
 5<< R Rh  r   r   c                       \ rS rSrSrSS\S\R                  4S jjrS\	S\	4S jr
S	 rS
\	S\R                  4S jrS\4S jrSS\\	\R                  4   S\S\4S jjr\SS\S\S\S\\	\4   4S jj5       rSrg)CudaSessioni:  zLInference Session with IO Binding for ONNX Runtime CUDA or TensorRT providerr   r   c                 8   Xl         U R                   R                  5        Vs/ s H  oDR                  PM     snU l        U R                   R	                  5        Vs/ s H  oUR                  PM     snU l        [        R                  U R                   5      U l        [        R                  U R                   5      U l
        U R                   R                  5       U l        X0l        [        5       U l        [        5       U l        X l        0 U l        g s  snf s  snf N)r   r   r   input_namesr   output_namesr
   rY   io_name_to_onnx_typer   io_name_to_torch_typer   enable_cuda_graphr   input_tensorsoutput_tensorsr   buffer_sharing)selfr   r   r   r   r   s         r   __init__CudaSession.__init__=  s    &484D4D4O4O4QR4Q5JJ4QR7;7G7G7S7S7UV7UV[[7UV$.$C$CDDTDT$U!%/%E%EdFVFV%W"**557!2(])m /1 SVs   DD
input_namer   c                     XR                   ;   d   eX R                  ;   d   eX R                  U'   XR                  U'   g r   )r   r   r   )r   r   r   s      r   set_buffer_sharingCudaSession.set_buffer_sharingM  sB    -----/////*5J'+5K(r   c                     U ? U ?U ?g r   )r   r   r   )r   s    r   __del__CudaSession.__del__S  s    Or   r   tensorc           	      p   UR                   R                  b  UR                   R                  OSn[        UR                  5      S:X  a  S/O[	        UR                  5      nU R
                  R                  UUR                   R                  UU R                  U   UUR                  5       5        XR                  ;   ax  U R
                  R                  U R                  U   UR                   R                  UU R                  U   UUR                  5       5        X R                  U R                  U   '   g g )Nr      )r   indexlenr   r   r   r   r   r   r   r   r   r   )r   r   r   	device_idtensor_shapes        r   bind_input_and_buffer_sharing)CudaSession.bind_input_and_buffer_sharingX  s    +1==+>+>+JFMM''PQ	!&,,/14s$v||:L""MM%%d+OO	
 &&&OO''##D)""))$/! >D 3 3D 9: 'r   
shape_dictc                    U R                   (       a  UR                  5        H  u  p#X R                  ;   d  M  X R                  ;   a<  [	        U R                  U   R
                  5      [	        U5      :X  a  MV  [        S5      eU R                  U   n[        R                  " [	        U5      US9R                  U R                  S9nXPR                  U'   U R                  X%5        M     UR                  5        GHM  u  p#X R                  ;   d  M  X R                  ;   a1  [	        U R                  U   R
                  5      [	        U5      :X  a  MW  X R                  ;   a  Mh  U R                  U   n[        R                  " [	        U5      US9R                  U R                  S9nXPR                  U'   U R                   R#                  UUR                  R$                  UR                  R&                  b  UR                  R&                  OSU R(                  U   [+        UR-                  5       5      UR/                  5       5        GMP     g)z Allocate tensors for I/O Bindingz(Expect static input shape for cuda graph)r   )r   Nr   )r   r   r   r   tupler   RuntimeErrorr   rK   r   tor   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   r   r   torch_dtyper   s         r   allocate_buffersCudaSession.allocate_buffersp  s   !!)//1+++111 !3!3D!9!?!?@E%LP$*+UVV"&"<"<T"BK"[[u[ILLTXT_T_L`F/5&&t,66tD  2 &++-KD(((...59L9LT9R9X9X3Y]bch]i3i..."88>U5\EHHPTP[P[H\,2##D)++MM&&+1==+>+>+JFMM''PQ--d3'OO% .r   N	feed_dictrun_optionssynchronizec                 t   UR                  5        GH  u  pE[        U[        R                  5      (       a  UR	                  5       (       d   eX@R
                  ;   d  MM  U R                  (       a  U R                  U   R                  5       UR                  5       :X  d   eU R                  U   R                  UR                  :X  d   eUR                  R                  S:X  d   eU R                  U   R                  U5        M  U R                  XE5        GM     U(       af  U R                  R                  5         U R                   R#                  U R                  U5        U R                  R%                  5         U R&                  $ U R                   R#                  U R                  U5        U R&                  $ )z$Bind input tensors and run inferencecuda)r   
isinstancerK   r   r   r   r   r   nelementr   r   r   copy_r   r   synchronize_inputsr   run_with_iobindingsynchronize_outputsr   )r   r   r   r   r   r   s         r   inferCudaSession.infer  sO   %OO-LDfell338L8L8N8NNN'''))--d3<<>&//BSSSS--d399V\\III!==--777&&t,226:66tD . OO..0//MOO//1 """ //M"""r   r   r   streamr   c                 :    U SUS.nUS:w  a  [        U5      US'   U$ )NkSameAsRequested)r   arena_extend_strategyr   r   user_compute_stream)r   )r   r   r   optionss       r   get_cuda_provider_options%CudaSession.get_cuda_provider_options  s1     #%7!2
 Q;-0[G)*r   )r   r   r   r   r   r   r   r   r   r   r   F)NT)r   )r   r   r   r   __doc__r   rK   r   r   r   r   r   r   r   	ShapeDictr   r   r   r6   r   r   r   r   r   r   r   r   r   r   r   :  s    V1$4 1ell 1 6S 6s 6
D# Du|| D0$9 $L#tC$56 #Z #ei #, S T SV _cdgildl_m  r   r   c                      ^  \ rS rSr    SS\S\R                  S\S\S\	S\	S	\
\\4   S-  4U 4S
 jjjrSS\S\4S jjrSS\
\\R                  4   S\4U 4S jjjrSrU =r$ )
GpuBindingi  Nr   r   r   enable_gpu_graphgpu_graph_idr   r   c                   > [         T
U ]  XU5        U(       a*  UR                  5        H  u  pU R                  X5        M     U R	                  U5        XPl        U(       a  [        R                  " U5      OS U l        X`l	        S U l
        g r   )superr   r   r   r   r  copydeepcopyr   r   last_run_gpu_graph_id)r   r   r   r   r  r  r   r   r   r   	__class__s             r   r   GpuBinding.__init__  st     	.>?+9+?+?+A'
''
@ ,B 	j)(7G$--
3T%)"r   disable_cuda_graph_in_runr   c                     [        5       nU(       a  SOU R                  nUR                  S[        U5      5        X0l        U$ )Nr  )r   r  add_run_config_entryr   r
  )r   r  r   r  s       r   get_run_optionsGpuBinding.get_run_options  s8    ,6rD<M<M$$^S5FG%1"r   r   c                    > U R                  U5      nU R                  (       a  UR                  SS5        [        TU ]  X5      $ )N'disable_synchronize_execution_providers1)r  r   r  r  r   )r   r   r  r   r  s       r   r   GpuBinding.infer  s<    **+DE;;,,-VX[\w}Y44r   )r  r
  r   r   )Fr  r   Nr   )r   r   r   r   r   rK   r   r  r6   r   r   r   r   r   r  r   r   r   __classcell__)r  s   @r   r  r    s     "'04*%* * 	*
 * * * S#X-* *.	 	* 	5tC$56 5SW 5 5r   r  c            	       |    \ rS rSrSrSS\S\R                  S\S\4S jjr	  SS	\
S
\S\\\4   S-  S\4S jjrSrg)GpuBindingManageri  zA manager for I/O bindings that support multiple CUDA Graphs.
One cuda graph is reused for same input shape. Automatically add a new cuda graph for new input shape.
r   r   r   max_cuda_graphsc                 P    Xl         X l        / U l        S U l        X0l        X@l        g r   )r   r   graph_bindingsno_graph_bindingr   r  )r   r   r   r   r  s        r   r   GpuBindingManager.__init__  s,    & ! !%.r   Nr   use_cuda_graphr   r   c           
      <   U R                    H  nUR                  U:X  d  M  Us  $    [        U R                   5      U R                  :  d  U(       do  U R                  c;  [        U R                  U R                  XR                  US9U l        U R                  $ U R                  R                  U5        U R                  $ [        U R                  U R                  US[        U R                   5      U R                  US9nU R                   R                  U5        U$ )N)r   r   T)r  r  r   r   )r  r   r   r  r  r  r   r   r   r   r   )r   r   r  r   gpu_graph_bindings        r   get_bindingGpuBindingManager.get_binding  s     "&!4!4 ++z9(( "5 t""#t';';;N$$,(2$$dkk:kkbp)%
 ((( %%66zB((( 'KK!T001;;)
 	""#45  r   )r   r  r  r  r   r   )r   r   )FN)r   r   r   r   r   r   rK   r   r   r   r  r6   r   r   r  r"  r   r   r   r   r  r    st    /$4 /ell /TW /nq /"  %04	 ! !  ! S#X-	 !
 
 !  !r   r  )r  loggingcollectionsr   collections.abcr   typingr   r1   rK   onnxr   onnxruntimer   r   r   r   r   r   r  	getLoggerr   r   r
   r   r   r  r  r   r   r   <module>r+     s      # #     4 Cc**+				8	$t" t"nm m`~ ~B)5 )5X3! 3!r   