
    
9j/y                    N   S SK Jr  S SKrS SKrS SKJr  S SKrS SKrS SKJr  S SKJ	r	  S SK
Jr  S SKJr  SqS	 rS
 rS rS&S jrS&S jrS rS&S jrS rS&S jrS&S jrS&S jrS&S jrS rS rS rS rS rS r S r!S r"S r#S'S jr$S r%S  r&S! r'S(S" jr(S&S# jr)S)S$ jr*S*S% jr+g)+    )annotationsN)linalg)_core)cublas)device)_util   c                     [         $ N_batched_gesv_limit     E/home/wildlama/miniconda3/lib/python3.13/site-packages/cupy/cublas.pyget_batched_gesv_limitr      s    r   c                    U q g r   r   )limits    r   set_batched_gesv_limitr      s    r   c                2   [         R                  " X5        [         R                  " U 5        [         R                  " U 5        U R                  UR                  :X  d  U R                  UR                  S-   :X  a-  U R
                  SS UR
                  SU R                  S-
   :X  d  [        S5      e[         R                  " X5      u  p#UR                  S:X  a!  [        R                  " UR
                  U5      $ US:X  a  SnO&US:X  a  SnOUS	:X  a  S
nOUS:X  a  SnO[        S5      e[        [        US-   5      n[        [        US-   5      nU R                  S:  a#  [        R                  " U R
                  SS 5      OSnU R
                  S   nU R                  UR                  :X  a  UR
                  S   OSn	UR
                  n
U R                   R"                  nUR                   R"                  n[        R$                  " U R'                  XxU5      R)                  SSS5      US9n [        R$                  " UR'                  XxU	5      R)                  SSS5      US9nU R                   R"                  U:X  a  U R+                  5       n UR                   R"                  U:X  a  UR+                  5       nU[-        5       :  a.  [.        R0                  " SR3                  U[-        5       5      5        [4        R6                  " 5       nUnX-  U R8                  -  n[        R:                  " U R                   R"                  U R                   R"                  X-  -   U[        R<                  S9nUnUU	-  UR8                  -  n[        R:                  " UR                   R"                  UR                   R"                  UU-  -   U[        R<                  S9n[        R                  " Xx4[>        R@                  S9n[        R                  " U4[>        R@                  S9n[>        R                  " S[>        R@                  S9nU" XUR                   R"                  UUR                   R"                  UR                   R"                  U5        [         RB                  " UU5        U" U[        RD                  XUR                   R"                  UUR                   R"                  UR                   R"                  UURF                  R                   U5        US   S:w  aR  SR3                  URH                  5      nUS   S:  a  USR3                  US   * 5      -  n[J        RL                  " U5      eUR)                  SSS5      R'                  U
5      RO                  USS9$ )a  Solves multiple linear matrix equations using cublas<t>getr[fs]Batched().

Computes the solution to system of linear equation ``ax = b``.

Args:
    a (cupy.ndarray): The matrix with dimension ``(..., M, M)``.
    b (cupy.ndarray): The matrix with dimension ``(..., M)`` or
        ``(..., M, K)``.

Returns:
    cupy.ndarray:
        The matrix with dimension ``(..., M)`` or ``(..., M, K)``.
   NzEa must have (..., M, M) shape and b must have (..., M) or (..., M, K)r   fsdFcDzinvalid dtypegetrfBatchedgetrsBatched   dtypez/The matrix size ({}) exceeds the set limit ({}))r   z Error reported by {} in cuBLAS. z)The {}-th parameter had an illegal value.F)copy)(r   _assert_cupy_array_assert_stacked_2d_assert_stacked_squarendimshape
ValueErrorlinalg_common_typesizecupyempty	TypeErrorgetattrr   mathproddataptrascontiguousarrayreshape	transposer&   r   warningswarnformatr   get_cublas_handleitemsizearangeuintpnumpyint323_check_cublas_info_array_if_synchronization_allowedCUBLAS_OP_Nctypes__name__r   LinAlgErrorastype)abr%   	out_dtypetgetrfgetrsbsnnrhsb_shape
a_data_ptr
b_data_ptrhandleldaa_stepa_arrayldbb_stepb_arraypivotdinfoinfomsgs                           r   batched_gesvr`      s:    
Q"	Q	  # 
166	QVVqvvz1GGCRLAGGKQVVaZ00 	 //5Evv{zz!''9--|	#	#	#((FA./EFA./E$%FFQJ1773B<	 AB	A&&AFF*1772;DggGJJqyy2<<Q1E%*	,Aqyy5??1aH%*	,AvvzzZFFHvvzzZFFH!##GfQ 6 89	; %%'F
CWqzz!Fkk!&&**affjj6;&> $

,G
C4Z!**$Fkk!&&**affjj6B;&> $

,GJJwekk2EJJuEKK0E;;t5;;/D	&W\\%%sEJJNNEJJNNBO	==eUK	&&$$aw||/?/?
**..',,**C1A1A2GAw!|077G7Q;>EEtAwhOOC  %%;;q!Q''077	7NNr   c                    [        XS5      $ )zFinds the (smallest) index of the element with the maximum magnitude.

Note: The result index is 1-based index (not 0-based index).
amax	_iamaxminxouts     r   iamaxrh   w       
 QV$$r   c                    [        XS5      $ )zFinds the (smallest) index of the element with the minimum magnitude.

Note: The result index is 1-based index (not 0-based index).
aminrc   re   s     r   iaminrl      ri   r   c                   U R                   S:w  a$  [        SR                  U R                   5      5      eU R                  R                  nUS:X  a  SnO&US:X  a  SnOUS:X  a  SnOUS:X  a  S	nO[        S
5      e[        [        SU-   U-   5      n[        R                  " 5       nSn[        XaU5      u  pn
 U" X`R                  U R                  R                  SU5        [        R                  " Xj5        Uc  U	nU$ UR                  U:w  a  [        R                   " X5        U$ ! [        R                  " Xj5        f = f)Nr   !x must be a 1D array (actual: {})r   r   r   r   r   r   r   r   i)r*   r,   r<   r%   charr1   r2   r   r   r=   _setup_result_ptrr.   r5   r6   setPointerModer   elementwise_copy)rf   rg   namer%   rL   funcrU   result_dtype
result_ptrresult	orig_modes              r   rd   rd      s   vv{<CCAFFKLLGGLLE|	#	#	#((637T>*D%%'FL$5\%#!J	1VVVQVVZZJ7f0
{ J 
l	"v+J 	f0s   7)D% %D=c                   U R                   S:w  a$  [        SR                  U R                   5      5      eU R                  R                  nUS:X  a  [
        R                  nOPUS:X  a  [
        R                  nO9US:X  a  [
        R                  nO"US:X  a  [
        R                  nO[        S5      e[        R                  " 5       nUR                  5       n[        XAU5      u  pgn U" X@R                  U R                   R"                  SU5        [
        R$                  " XH5        Uc  UnU$ UR                  U:w  a  [&        R(                  " Xq5        U$ ! [
        R$                  " XH5        f = f)z&Computes the sum of the absolute of x.r   rn   r   r   r   r   r   )r*   r,   r<   r%   rp   r   sasumdasumscasumdzasumr1   r   r=   lowerrq   r.   r5   r6   rr   r   rs   	rf   rg   r%   ru   rU   rv   rw   rx   ry   s	            r   asumr         vv{<CCAFFKLLGGLLE|||	#||	#}}	#}}((%%'F;;=L$5\%#!J	1VVVQVVZZJ7f0
{ J 
l	"v+J 	f0   ')E E-c           	     <   [        X5        UR                  R                  nUS:X  a  [        R                  nOPUS:X  a  [        R
                  nO9US:X  a  [        R                  nO"US:X  a  [        R                  nO[        S5      e[        R                  " 5       n[        XPU5      u  pn U" XQR                  XaR                  R                  SUR                  R                  S5        [        R                  " XW5        g! [        R                  " XW5        f = f)z-Computes y += a * x.

(*) y will be updated.
r   r   r   r   r   r   N)_check_two_vectorsr%   rp   r   saxpydaxpycaxpyzaxpyr1   r   r=   _setup_scalar_ptrr.   r5   r6   rr   )rI   rf   yr%   ru   rU   a_ptrry   s           r   axpyr      s    
 qGGLLE|||	#||	#||	#||((%%'F+Fu=Ai1VVVUFFJJ166::qAf0f0s   .>D Dc           	     d   U R                   R                  nUS:X  a  [        R                  nO3US:X  a  [        R                  nOUS;   a  [        S5      e[        S5      e[        X5        [        R                  " 5       nUn[        XRU5      u  pxn	 U" XPR                  U R                  R                  SUR                  R                  SU5        [        R                  " XY5        Uc  UnU$ UR                   U:w  a  [        R                  " X5        U$ ! [        R                  " XY5        f = f)$Computes the dot product of x and y.r   r   FDz&Use dotu() or dotc() for complex dtyper   r   )r%   rp   r   sdotddotr1   r   r   r=   rq   r.   r5   r6   rr   r   rs   
rf   r   rg   r%   ru   rU   rv   rw   rx   ry   s
             r   dotr      s    GGLLE|{{	#{{	$@AA((q%%'FL$5\%#!J	1VVVQVVZZAFFJJ:Ff0
{ J 
l	"v+J 	f0s   ?D D/c           	     b   U R                   R                  nUS;   a
  [        XUS9$ US:X  a  [        R                  nO"US:X  a  [        R
                  nO[        S5      e[        X5        [        R                  " 5       nUn[        XRU5      u  pxn	 U" XPR                  U R                  R                  SUR                  R                  SU5        [        R                  " XY5        Uc  UnU$ UR                   U:w  a  [        R                   " X5        U$ ! [        R                  " XY5        f = f)r   fdrg   r   r   r   r   )r%   rp   r   r   cdotuzdotur1   r   r   r=   rq   r.   r5   r6   rr   r   rs   r   s
             r   dotur         GGLLE}1S!!	#||	#||((q%%'FL$5\%#!J	1VVVQVVZZAFFJJ:Ff0
{ J 
l	"v+J 	f0   ?D D.c           	     b   U R                   R                  nUS;   a
  [        XUS9$ US:X  a  [        R                  nO"US:X  a  [        R
                  nO[        S5      e[        X5        [        R                  " 5       nUn[        XRU5      u  pxn	 U" XPR                  U R                  R                  SUR                  R                  SU5        [        R                  " XY5        Uc  UnU$ UR                   U:w  a  [        R                   " X5        U$ ! [        R                  " XY5        f = f)z+Computes the dot product of x.conj() and y.r   r   r   r   r   r   )r%   rp   r   r   cdotczdotcr1   r   r   r=   rq   r.   r5   r6   rr   r   rs   r   s
             r   dotcr     r   r   c                   U R                   S:w  a$  [        SR                  U R                   5      5      eU R                  R                  nUS:X  a  [
        R                  nOPUS:X  a  [
        R                  nO9US:X  a  [
        R                  nO"US:X  a  [
        R                  nO[        S5      e[        R                  " 5       nUR                  5       n[        XAU5      u  pgn U" X@R                  U R                   R"                  SU5        [
        R$                  " XH5        Uc  UnU$ UR                  U:w  a  [&        R(                  " Xq5        U$ ! [
        R$                  " XH5        f = f)z(Computes the Euclidean norm of vector x.r   rn   r   r   r   r   r   )r*   r,   r<   r%   rp   r   snrm2dnrm2scnrm2dznrm2r1   r   r=   r   rq   r.   r5   r6   rr   r   rs   r   s	            r   nrm2r   ;  r   r   c                b   UR                   S:w  a$  [        SR                  UR                   5      5      eUR                  R                  nUS:X  a  [
        R                  nOPUS:X  a  [
        R                  nO9US:X  a  [
        R                  nO"US:X  a  [
        R                  nO[        S5      e[        R                  " 5       n[        X@U5      u  pn U" XAR                  XQR                  R                   S5        [
        R"                  " XF5        g! [
        R"                  " XF5        f = f)	z)Computes x *= a.

(*) x will be updated.
r   rn   r   r   r   r   r   N)r*   r,   r<   r%   rp   r   sscaldscalcscalzscalr1   r   r=   r   r.   r5   r6   rr   )rI   rf   r%   ru   rU   r   ry   s          r   scalr   \  s    
 	vv{<CCAFFKLLGGLLE|||	#||	#||	#||((%%'F+Fu=Ai1VVVUFFJJ2f0f0s   (D D.c                   U R                   S:w  a$  [        SR                  U R                   5      5      eUR                   S:w  a$  [        SR                  UR                   5      5      eU R                  UR                  :w  a/  [        SR                  U R                  UR                  5      5      eU R                  UR                  :w  a/  [        SR                  U R                  UR                  5      5      eg )Nr   rn   z!y must be a 1D array (actual: {})z1x and y must be the same size (actual: {} and {})z2x and y must be the same dtype (actual: {} and {}))r*   r,   r<   r.   r%   r1   )rf   r   s     r   r   r   x  s    vv{<CCAFFKLLvv{<CCAFFKLLvv "F16616624 	4ww!'' !6!''17735 	5 r   c                d   [         R                  " U 5      nUb  [        U[        R                  5      (       ag  Ub  UR
                  U:w  a  [        R                  " / US9nOUnUR                  R                  n[         R                  " U [         R                  5        O[        U[        R                  5      (       ad  UR
                  U:w  a  [        R                  " / US9nOUnUR                  R                  n[         R                  " U [         R                  5        O[        S5      eXTU4$ )Nr$   z(out must be either cupy or numpy ndarray)r   getPointerMode
isinstancer/   ndarrayr%   r0   r5   r6   rr   CUBLAS_POINTER_MODE_DEVICErA   rE   CUBLAS_POINTER_MODE_HOSTr1   )rU   rg   r%   moderx   rw   s         r   rq   rq     s      (D
{jdll33;#))u,ZZ%0FF[[__
ff&G&GH	C	'	'99[[51FF]]''
ff&E&EFBCCt##r   c                $   [        X5      u  p[        R                  " U 5      n[        U[        R
                  5      (       a&  [        R                  " U [        R                  5        O%[        R                  " U [        R                  5        XU4$ r   )	_get_scalar_ptrr   r   r   r/   r   rr   r   r   )rU   rI   r%   r   r   s        r   r   r     se    q(HA  (D!T\\""ff&G&GHff&E&EFT>r   c                r   [        U [        R                  5      (       a=  U R                  U:w  a  [        R                  " XS9n U R
                  R                  nX4$ [        U [        R                  5      (       a  U R                  U:X  d  [        R                  " XS9n U R                  R
                  nX4$ )Nr$   )	r   r/   r   r%   arrayr5   r6   rA   rE   )rI   r%   r   s      r   r   r     s    !T\\""77e

1*A


 8O 1emm,,E1AA+A8Or   c                   UR                   R                  nUS:X  a  [        R                  nOPUS:X  a  [        R                  nO9US:X  a  [        R
                  nO"US:X  a  [        R                  nO[        S5      eUR                  S:X  d   eUR                  UR                  s=:X  a  S:X  d   e   eUR                   UR                   s=:X  a  UR                   :X  d   e   eUR                  u  p[        U 5      n U [        R                  :X  a  XpOXpUR                  S   U
:X  d   eUR                  S   U:X  d   e[        XR                   5      u  p[        XBR                   5      u  pM[        R                  " 5       n[        R                  " U5      n[!        U["        R$                  5      (       d  [!        U["        R$                  5      (       a  [!        U["        R$                  5      (       d,  ["        R&                  " U5      nUR(                  R*                  n[!        U["        R$                  5      (       d,  ["        R&                  " U5      nUR(                  R*                  n[        R,                  " U[        R.                  5        O%[        R,                  " U[        R0                  5         UR2                  (       aK  U" XXXR(                  R*                  XR(                  R*                  SXR(                  R*                  S5        OUR4                  (       a  U [        R6                  :w  a  U [        R                  :X  a  [        R8                  n O[        R                  n U" XXXR(                  R*                  XR(                  R*                  SXR(                  R*                  S5        OYUR;                  SS	9nU" XXXR(                  R*                  XR(                  R*                  SXR(                  R*                  S5        [        R,                  " X5        g
! [        R,                  " X5        f = f)zComputes y = alpha * op(a) @ x + beta * y

op(a) = a if transa is 'N', op(a) = a.T if transa is 'T',
op(a) = a.T.conj() if transa is 'H'.

Note: ''y'' will be updated.
r   r   r   r   r   r"   r   r   orderN)r%   rp   r   sgemvdgemvcgemvzgemvr1   r*   r+   _trans_to_cublas_oprD   r   r   r=   r   r   r/   r   r   r5   r6   rr   r   r   _f_contiguous_c_contiguousCUBLAS_OP_CCUBLAS_OP_Tr&   )transaalpharI   rf   betar   r%   ru   mrP   xlenylen	alpha_ptrbeta_ptrrU   ry   s                   r   gemvr     s    GGLLE|||	#||	#||	#||((66Q;;66QVV q     77agg((((((77DA (F###dd771:771:&ugg6E$T773ND%%'F%%f-I%&&*T4<<*H*H%..JJu%E

I$--::d#Dyy}}Hff&G&GHff&E&EF1??y&&**aQ66::q*__6+=+=!=+++++++y&&**aQ66::q* S!Ay&&**aQ66::q* 	f0f0s   ;EP, ,Qc                j   UR                   R                  nUS:X  a  [        R                  nO3US:X  a  [        R                  nOUS;   a  [        S5      e[        S5      eUR                  S:X  d   eUR                  UR                  s=:X  a  S:X  d   e   eUR                   UR                   s=:X  a  UR                   :X  d   e   eUR                  u  pgUR                  S   U:X  d   eUR                  S   U:X  d   e[        R                  " 5       n[        XU5      u  p	n
UR                  R                  UR                  R                  p UR                  (       a$  U" XXyUSUSUR                  R                  U5
        O}UR                  (       a$  U" XXiUSUSUR                  R                  U5
        OHUR                  S	S
9nU" XXyUSUSUR                  R                  U5
        [         R"                  " X5        [        R$                  " X5        g! [        R$                  " X5        f = f)<Computes a += alpha * x @ y.T

Note: ''a'' will be updated.
r   r   r   z#Use geru or gerc for complex dtypesr   r"   r   r   r   r   N)r%   rp   r   sgerdgerr1   r*   r+   r   r=   r   r5   r6   r   r   r&   r   rs   rr   r   rf   r   rI   r%   ru   r   rP   rU   r   ry   x_ptry_ptraas                 r   gerr     s   
 GGLLE|{{	#{{	$=>>((66Q;;66QVV q     77agg((((((77DA771:??771:??%%'F"3F5"IEi66::qvvzz5
1??A%E1affjj!L__A%E1affjj!Lc"BA%E1bggkk1M""2)f0f0s   B2H H2c                l   UR                   R                  nUS;   a  [        XX#5      $ US:X  a  [        R                  nO"US:X  a  [        R
                  nO[        S5      eUR                  S:X  d   eUR                  UR                  s=:X  a  S:X  d   e   eUR                   UR                   s=:X  a  UR                   :X  d   e   eUR                  u  pgUR                  S   U:X  d   eUR                  S   U:X  d   e[        R                  " 5       n[        XU5      u  p	n
UR                  R                  UR                  R                  p UR                  (       a$  U" XXyUSUSUR                  R                  U5
        O}UR                  (       a$  U" XXiUSUSUR                  R                  U5
        OHUR!                  SS9nU" XXyUSUSUR                  R                  U5
        ["        R$                  " X5        [        R&                  " X5        g	! [        R&                  " X5        f = f)
r   r   r   r   r   r"   r   r   r   N)r%   rp   r   r   cgeruzgerur1   r*   r+   r   r=   r   r5   r6   r   r   r&   r   rs   rr   r   s                 r   gerur     s   
 GGLLE}5Q""	#||	#||((66Q;;66QVV q     77agg((((((77DA771:??771:??%%'F"3F5"IEi66::qvvzz5
1??A%E1affjj!L__A%E1affjj!Lc"BA%E1bggkk1M""2)f0f0s   B2H H3c                   UR                   R                  nUS;   a  [        XX#5      $ US:X  a  [        R                  nO"US:X  a  [        R
                  nO[        S5      eUR                  S:X  d   eUR                  UR                  s=:X  a  S:X  d   e   eUR                   UR                   s=:X  a  UR                   :X  d   e   eUR                  u  pgUR                  S   U:X  d   eUR                  S   U:X  d   e[        R                  " 5       n[        XU5      u  p	n
UR                  R                  UR                  R                  p UR                  (       a$  U" XXyUSUSUR                  R                  U5
        OHUR                  SS9nU" XXyUSUSUR                  R                  U5
        [         R"                  " X5        [        R$                  " X5        g	! [        R$                  " X5        f = f)
zCComputes a += alpha * x @ y.T.conj()

Note: ''a'' will be updated.
r   r   r   r   r"   r   r   r   N)r%   rp   r   r   cgerczgercr1   r*   r+   r   r=   r   r5   r6   r   r&   r   rs   rr   r   s                 r   gercr   <  s   
 GGLLE}5Q""	#||	#||((66Q;;66QVV q     77agg((((((77DA771:??771:??%%'F"3F5"IEi66::qvvzz51??A%E1affjj!Lc"BA%E1bggkk1M""2)f0f0s   A=G& &G>c                   UR                   R                  nUS:X  a  [        R                  nO"US:X  a  [        R                  nO[        S5      eUR                  S:X  d   eUR                  UR                  s=:X  a  S:X  d   e   eUR                   UR                   s=:X  a  UR                   :X  d   e   eUR                  u  pUR                  S   U
:X  d   eUR                  S   U
:X  d   eUR                  (       d  UR                  SS9n[        XR                   5      u  p[        XBR                   5      u  pL[        R                  " 5       n[        R                  " U5      n[        U[        R                   5      (       d  [        U[        R                   5      (       a  [        U[        R                   5      (       d,  [        R"                  " U5      nUR$                  R&                  n[        U[        R                   5      (       d,  [        R"                  " U5      nUR$                  R&                  n[        R(                  " U[        R*                  5        O%[        R(                  " U[        R,                  5        U(       a  [        R.                  nO[        R0                  n[        R                  " 5       n U" XXXR$                  R&                  XR$                  R&                  SXR$                  R&                  S5        [        R(                  " X5        U$ ! [        R(                  " X5        f = f)	z)Computes y = alpha*A @ x + beta * y

    r   r   zComplex dtypes not supportedr"   r   r   r   r   )r%   rp   r   ssbmvdsbmvr1   r*   r+   r   r&   r   r   r=   r   r   r/   r   r   r5   r6   rr   r   r   CUBLAS_FILL_MODE_LOWERCUBLAS_FILL_MODE_UPPER)kr   rI   rf   r   r   r   r%   ru   r   rP   r   r   rU   ry   uplos                   r   sbmvr   _  sD    GGLLE|||	#||67766Q;;66QVV q     77agg((((((77DA771:??771:????FFF&ugg6E$T773ND%%'F%%f-I%&&*T4<<*H*H%..JJu%E

I$--::d#Dyy}}Hff&G&GHff&E&EF,,,,%%'F1V1

Avvzz1vvzz1	& 	f0H 	f0s   A
L( (M c                >   U S:X  d  U [         R                  :X  a  [         R                  n U $ U S:X  d  U [         R                  :X  a  [         R                  n U $ U S:X  d  U [         R                  :X  a  [         R                  n U $ [	        SR                  U 5      5      e)NNTHzinvalid trans (actual: {}))r   rD   r   r   r1   r<   )transs    r   r   r     s    |u 2 22"" L 
#&"4"44""
 L	 
#&"4"44"" L 4;;EBCCr   c                    S nU[         R                  [         R                  4;   aH  U R                  (       a  U R                  S   nX!4$ U R
                  (       a  U R                  S   nSU-
  nX!4$ )Nr   r   )r   rD   r   r   r+   r   )rI   r   lds      r   _decide_ld_and_transr     se    	B##V%7%788??B 9 __BIE9r   c                l    Uc/  U R                   S   nU R                  (       d  U R                  SS9n X4$ )Nr   r   r   )r+   r   r&   )rI   rV   s     r   _change_order_if_necessaryr     s2    
{ggajS!A6Mr   c                
   UR                   UR                   s=:X  a  S:X  d   e   eUR                  UR                  :X  d   eUR                  R                  nUS:X  a  [        R                  nOPUS:X  a  [        R
                  nO9US:X  a  [        R                  nO"US:X  a  [        R                  nO[        S5      e[        U 5      n [        U5      nU [        R                  :X  a  UR                  u  pOUR                  u  pU[        R                  :X  a%  UR                  S   nUR                  S   U
:X  d   eO$UR                  S   nUR                  S   U
:X  d   eUc  [        R                  " X4USS	9nS
nO7UR                   S:X  d   eUR                  X4:X  d   eUR                  U:X  d   e[        XRR                  5      u  p\[        XbR                  5      u  pm[        R                   " 5       n[        R"                  " U5      n[%        U[        R&                  5      (       d  [%        U[        R&                  5      (       a  [%        U[        R&                  5      (       d,  [        R(                  " U5      nUR*                  R,                  n[%        U[        R&                  5      (       d,  [        R(                  " U5      nUR*                  R,                  n[        R.                  " U[        R0                  5        O%[        R.                  " U[        R2                  5        [5        X 5      u  nn [5        X15      u  nnUb  Ub  UR6                  (       af   U" XXXUUR*                  R,                  UUR*                  R,                  UXR*                  R,                  U	5        [        R.                  " X5        U$ UR8                  (       am   U" USU-
  SU -
  XXUR*                  R,                  UUR*                  R,                  UXR*                  R,                  U5        [        R.                  " X5        U$ [;        UU5      u  nn[;        UU5      u  nnUnUR6                  (       d  UR=                  SS9n U" XXXXR*                  R,                  UUR*                  R,                  UUUR*                  R,                  U	5        [        R.                  " X5        UR6                  (       d  [>        R@                  " UU5        U$ ! [        R.                  " X5        f = f! [        R.                  " X5        f = f! [        R.                  " X5        f = f)zComputes out = alpha * op(a) @ op(b) + beta * out

op(a) = a if transa is 'N', op(a) = a.T if transa is 'T',
op(a) = a.T.conj() if transa is 'H'.
op(b) = b if transb is 'N', op(b) = b.T if transb is 'T',
op(b) = b.T.conj() if transb is 'H'.
r"   r   r   r   r   r   r   r   r%   r           r   )!r*   r%   rp   r   sgemmdgemmcgemmzgemmr1   r   rD   r+   r/   r0   r   r   r=   r   r   r   r   r5   r6   rr   r   r   r   r   r   r   r&   r   rs   )r   transbrI   rJ   rg   r   r   r%   ru   r   r   rP   r   r   rU   ry   rV   rY   r   s                      r   gemmr     s    66QVV q     77aggGGLLE|||	#||	#||	#||(( (F (F###ww1ww###GGAJwwqzQGGAJwwqzQ
{jj!uC8xx1}}yyQF"""yyE!!!&ugg6E$T773ND%%'F%%f-I%&&*T4<<*H*H%..JJu%E

I$--::d#Dyy}}Hff&G&GHff&E&EF&q1KC&q1KCK3;9VViVVZZaffjj#x %%f8J9VQZVQ1VVZZaffjj#x %%f8J'3/FAs'3/FAsAHH3H1VViSVVZZh

A	7 	f0q#&J1 %%f8 %%f8 	f0s'   $AT AT0 AU T-0UU#c                x
   UR                   UR                   s=:X  a  S:X  d   e   eUR                  UR                  :X  d   eUR                  R                  nUS:X  a  [        R                  nOPUS:X  a  [        R
                  nO9US:X  a  [        R                  nO"US:X  a  [        R                  nO[        S5      e[        U 5      n [        U5      nU [        R                  :X  a  UR                  u  pOUR                  u  pU[        R                  :X  a  UR                  X4:X  d   eOUR                  X4:X  d   eUc  [        R                  " X4USS9nO7UR                   S:X  d   eUR                  X4:X  d   eUR                  U:X  d   e[        X#R                  5      u  p+[        XCR                  5      u  pL[        R                   " 5       n[        R"                  " U5      n[%        U[        R&                  5      (       d  [%        U[        R&                  5      (       a  [%        U[        R&                  5      (       d,  [        R(                  " U5      nUR*                  R,                  n[%        U[        R&                  5      (       d,  [        R(                  " U5      nUR*                  R,                  n[        R.                  " U[        R0                  5        O%[        R.                  " U[        R2                  5        [5        X05      u  p[5        XQ5      u  nnUb  Ub  UR6                  (       ae   U" XXXUR*                  R,                  XUR*                  R,                  UUR*                  R,                  U	5        [        R.                  " X5        U$ UR8                  (       al   U" USU -
  SU-
  XXR*                  R,                  XUR*                  R,                  UUR*                  R,                  U
5        [        R.                  " X5        U$ [;        X?5      u  p?[;        UU5      u  nnUnUR6                  (       d  UR=                  SS	9n U" XXXUR*                  R,                  UXR*                  R,                  UUR*                  R,                  U	5        [        R.                  " X5        UR6                  (       d  [>        R@                  " UU5        U$ ! [        R.                  " X5        f = f! [        R.                  " X5        f = f! [        R.                  " X5        f = f)
zComputes alpha * op(a) + beta * op(b)

op(a) = a if transa is 'N', op(a) = a.T if transa is 'T',
op(a) = a.T.conj() if transa is 'H'.
op(b) = b if transb is 'N', op(b) = b.T if transb is 'T',
op(b) = b.T.conj() if transb is 'H'.
r"   r   r   r   r   r   r   r   r   )!r*   r%   rp   r   sgeamdgeamcgeamzgeamr1   r   rD   r+   r/   r0   r   r   r=   r   r   r   r   r5   r6   rr   r   r   r   r   r   r   r&   r   rs   )r   r   r   rI   r   rJ   rg   r%   ru   r   rP   r   r   rU   ry   rV   rY   r   s                     r   geamr     s    66QVV q     77aggGGLLE|||	#||	#||	#||(( (F (F###ww1ww###ww1&   ww1&   
{jj!uC8xx1}}yyQF"""yyE!!!&ugg6E$T773ND%%'F%%f-I%&&*T4<<*H*H%..JJu%E

I$--::d#Dyy}}Hff&G&GHff&E&EF&q1KC&q1KCK3;9VVaffjjAFFJJSXX\\1F %%f8J9VQvXqxy&&**AFFJJSXX\\1F %%f8J'/FA'3/FAsAHH3H1VVaffjj#vvzz3

A	7 	f0q#&J/ %%f8 %%f8 	f0s'   ?AS+ 5AT AT! +TT!T9c                   UR                   S:X  d   eSUR                   s=::  a  S::  d   e   eUR                  UR                  :X  d   eUR                  R                  nUS:X  a  [        R                  nOPUS:X  a  [        R
                  nO9US:X  a  [        R                  nO"US:X  a  [        R                  nO[        S5      eU S:X  d  U [        R                  :X  a  [        R                  n OEU S	:X  d  U [        R                  :X  a  [        R                  n O[        S
R                  U 5      5      eUR                  u  pxU [        R                  :X  a%  UR                  US-
  [        U5      -  S-   :  d   eO$UR                  US-
  [        U5      -  S-   :  d   eUc-  UR                   (       a  Sn	OSn	["        R$                  " Xx4XYS9nOJUR                   S:X  d   eUR                  UR                  :X  d   eUR                  UR                  :X  d   e[&        R(                  " 5       n
UR                   (       ap  UR                   (       d  UR+                  SS9nU" U
SU -
  XUR,                  R.                  XR,                  R.                  UUR,                  R.                  U5
        U$ UR0                  (       d  UR+                  SS9nUnUR0                  (       d  UR+                  SS9nU" XXxUR,                  R.                  XrR,                  R.                  UUR,                  R.                  U5
        UR0                  (       d  [2        R4                  " X5        U$ )zfComputes diag(x) @ a or a @ diag(x)

Computes diag(x) @ a if side is 'L', a @ diag(x) if side is 'R'.
r"   r   r   r   r   r   r   LRzinvalid side (actual: {})r   Cr   r   )r*   r%   rp   r   sdgmmddgmmcdgmmzdgmmr1   CUBLAS_SIDE_LEFTCUBLAS_SIDE_RIGHTr,   r<   r+   r.   absr   r/   r0   r   r=   r&   r5   r6   r   r   rs   )siderI   rf   rg   incxr%   ru   r   rP   r   rU   r   s               r   dgmmr  h  s   
 66Q;;!77aggGGLLE|||	#||	#||	#||((s{df555&&	 8 88''4;;DABB77DAv&&&vv!a%3t9,q0000vv!a%3t9,q0000
{??EEjj!u:xx1}}yyAGG###yyAGG###%%'F
S!AVQXqQVVZZFFJJXX\\1	 J S!A  s#AV1Q

DVVZZ	  ""1*Jr   c                   UR                   S:X  d   eUR                  R                  nUS:X  a  [        R                  nOPUS:X  a  [        R
                  nO9US:X  a  [        R                  nO"US:X  a  [        R                  nO[        S5      e[        U 5      n U [        R                  :X  a  UR                  u  pOUR                  u  pUc  [        R                  " X4USS9nSnO7UR                   S:X  d   eUR                  X4:X  d   eUR                  U:X  d   eU(       a  [        R                  n
O[        R                  n
[!        X1R                  5      u  p;[!        XAR                  5      u  pL["        R$                  " 5       n[        R&                  " U5      n[)        U[        R*                  5      (       d  [)        U[        R*                  5      (       a  [)        U[        R*                  5      (       d,  [        R,                  " U5      nUR.                  R0                  n[)        U[        R*                  5      (       d,  [        R,                  " U5      nUR.                  R0                  n[        R2                  " U[        R4                  5        O%[        R2                  " U[        R6                  5        [9        X5      u  p[9        X 5      u  nnUR:                  (       a  UR:                  (       d#  UR=                  S	S
9nSU -
  n UR                  S   n U" USU
-
  XU	XR.                  R0                  UXR.                  R0                  U5        [        R2                  " X5        U$ UR>                  (       d#  UR=                  SS
9nUR                  S   nSU -
  n UnUR>                  (       d  UR=                  SS
9n U" XXU	XR.                  R0                  UXR.                  R0                  U5        [        R2                  " X5        UR>                  (       d  UUS'   U$ ! [        R2                  " X5        f = f! [        R2                  " X5        f = f)a  Computes out := alpha*op1(a)*op2(a) + beta*out

op1(a) = a if trans is 'N', op2(a) = a.T if transa is 'N'
op1(a) = a.T if trans is 'T', op2(a) = a if transa is 'T'
lower specifies  whether  the  upper  or  lower triangular
part  of the  array  out  is to be  referenced
r"   r   r   r   r   r   r   r   r  r   r   r   .) r*   r%   rp   r   ssyrkdsyrkcsyrkzsyrkr1   r   rD   r+   r/   zerosr   r   r   r   r=   r   r   r   r   r5   r6   rr   r   r   r   r   r&   r   )r   rI   rg   r   r   r   r%   ru   rP   r   r   r   r   rU   ry   rV   ldo_r   s                      r   syrkr    sK    66Q;;GGLLE|||	#||	#||	#||((&E"""ww1ww
{jj!uC8xx1}}yyQF"""yyE!!!,,,,&ugg6E$T773ND%%'F%%f-I%&&*T4<<*H*H%..JJu%E

I$--::d#Dyy}}Hff&G&GHff&E&EF%a/JC!#-FC
S!AIE''!*C	5T5QFFJJ88<<. !!&4$ J S!A''!*CIE  s#A	5uFFJJ88<<. !!&4  CHJ% !!&4 !!&4s   !:P. 
6Q	 .Q	Q!r   )F)N      ?r   )Nr   )Nr  r   F),
__future__r   r3   rA   r   r:   r/   r   cupy_backends.cuda.libsr   	cupy.cudar   cupy.linalgr   r   r   r   r`   rh   rl   rd   r   r   r   r   r   r   r   r   rq   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r   r   r   <module>r     s    "       *   
 
WOt%%BB16:::B18
5$*	>1B#1L"1J 1F2j	ZzUp9xTr   