
    3jt                        S SK r S SKrS SKJs  Jr  S SKJr  SSKJ	r	J
r
  SSKJr  SSKJr  SSKJr  SSK
Jr  SS	KJrJrJrJrJrJrJr  SS
KJrJrJr  SSKJrJr  SSK J!r!J"r"J#r#  S\RH                  S\
R"                  S\
R"                  S\
R"                  S\
R"                  S\%\&\
R"                  \
R"                  S-  4   4S jr'S\&S\"S\"S\"S-  S\"S-  S\"S-  S\"S-  S\"4S jr(    S"S\S\)\   S\)\   4S jjr*S \*l+        S! r,g)#    N)mm_args   )configir)CppGemmTemplate)CppGroupedGemmTemplatecreate_epilogue_with_attr)	TensorBox)addadd_needs_realized_inputsatenpermuteregister_loweringto_dtypeview)autotune_select_algorithmChoiceCallerExternKernelChoice)use_aten_gemm_kernelsuse_cpp_gemm_template)opsOpsValueVW_tensorpacked_weightx_scalex_zpw_scalereturnc                 >   S n[        S X#U4 5       5      nU(       Ga  [        R                  R                  UR	                  5          [        R                  R                  UR	                  5          -  n[        R                  R                  UUR	                  5       S-   S9n[        R                  " U R                  [        R                  5      SS9n[        R                  R                  UR	                  5          n	X-  U	-  n[        R                  R                  UUR	                  5       S-   S9n
Oa[        R                  " U R                  [        R                  5      SS9n[        R                  R                  UUR	                  5       S-   S9n
UU
U4$ )Nc              3   ^  #    U  H  n[        U[        R                  5      =(       a}    UR                  5       [        R
                  R                  ;   =(       aK    [        UR                  S 5      =(       a.    [        UR                  R                  [        R                  5      v   M     g7f)dataN)

isinstancer   r   get_namer   graph	constantshasattrr#   ConstantBuffer).0items     Z/home/wildlama/miniconda3/lib/python3.13/site-packages/torch/_inductor/mkldnn_lowerings.py	<genexpr>+create_int8_compensation.<locals>.<genexpr>+   sz      *
 -D	 	4& 	:MMOqww000	:DIIv&	: tyy~~r'8'89	: -s   B+B-_x_w_compensnamer   )dim_BMatrixCompens)
allr   r&   r'   r%   add_tensor_constanttorchsumtofloat)r   r   r   r   r   	x_w_scaleuse_int8_fast_compensation_pathx_w_scale_tensorweight_compens_tensorx_zp_tensorweight_compenss              r,   create_int8_compensationr@      su    &*I&) *
 G,* '# 'GGg..01gg 0 0 234 	 GG//'')N: 0 
	 !&		(++ekk*B Jgg''8 5 H; V44!''),== 5 

 !&		(++ekk*B J44!''),== 5 

 	(     r;   input_weight_compo_x_scale_x_zp_w_scale
_x_w_scalec                 t   U (       a.  [         R                  " [         R                  " UU5      U5      nU$ [         R                  " [         R                  " UU5      U5      n[         R                  " U[         R                  " [         R                  " [         R                  " UU5      U5      U5      5      nU$ N)r   submul)r;   rB   rC   rD   rE   rF   rG   temps           r,   'codegen_int8_gemm_template_compensationrM   O   s     'wwGG 
H K9 wwGG 
 wwGGGG    	
 KrA   xwbc           	         U R                  5       n[        U5      S:  a  [        U SUS   /5      n [        U5      n[        R                  (       d  [        R
                  (       d   eU V	s/ s H&  oc  U	O[        R                  R                  U	5      PM(     nn	/ n
[        U [        US   SS/5      US9Gt ppU V	s/ s H  oS LPM     sn	SS [        R                  [        U5      U 5      S.nU /UQnUR                  U V	s/ s H	  oc  M  U	PM     sn	5        [        R                   " U
UU40 UD6  [        U
5      S:w  d   e[#        SU
UU5      u  pUR$                  R$                  n[        U5       Vs/ s H!  n[        R&                  " Xo[(        U4/5      PM#     nn[        R*                  " US   R-                  5       S	9Ul        UUl        [        U5       Vs/ s H%  n[        R2                  R5                  UU   5      PM'     nn[        U5      S:  a@  [        U5       H1  n[        UU   / US S QUU   R                  5       S   P75      UU'   M3     U$ s  sn	f s  sn	f s  sn	f s  snf s  snf )
N   r   r   layoutT)has_biastrans_wepilogue_creatoract_mappinggrouped_gemm)device)get_sizelenr   r   max_autotunemax_autotune_gemmr   ExternKernelrealize_inputr   r   dictfromkeysrangeextendr   add_choicesr   r#   MultiOutputlistMultiOutputLayout
get_devicerU   outputsr   create)rN   rO   rP   attrscalars	algorithmrU   x_sizenum_gemmbiaschoices_kwargsinput_nodesresulttemplate_bufgemm_idxreturn_bufsreturn_tensorss                      r,   grouped_gemm_loweringr|      ss    ZZ\F
6{QR$%1vH&":":::STUST42??#@#@#F	FSTAU"$Gq'!A$A"7GQ 344!$%!4 }}U8_a8	F 'q'K??@&& 	 w<1)	IF ;;##L h'H 	vtX.>-?@'  
 ..k!n6O6O6QRL&LCH?CRxK12?   6{QhH'+x(G&"+G~h7@@B2FG(N8$ (
 ] 	V 5 @"s$   ,-I>I$I)I)8(I. ,I3Tc            !        ^^^^^^ [         R                  R                  (       GaJ  SSKJm  [        [         R                  R                  R                  SSTR                  R                  S9m[        [         R                  R                  R                  R                  SSTR                  R                  S9m[        [         R                  R                  R                  SSTR                  R                  S9m[        [         R                  R                  R                  R                  SSTR                   R                  S9m[         R                  R                  R"                  [         R                  R                  R$                  [         R                  R                  R&                  [         R                  R                  R                  [(        R*                  R,                  [         R                  R                  R.                  /n [1        [         R                  R                  R"                  5      S[2        S[2        S	[2        4U4S
 jj5       n[1        [         R                  R                  R"                  R                  5      S[2        S[2        S[2        S	[2        4U4S jj5       n[1        [         R                  R                  R$                  R                  5      S[2        S[2        S[2        S	[2        4U4S jj5       n[1        [         R                  R                  R                  5       S3S[2        S[2        S[2        4U4S jjj5       n[1        [         R                  R                  R                  R                  5       S3S[2        S[2        S[2        S[2        4U4S jjj5       n[1        [         R                  R                  R&                  5      S[2        S[2        S	[2        4U4S jj5       n[1        [(        R*                  R,                  5      S[2        S[2        S[2        S[2        S[2        S[2        S[2        S[4        S[6        [8           S[8        S[8        S[8        S[4        S [4        S![4        S"[4        4 U4S# jj5       n[1        [         R                  R                  R.                  S S$9S[2        S%[2        S&[2        S	[2        4U4S' jj5       n[1        [         R                  R                  R:                  R                  S S$9[1        [         R                  R                  R:                  R<                  S S$9S[2        S%[2        S&[2        S([2        S	[2        4
U4S) jj5       5       n	[1        [         R                  R                  R                  S S$9 S3S[2        S%[2        S&[2        S*[2        S	[2        4
U4S+ jjj5       n
[1        [         R                  R                  R                  R                  S S$9[1        [         R                  R                  R                  R<                  S S$9 S3S[2        S%[2        S&[2        S*[2        S,[2        S	[2        4U4S- jjj5       5       n[         R                  R>                  (       a  [        [         R                  R@                  RB                  S.STRD                  R                  S9mU RG                  [         R                  R@                  RB                  5        [1        [         R                  R@                  RB                  5      S S/.S[2        S0[2        S1[2        S[2        S -  4U4S2 jjj5       n[I        U 5        g g )4Nr   )	mkldnn_irzmkldnn::_linear_pointwiseF)has_out_variantkernel_creatorzonednn::qlinear_pointwiserN   weightrr   c
                 t   > [         R                  " T
R                  R                  U UUUUUUUUU	5
      5      $ rI   )r   rl   ConvolutionUnary)rN   r   rr   paddingstridedilationgroupsrm   rn   ro   r~   s             r,   convolution_unary5register_onednn_fusion_ops.<locals>.convolution_unary   sJ     ##**11 rA   otherc                 z   > [         R                  " TR                  R                  U UUUUUUUUU	U
UU5      5      $ rI   )r   rl   ConvolutionBinaryrN   r   r   rr   r   r   r   r   binary_attrbinary_alpha
unary_attrunary_scalarsunary_algorithmr~   s                r,   convolution_binary6register_onednn_fusion_ops.<locals>.convolution_binary  sS      ##++22 !# rA   c                 z   > [         R                  " TR                  R                  U UUUUUUUUU	U
UU5      5      $ rI   )r   rl   ConvolutionBinaryInplacer   s                r,   convolution_binary_inplace>register_onednn_fusion_ops.<locals>.convolution_binary_inplace*  sS      ##2299 !# rA   rO   rP   c                   >^^^ U R                  5       n[        U5      S:  a  [        U SUS   /5      n Ub  [        R                  R                  U5      n/ n[        R                  (       d  [        R                  (       ao  [        USS/5      n	[        X	US9Gt pp	[        X`U	5      (       aC  UUU4S jnUS LSTS:X  a  S OUS	.nUb  / S
QUS'   [        R                  " UUUc  X/OXU/40 UD6  [        U5      S:X  d  [        5       (       a>  [        TTTS9nUc  S US'   UR!                  TR"                  " Uc  X/OXU/U40 UD65        UR%                  5       [&        R(                  R*                  ;   d   eSS 0n[-        SUUc  X/OXU/UUS9u  p[        U5      S:  a%  [        U/ US S QUR                  5       S   P75      nU$ )NrR   rS   r   r   rT   c                    > [        U TTTS9$ )Nrn   ro   r	   )bufro   rm   rn   s    r,   rX   Jregister_onednn_fusion_ops.<locals>.linear_unary.<locals>.epilogue_creatorb  s    8w)  rA   TnonerV   rW   rX   )rR   r   r   input_indices)rm   rn   ro   Bc                 X    [         R                  R                  U R                  5          $ rI   r   r&   r'   r%   rN   s    r,   <lambda>Bregister_onednn_fusion_ops.<locals>.linear_unary.<locals>.<lambda>      QWW..qzz|<rA   linear_unaryinput_gen_fnsr\   r]   r   r   r`   ra   r   r^   r_   r   r   r   r   rf   r   rb   appendbindr%   r   r&   r'   r   )rN   rO   rP   rm   rn   ro   rU   rp   rs   transposed_wrt   rX   ru   r   rw   aten_mkldnn_linear_unarys      ```         r,   r   0register_onednn_fusion_ops.<locals>.linear_unaryL  s    ZZ\F6{QR,-}OO11!4*,G""f&>&>&q1a&1.5af.U+A(LAA %&TM#'$(FND8H	F }2;/#//"#)! !	 7|q $9$;$;4IN9"&F3K,11"#)! ! ::<177#4#4444<M 2)!+IF 6{Qf&Ks&KV__5Fr5J&KLMrA   yc                   >^^ U R                  5       n[        U5      S:  a  [        U SUS   /5      n TR                  5       n[        U5      S:  a  [        TSUS   /5      mUb  [        R                  R                  U5      n/ n[        R                  (       d  [        R                  (       ao  [        USS/5      n	[        X	TUS9Gt pp	m[        XPU	5      (       aA  UU4S jnUS LSUS.nUc  / S	QO/ S
QUS'   [        R                  " UUUc  U TU/OU TX#/40 UD6  [        U5      S:X  d  [        5       (       a?  [        TS9nUc  S US'   UR!                  TR"                  " Uc  U TU/OU TX#/U40 UD65        UR%                  5       [&        R(                  R*                  ;   d   eSS 0n[-        SUUc  U TU/OU TX#/UUS9u  p[        U5      S:  a%  [        U/ US S QUR                  5       S   P75      nU$ )NrR   rS   r   r   rT   c                    > [        U TTS9$ )N)r   r	   )r   rm   r   s    r,   rX   Kregister_onednn_fusion_ops.<locals>.linear_binary.<locals>.epilogue_creator  s    8d!LLrA   Tr   )r   rR   r   )   r   rR   r   r   )rm   r   c                 X    [         R                  R                  U R                  5          $ rI   r   r   s    r,   r   Cregister_onednn_fusion_ops.<locals>.linear_binary.<locals>.<lambda>  r   rA   linear_binaryr   r   )rN   r   rO   rP   rm   rU   rp   y_sizers   r   rt   rX   ru   r   rw   aten_mkldnn_linear_binarys    `  `          r,   r   1register_onednn_fusion_ops.<locals>.linear_binary  s#    ZZ\F6{QR,-ZZ\F6{QR,-}OO11!4*,G""f&>&>&q1a&118Qv2.AQ )LAAM %&TM#',<F <=9i,F?+#//%&YAq	Q1L !	 7|q $9$;$;49"&F3K-22%&YAq	Q1L ! ::<177#4#4444<M 2YAq	Q1L+IF 6{Qf&Ks&KV__5Fr5J&KLMrA   c                 v   > [         R                  " TR                  R                  U UUUUUUUUU	U
5      5      $ rI   )r   rl   ConvolutionTransposeUnary)rN   r   rr   r   output_paddingr   r   r   rm   rn   ro   r~   s              r,   convolution_transpose_unary?register_onednn_fusion_ops.<locals>.convolution_transpose_unary  sM     ##33::" rA   w0w1w2w3hxcxreversebatch_sizesmodehidden_size
num_layers
has_biasesbidirectionalbatch_firsttrainc                    > [         R                  " [        R                  TR                  R                  U UUUUUUUUU	U
UUUUU5      5      $ rI   )pytreetree_mapr   rl   MkldnnRnnLayer)rN   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r~   s                   r,   mkldnn_rnn_layer4register_onednn_fusion_ops.<locals>.mkldnn_rnn_layer  sc    & ??  ((//!! rA   )type_promotion_kindr   r   c                 R  > [        U[        R                  5      (       dS  [        U5      [        L d   e[
        R                  R                  [        R                  " U[        R                  S9SS9nUc?  [
        R                  R                  [        R                  " S[        R                  S9SS9n[        U[        R                  5      (       dS  [        U5      [        L d   e[
        R                  R                  [        R                  " U[        R                  S9SS9nUc?  [
        R                  R                  [        R                  " S[        R                  S9SS9n[        R                  " TR                  R                  U UUUUUUUUU	U
UUUUUU5      5      $ )Ndtyper   r0   r   r   w_zp)r$   r   r   typer9   r   r&   r5   r6   tensorfloat32int32intrl   QConvPointWisePT2E)rN   r   r   r   r   r   rr   r   r   r   r   o_inv_scaleo_zero_pointoutput_dtyperm   rn   ro   r~   s                    r,   qconvolution_unary6register_onednn_fusion_ops.<locals>.qconvolution_unary  sk   ( gr||44G}---''55LL>Y 6  |ww22LL%++6V 3  dBLL11DzS(((ww22LLU[[9 3  |ww22LL%++6V 3  ##,,33!  # rA   accumc                 R  > [        U[        R                  5      (       dS  [        U5      [        L d   e[
        R                  R                  [        R                  " U[        R                  S9SS9nUc?  [
        R                  R                  [        R                  " S[        R                  S9SS9n[        U[        R                  5      (       dS  [        U5      [        L d   e[
        R                  R                  [        R                  " U[        R                  S9SS9nUc?  [
        R                  R                  [        R                  " S[        R                  S9SS9nUS:X  au  U[        R                  [        R                  4;   aQ  UR                  5       [        R                  [        R                  4;   a  UR                  5       U:w  a  [        Xn5      n[        R                   " TR"                  R!                  U UUUUUUUUU	U
UUUUUUUUUUU5      5      $ )Nr   r   r0   r   r   r   r7   )r$   r   r   r   r9   r   r&   r5   r6   r   r   r   r   bfloat16	get_dtyper   rl   QConvPointWiseBinaryPT2E)rN   r   r   r   r   r   r   rr   r   r   r   r   r   r   r   accum_scaleaccum_zpr   alphar   r   r   r~   s                         r,   qconvolution_binary7register_onednn_fusion_ops.<locals>.qconvolution_binaryV  s   < gr||44G}---''55LL>Y 6  |ww22LL%++6V 3  dBLL11DzS(((ww22LLU[[9 3  |ww22LL%++6V 3 
 u$ U]]ENN$CCOO%%--)HHOO%5 !5##2299!  !#- rA   r   c                   >^^^^^^^	^
^^^^^^ UR                  5       [        R                  [        R                  4;   d   S5       eU R	                  5       n[        U5      S:  a  [        U SUS   /5      n [        T[        R                  5      (       dT  [        T5      [        L d   e[        R                  R                  [        R                  " T[        R                   S9SS9mOeTR#                  5         [%        S TR	                  5        5       5      (       a  [        T/ 5      m[        TR	                  5       5      S;   d   S	5       eTc?  [        R                  R                  [        R                  " S
[        R&                  S9SS9m[        T[        R                  5      (       dT  [        T5      [(        L d   e[        R                  R                  [        R                  " T[        R&                  S9SS9mOTR#                  5         TR+                  5       S:X  d   S5       eUc?  [        R                  R                  [        R                  " S
[        R&                  S9SS9nTR#                  5         UR#                  5         UR                  5       [        R&                  :w  a  [        [        R,                  R/                  U5      [        R0                  5      (       a  [        R                  R2                  UR5                  5          R7                  [        R&                  5      n[        R                  R                  [        R                  " U[        R&                  S9UR5                  5       S9nTc  S OTR                  5       m/ n[8        R:                  (       d  [8        R<                  (       Ga  [?        XUT	S9Gt npn[        [        R,                  R/                  U5      [        R0                  5      (       Ga\  [        R@                  " [        RB                  " [        R                  R2                  UR5                  5          5      [        R                  R2                  UR5                  5          5      (       a  [E        XU5      (       a  [        R                  R2                  UR5                  5          RG                  5       n[I        UUTTT5      u  mmmUU
UUUUU	UUUUUUU4S jnU R                  5       [        RJ                  [        R                  4;   d   e[L        RN                  " UUTc  U TTUTU/OU TTUTUT/TS LUTc  / SQO/ SQS9  [        U5      S
:X  d  [Q        5       (       aK  [S        TTT	T
TTS9nTc  S US'   URU                  TRV                  " Tc  U TTUTU4OU TTUTUT4U40 UD65        UR5                  5       [        R                  R2                  ;   d   eS S S S S.n[        [        R,                  R/                  T5      [        R0                  5      (       a  S US'   [        [        R,                  R/                  T5      [        R0                  5      (       a  S US'   [Y        SUTc  U TTUTU/OU TTUTUT/UUS9u  nn[        U5      S:  a%  [        U/ US S QUR	                  5       S   P75      nU$ )Nz=Only int8 and e4m3fn weights are supported by oneDNN qlinear.rR   rS   r   r   r0   c              3   *   #    U  H	  oS :H  v   M     g7fr   N r*   r2   s     r,   r-   Dregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.<genexpr>       >+=Cax+=   r   r   x_scale must be 0D or 1Dr   r   r   z(x_zp is incompatible with oneDNN qlinearr   rU   	out_dtypec                 d  >^^^^^	^
^^^^ T[         R                  [         R                  [         R                  [         R                  4;   d   eU R                  5       mTR                  5       mS mT(       a  Tc   eTR                  5       mTR                  5       mTR                  5       m
TR                  5       mS mTb  TR                  5       mUUUUUU
UUUU4
S jn[        R                  " U R                  5       [         R                  UU R                  5       S9nTS:w  a  [        UTTTS9nT[         R                  :X  aL  UR                  5       mUU4S jn[        R                  " UR                  5       TUUR                  5       S9nU$ T[         R                  [         R                  4;   aw  SSKJm  UR                  5       m	UUU	4S jn[        R                  " UR                  5       T[        R                  " U[!        T5      [#        T5      S	9UR                  5       S9nU$ )
Nc           	        >
 T" U 5      n[         R                  " U[        R                  5      nU S   4nS nS nS nT(       d  T" S5      nT" S5      nT" U5      nT" U5      nS nT(       a  Tc   eT" U5      n[	        TUUUUUU5      nT
b}  T" U5      n	T[        R                  [        R
                  4;   d   eT[        R
                  :X  a%  [         R                  " U	[        R                  5      n	[         R                  " X5      nU$ NrS   r   r   r   r6   r   rM   r   r   )indexrB   weight_compens_indexrD   rE   rF   rC   rG   rL   _biasrr   
bias_dtypebias_loaderinput_loaderr;   w_scale_loaderweight_compens_loaderx_scale_loaderx_w_scale_loaderx_zp_loaders             r,   inner_fn]register_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.epilogue_creator.<locals>.inner_fn5  s	   $0$7E %(LL$FE49"I<0'+H$(E'+H#B+9"+=(3B+9:N+O,ABV,WM)-J>'7'C C'C-=>R-S
#J ? % - ( % ( *$D  $/(34H(I'1emmU^^5T'T T'T#-#?,/LL,NE'*wwt';#'KrA   r[   r   r  rangesr   r   c                 B   > T" U 5      n[         R                  " UT5      $ rI   r   r   r   rB   output_cast_loaderr   s     r,   inner_fn_cast_output_to_bf16qregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.epilogue_creator.<locals>.inner_fn_cast_output_to_bf16p      (:5(A'*||E<'H HrA   r   _create_constantsc                   > T" U 5      nT	" SU-  U[         R                  S9u  pB[        R                  " X4-  5      U-   nT
[         R                  :X  a  T	" SS[         R                  S9u  pgOT	" SS[         R                  S9u  pg[        R
                  " [        R                  " XV5      U5      n[        R                  " UT
5      $ Ng      ?r   r      i   r6   r   r   rounduint8minimummaximumr   r   scale
zero_pointrB   	inv_scalevalqminqmaxclampedr  r   requant_input_loaders            r,   inner_fn_requanteregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.epilogue_creator.<locals>.inner_fn_requant  s    (<U(C8I$'%K5==9" 5	 '*ii0A&BZ&O#/5;;#>1B()3emm2&JD$ 2C(,c2&JD +.++ckk#6Ld*S'*||G\'J JrA   r"  r#  r6   r   r   r  int8make_loaderr   	Pointwiserj   r\   r
   get_device_or_errorloweringr  	functoolspartialr9   r   )input_bufferr  
output_bufr  r*  r  r  r  r  r)  r  r  r  r	  r
  ro   rm   rr   r  o_scaler   r   rn   r;   r   r?   r   r:   r   s        @@@@@@@@@@r,   rX   Kregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.epilogue_creator  s   +!MM!NN!KK!JJ	0      (4'?'?'A0>0J0J0L-+/(:#,#88#8/8/D/D/F,)0)<)<)>)0)<)<)>&*&6&6&8&*+*.*:*:*<K(( ((T &(\\#/#:#:#<"'--%-#/#8#8#:	&
  6>)B *D'Y*J
 (5>>91;1G1G1I.I *,'1'E'E'G&2)E'1':':'<	*JN  *)C *ekk5::-FFC3=3I3I3K0K" *,'1'E'E'G&2)2):):$4*/./2</@*"
 (2':':'<	*J  *)rA   )r   r   r   rR         )   r   r   r   rR   r9  r:  rV   rX   r   )output_scaleoutput_zero_pointr   post_op_namepost_op_argspost_op_algorithmrr   c                 X    [         R                  R                  U R                  5          $ rI   r   r   s    r,   r   Cregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.<lambda>  r   rA   c                 X    [         R                  R                  U R                  5          $ rI   r   r   s    r,   r   rC    r   rA   c                 X    [         R                  R                  U R                  5          $ rI   r   r   s    r,   r   rC    r   rA   c                 X    [         R                  R                  U R                  5          $ rI   r   r   s    r,   r   rC    r   rA   )r   r9  r:  r;  c                 X    [         R                  R                  U R                  5          $ rI   r   r   s    r,   r   rC        QWW->->qzz|-LrA   c                 X    [         R                  R                  U R                  5          $ rI   r   r   s    r,   r   rC    rH  rA   qlinear_unaryr   )-r   r6   r.  float8_e4m3fnr\   r]   r   r$   r   r   r   r9   r   r&   r5   r   r   realizer4   r   r   	get_numelInputsKernelunwrap_storage_for_inputr)   r'   r%   r8   r   r^   r_   r   equal
zeros_liker   to_denser@   r  r   rf   r   rb   r   r   r   )rN   r   r   r   r   r   rr   r7  r   r   rm   rn   ro   rU   rp   w_zp_tensorrs   rt   r   rX   ru   r   rw   r  r;   r?   r:   aten_mkldnn_qlinear_unarys    `` ` ```````          @@@@r,   rJ  1register_onednn_fusion_ops.<locals>.qlinear_unary  s   " !**,U=P=P0QQ OQ ZZ\F6{QR,-gr||44G}---''55LL>Y 6  !>7+;+;+=>>> #7B/G7++-.&8T:TT8|
 ww22LL%++6V 3  dBLL11DzS(((ww22LLU[[9 3  >>#q(T*TT(
 |
 ww22LL%++6V 3  OOLLN~~5;;.:88>!!4 4
  gg//@CCEKKPww22LLEKK@t}} 3  "&4>>3CJ*,G""f&>&>&>/6V|0,F} @@F))  (():):4==?)KL))$--/:  ,F}EE ww001G1G1IJSSUH 1 %	7&!|* |* |*| ;;=U[[%**,EEEE#//< GT='4H$wdS!%T!1)9< '92 7|q $9$;$;!(&2!-!%!(&/ <%)F6N-22< GT='4H$wdS	
 ! !))+qww/@/@@@@<<<<	M 88A!! 
 $Ma 88>!!  $Ma 1< GT='4@$wdK+IFA 6{Qf&Ks&KV__5Fr5J&KLMrA   x2c                 J  >^^^^^^^	^
^^^^^^^ ^! U R                  5       nTR                  5       n[        U5      [        U5      :X  d   e[        U5      S:  a(  US;   a"  [        U SUS   /5      n [        TSUS   /5      m[        T[        R
                  5      (       dT  [        T5      [        L d   e[        R                  R                  [        R                  " T[        R                  S9SS9mOeTR                  5         [        S TR                  5        5       5      (       a  [        T/ 5      m[        TR                  5       5      S;   d   S	5       eTc?  [        R                  R                  [        R                  " S
[        R                   S9SS9mUc?  [        R                  R                  [        R                  " S
[        R                   S9SS9n[        T[        R
                  5      (       dT  [        T5      ["        L d   e[        R                  R                  [        R                  " T[        R                   S9SS9mOTR                  5         TR                  5         UR                  5         UR%                  5       [        R                   :w  a  [        [        R&                  R)                  U5      [        R*                  5      (       a  [        R                  R,                  UR/                  5          R1                  [        R                   5      n[        R                  R                  [        R                  " U[        R                   S9UR/                  5       S9nUS:X  a  T
[        R                  [        R2                  4;   aS  TR%                  5       [        R                  [        R2                  4;   a!  TR%                  5       T
:w  a  [5        TT
5      mOTR%                  5       T
:X  d   S5       eTR%                  5       m Tb  TR%                  5       OS m/ n[6        R8                  (       d  [6        R:                  (       Ga  US;   Ga  [=        XTUT
S9Gt nnpm[        [        R&                  R)                  T5      [        R*                  5      (       Ga  [        TR?                  5       R@                  5      S
:X  Gak  [        [        R&                  R)                  U5      [        R*                  5      (       Ga.  [        RB                  " [        RD                  " [        R                  R,                  UR/                  5          5      [        R                  R,                  UR/                  5          5      (       a  [G        UX5      (       a  [        R                  R,                  UR/                  5          nURI                  5       n[K        UUTTT5      u  mmm!UUUU	U
UUUUUUUU UU!U4S jn[L        RN                  " UUTc	  U TTUTUT/O	U TTUTUTT/TS LUTc  / SQO/ SQS9  [        U5      S
:X  d  [Q        5       (       aQ  [S        TT	T
UUUUTTTS9
nTc  S US'   URU                  T"RV                  " Tc	  U TTUTUT4O	U TTUTUTT4U40 UD65        UR/                  5       [        R                  R,                  ;   d   eS S S S.nTb  S US'   [Y        SUTc	  U TTUTUT/O	U TTUTUTT/UUS9u  nn[        URZ                  RZ                  [        R\                  5      (       Ga  US:X  Ga  URZ                  RZ                  R^                  TR?                  5       :X  a  [        R
                  Ra                  [        R\                  " [        Rb                  " [        Rd                  " TTR?                  5       S95      URZ                  RZ                  Rf                  URZ                  RZ                  Rh                  URZ                  RZ                  Rj                  URZ                  RZ                  Rl                  S95      n[        U5      S:  a+  US;   a%  [        U/ US S QUR                  5       S   P75      nU$ ) NrR   )r   r7   rS   r   r   r0   c              3   *   #    U  H	  oS :H  v   M     g7fr   r   r   s     r,   r-   Eregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.<genexpr>  r   r   r   r   r   r   r   r7   zCdtype of accum for qlinear post op sum should be the same as outputr   c                   >^^^^^	^
^^^^^ T[         R                  [         R                  [         R                  [         R                  4;   d   eU R                  5       mTR                  5       mTR                  5       mS mT(       a  Tc   eTR                  5       mTR                  5       mTR                  5       m
TR                  5       mS mTb  TR                  5       mUUUUUU
UUUUUU4S jn[        R                  " U R                  5       [         R                  UU R                  5       S9nTS:w  a  [        UTTTS9nT[         R                  :X  aL  UR                  5       mUU4S jn[        R                  " UR                  5       TUUR                  5       S9nU$ T[         R                  [         R                  4;   a  SSKJm  UR                  5       m	UUU	4S jn[        R                  " UR                  5       [         R                  [        R                  " U[!        T5      [#        T5      S	9UR                  5       S9nU$ )
Nc           	        > T" U 5      nT" U 5      nS nS nS nU S   4nT(       d  T" S5      nT" S5      nT" U5      n[         R                  " U[        R                  5      nT" U5      nS nT(       a  Tc   eT" U5      n[	        TUUUUUU5      n	Tb}  T" U5      n
T[        R                  [        R
                  4;   d   eT[        R
                  :X  a%  [         R                  " U
[        R                  5      n
[         R                  " X5      n	T[        R                  [        R
                  4;   d   eT[        R
                  :X  a%  [         R                  " U[        R                  5      n[         R                  " X5      n	U	$ r   r   )r   rB   _x2rD   rE   rF   r  rC   rG   rL   r  rr   r  r  r  r;   r  r  x2_dtype	x2_loaderr  r	  r
  s              r,   r  ^register_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.epilogue_creator.<locals>.inner_fn|  s\   $0$7E"+E"2C'+H$(E'+H49"I<0#B+9"+=(3B+9:N+O$'LL$FE,ABV,WM)-J>'7'C C'C-=>R-S
#J ? % - ( % ( *$D  $/(34H(I'1emmU^^5T'T T'T#-#?,/LL,NE'*wwt'; $,u~~/N#NN#N'5>>9&)ll3&F#&774#5D#'KrA   r  r   r   c                 B   > T" U 5      n[         R                  " UT5      $ rI   r  r  s     r,   r  rregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.epilogue_creator.<locals>.inner_fn_cast_output_to_bf16  r  rA   r   r  c                   > T" U 5      nT	" SU-  U[         R                  S9u  pB[        R                  " X4-  5      U-   nT
[         R                  :X  a  T	" SS[         R                  S9u  pgOT	" SS[         R                  S9u  pg[        R
                  " [        R                  " XV5      U5      n[        R                  " U[         R                  5      $ r  r  r!  s            r,   r*  fregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.epilogue_creator.<locals>.inner_fn_requant  s    (<U(C8I$'%K5==9" 5	 '*ii0A&BZ&O#/5;;#>1B()3emm2&JD$ 2C(,c2&JD +.++ckk#6Ld*S'*||GU[['I IrA   r,  r-  ) r5  r  r6  r  r*  r  r  r  r  r)  r  r  r^  r  r	  r
  rr   r  r7  r   r   r   r   r   r;   r   r?   rV  r]  r   r:   r   s         @@@@@@@@@@@r,   rX   Lregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.epilogue_creatord  s   +!MM!NN!KK!JJ	0      (4'?'?'A$&NN$4	0>0J0J0L-+/(:#,#88#8/8/D/D/F,)0)<)<)>)0)<)<)>&*&6&6&8&*+*.*:*:*<K.( .( .(` &(\\#/#:#:#<"'--%-#/#8#8#:	&
 &/)B * *(5*9	*J (5>>91;1G1G1I.I *,'1'E'E'G&2)E'1':':'<	*JN  *)C *ekk5::-FFC3=3I3I3K0J" *,'1'E'E'G&+kk)2):):$4*/./2</@*"
 (2':':'<	*J  *)rA   )r   r   r   rR   r9  r:  r;  )   r   r   r   rR   r9  r:  r;  r<  )
r=  r>  r   other_scaleother_zpbinary_post_opr   unary_post_opunary_post_op_argsunary_post_op_algorithmrr   c                 X    [         R                  R                  U R                  5          $ rI   r   r   s    r,   r   Dregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.<lambda>  r   rA   c                 X    [         R                  R                  U R                  5          $ rI   r   r   s    r,   r   rm    r   rA   c                 X    [         R                  R                  U R                  5          $ rI   r   r   s    r,   r   rm    r   rA   )r   r9  r:  c                 X    [         R                  R                  U R                  5          $ rI   r   r   s    r,   r   rm    rH  rA   re  qlinear_binaryr   )r#   rU   )rU   inputsmake_kernel_rendertemplatechoice)7r\   r]   r   r$   r   r   r   r9   r   r&   r5   r6   r   r   rL  r4   r   r   r   rN  rO  r)   r'   r%   r8   r   r   r   r^   r_   r   
get_layoutsizerP  rQ  r   rR  r@   r   rf   r   rb   r   r   r   r#   CppTemplateBufferrU   rl   NonOwningLayoutReinterpretViewrr  rs  rt  ru  )#rN   r   r   r   r   r   rV  rr   r7  r   r   x2_scalex2_zpr   r   r   r   r   rU   rp   x2_sizerS  rs   rt   r   rX   ru   r   rw   r  r;   r?   r]  r:   aten_mkldnn_qlinear_binarys#    `` ` `````    ```           @@@@@r,   rq  2register_onednn_fusion_ops.<locals>.qlinear_binary  sB   6 ZZ\FkkmGv;#g,...6{Q;.#@R,-"r72;/0gr||44G}---''55LL>Y 6  !>7+;+;+=>>> #7B/G7++-.&8T:TT8|ww22LL%++6V 3  |ww22LL%++6V 3  dBLL11DzS(((ww22LLU[[9 3  
 OOLLN~~5;;.:88>!!4 4  gg//@CCEKKPww22LLEKK@t}} 3  e#MMNN$  lln(GG||~5
 &b,7<<>\9 ]9 ||~H-1-=)4J*,G##v'?'?'?[ U F 4;b<40FAb @@F))  DOO-223q8"@@F))  (():):4==?)KL))$--/:  .faGG ww001G1G1IJH'002H
 1 %	7&!G* G* G*R $//< GT='4L$wbRVW!%T!1)9  < '<5 7|q $9$;$;!(&2!- ("#.!&",'4,; <%)F6N.33< GT='4L$wbRVW	
 ! !))+qww/@/@@@@<<<M
 #La 1 < GT='4D$wb$O+IFA 6;;++R-A-ABB5(KK$$++r}}> ,,((!11..Br}}O   &{{//66+1;;+;+;+N+N!'!1!1!:!:%{{//66
 6{Q;.#@f&Ks&KV__5Fr5J&KLMrA   zmkl::_mkl_linearrT   packed_worig_wc          	      v  > / n[         R                  (       d  [         R                  (       aH  [        USS/5      n[	        XUS9Gt pp[        XPU5      (       a  [        R                  " UUXU/SSS/S9  [        U5      S:X  d  [        5       (       a#  UR                  TR                  XU4US US95        UR                  5       [        R                  R                  ;   d   eUR                  5       [        R                  R                  ;   d   eS S	 S
.n	[!        SUXU/UU	S9u  pUb  [#        X5      n
U
$ )Nr   r   rT   TrR   )rW   r   )r   
batch_sizec                 X    [         R                  R                  U R                  5          $ rI   r   r   s    r,   r   Gregister_onednn_fusion_ops.<locals>.mkl_packed_linear.<locals>.<lambda>h      !2!21::<!@rA   c                 X    [         R                  R                  U R                  5          $ rI   r   r   s    r,   r   r  i  r  rA   )r   rR   packed_linearr   )r   r^   r_   r   r   r   r   rf   r]   r   r   r   r%   r   r&   r'   r   r   )rN   r  r  rP   r  rU   rs   r   rt   r   rw   aten_mkl_linears              r,   mkl_packed_linear5register_onednn_fusion_ops.<locals>.mkl_packed_linearC  sA    /1&&&*B*B#*6Aq6#:L293/Q -VEE'33#"&1$(+,a& w<1$(=(?(?NN',,&16Tj -   ((*agg.?.????(AGG,=,==== A@!
 6#&)"/	 = ^FrA   rI   )%r6   _C_has_mkldnn r~   r   r   mkldnn_linear_pointwiseLinearUnaryrl   binaryLinearBinaryonednnqlinear_pointwiseQLinearPointwisePT2EQLinearPointwiseBinaryPT2E_convolution_pointwise_convolution_pointwise_ _convolution_transpose_pointwiser   r   defaultqconv_pointwiser   r   boolrh   r   qconv2d_pointwisebinary_tensorhas_mklmkl_mkl_linearMKLPackedLinearr   r   )cpu_needs_realized_inputsr   r   r   r   r   r   r   r   r   rJ  rq  r  r  r   r   r~  rT  r~   s                @@@@@@r,   register_onednn_fusion_opsr     s   xx#5II..'!$0077	$
  %7II..55'!$1188	%
! %7II..'!$99@@	%
! &8II..55'!$??FF	&
" II33II44II==II..!!))II,,
 	" 
599++BB	C			 	 
D	6 
599++BBII	J			 	 		 
K	B 
599++CCJJ	K			 	 		 
L	B 
599++==	> A	A	A	 A	 
?A	F 
599++==DD	EQU=	=	&=	+4=	9B=	 
F=	~ 
599++LL	M			 	 
N	: 
40088	9&	&	&	 &	 	&	
 &	 &	 &	 &	 c&	 &	 &	 &	 &	  &	 &	  !&	 
:&	P 
599++;;QU	V<	<	 %	<	
 <	 <	 
W<	| 
II..554

 
II..<<RV

Q	Q	 %	Q	
 Q	 Q	 Q	



Q	f 
599++==SW	X l	l	 %	l	
 l	 l	 l	 
Yl	\	 
II..554

 
II..<<RV

, 'T	T	 %	T	
 T	 T	 T	 T	



T	l
 880		))" %(88??	O &,,UYY]]-F-FGuyy}}889 11#1 "1 t#	1 :1f 	"";<c% rA   )NNNN)-r3  r6   torch.utils._pytreeutils_pytreer    torch._inductor.kernel.mm_commonr   r  r   r   codegen.cpp_gemm_templater   !codegen.cpp_grouped_gemm_templater   codegen.cpp_utilsr
   r   r2  r   r   r   r   r   r   r   select_algorithmr   r   r   r   r   virtualizedr   r   r   Tensortupler  r@   rM   rh   r|   _inductor_lowering_functionr  r   rA   r,   <module>r     sc     $ $ 4  6 E 8    
 @ ) )-ll-<<- \\- ,,	-
 \\- LLLL4-`.%).. . o	.
 d?. o. 4. .j 
??I? I?D 59  1r=rA   