
    3j                        S SK r S SKJr  S SKrS SKJr  SSKJrJr  SSK	J
r
Jr  SSKJr  SSKJrJr  SS	KJr  SS
KJrJrJr  SSKJrJr  SSKJr  \ R6                  " \5      r\" \R<                  SSS9r\" \R@                  RB                  RD                  SS\RF                  S9r$\R@                  RB                  r!\R@                  RJ                  r%\R@                  RL                  r&SS jr'SS jr(g)    N)Any)mm_args   )configlowering)CppGemmTemplateCppWoqInt4GemmTemplate)create_epilogue_with_attr)expandregister_lowering)WeightInt4PackMatmul)autotune_select_algorithmExternKernelChoicerealize_inputs)use_aten_gemm_kernelsuse_cpp_gemm_template)Vzat::_weight_int8pack_mmF)has_out_variantz*at::native::_weight_int4pack_mm_cpu_tensor)r   kernel_creatorc                  b   [         R                  " [        R                  [        R
                  [        R                  /5        [         R                  " [        R                  5        [         R                  " [        R
                  5        [         R                  " [        R                  5        g N)r   add_needs_realized_inputs	quantized
max_pool2d
_quantized$wrapped_fbgemm_pack_gemm_matrix_fp16!wrapped_fbgemm_linear_fp16_weightmake_fallback     ]/home/wildlama/miniconda3/lib/python3.13/site-packages/torch/_inductor/quantized_lowerings.pyregister_quantized_opsr"   '   sn    &&  ;;88	
 9//0:JJK:GGHr    c                      [        [        R                  S S9S S.S[        R                  S[        R                  S[        R                  S[
        S[
        4
S jj5       n [        [        R                  S S9S S.S[        R                  S[        R                  S	[        S
[        R                  S[
        S[
        4S jj5       n[        R                  " [        R                  5        [        R                  " [        R                  5        g )N)type_promotion_kind)layoutinputweightscaler%   returnc                  ^^ [        XTSS9u      nmpVUR                  5       [        R                  [        R                  [        R
                  4;   a"  UR                  5       [        R                  :X  d   eTn[        5       (       a  [        R                  XVT4U5      /O/ nS[        R                  S[        4UU4S jjn	[        XuUSS9(       a  [        R                  " UUXVT/SU	S9  [        SXUT/U5      u  pU
$ )	NT)r%   mat2_transposedbufr)   c           
      T   > [        U S[        [        TTR                  5      5      S9$ )Nmul)other)r
   r   r   size)r,   r%   r(   s    r!   _mul_epilogue?register_woq_mm_ops.<locals>.int8pack_mm.<locals>._mul_epilogueO   s'    ,U.v{{1K"L r    )r+   )trans_wepilogue_creator_weight_int8pack_mm)r   	get_dtypetorchbfloat16float16floatint8r   aten__weight_int8pack_mmbindTensorr   r   r   add_choicesr   )r&   r'   r(   r%   _mat1mat2aten_layoutchoicesr1   nodes     ``       r!   int8pack_mm(register_woq_mm_ops.<locals>.int8pack_mm5   s    '.&$'
#1a NN LL EJJ.	
/ 
 %&& &**D+>LM 		u|| 	 	 	
 !D$O''U#!. ,!74,?
 r    
qGroupSizeqScaleAndZerosc          	      `   [        XUSSS9u      pTpgUR                  5       [        R                  [        R                  [        R
                  4;   a"  UR                  5       [        R                  :X  d   e[        R                  R                  [        R                  " U[        R                  S9S S9nUn	[        5       (       a  [        R                  XgX4U	5      /O/ n
[        R                   (       d  [        R"                  (       aR  [%        U	UUSSUS9(       a?  UR'                  5       R)                  5       (       a  [*        U   R-                  U
U	XgX/5        S[        R.                  R0                  R2                  S[        R4                  4S jnUS	 S
.n[7        SU
XgX/U	US9u  pU$ )NT)r%   use_4x2_dimr+   )dtype)name)r+   is_woq_int4q_group_sizexr)   c                     U R                  5       R                  5       (       d   eU R                  5       nU R                  5       n[        R
                  " SSU[        R                  US9$ )Nr      )rL   device)
get_layoutis_contiguousget_size
get_devicer7   randintuint8)rP   shaperS   s      r!   get_example_weightHregister_woq_mm_ops.<locals>.int4pack_mm_cpu.<locals>.get_example_weight   sN    <<>//1111JJLE\\^F==Cekk&QQr    c                 X    [         R                  R                  U R                  5          $ r   )r   graph	constantsget_name)rP   s    r!   <lambda>>register_woq_mm_ops.<locals>.int4pack_mm_cpu.<locals>.<lambda>   s    **1::<8r    )r      _weight_int4pack_mm_for_cpu)input_gen_fns)r   r6   r7   r8   r9   r:   rY   r   r^   add_tensor_constanttensorint64r   aten__weight_int4pack_mm_cpur=   r   max_autotunemax_autotune_gemmr   rT   rU   r	   r?   	_inductorirIRNoder>   r   )r&   r'   rH   rI   r%   r@   rA   rB   
group_sizerC   rD   r[   re   rE   s                 r!   int4pack_mm_cpu,register_woq_mm_ops.<locals>.int4pack_mm_cpub   s    '.&dD'
#1a NN LL EKK/	
0 WW00LL5;;7d 1 

  %&&	 -11<k  	   F$<$<% $ ' !//11 #:.::Z8	R%//"4"4";"; 	R 	R "8

 ,)4'
 r    )r   atenr5   r7   r>   r   rd   intr   r   _dyn_quant_matmul_4bit_dyn_quant_pack_4bit_weight)rF   rp   s     r!   register_woq_mm_opsrv   4   s    t//TJ *||** ||*
 * 
* K*X t77TR E||EE E 	E E 
E SEN 46674;;<r    )r)   N))loggingtypingr   r7    torch._inductor.kernel.mm_commonr    r   r   codegen.cpp_gemm_templater   r	   codegen.cpp_utilsr
   r   r   	mkldnn_irr   select_algorithmr   r   r   utilsr   r   virtualizedr   	getLogger__name__logr5   r<   opsr   int4mm_packed_weight_cpucreateri   r   rr   r"   rv   r   r    r!   <module>r      s       4  N 8 / + 
 @  !-	8%   2	II000'..	   II	YY!!
yy~~
Iw=r    