
    3j                        S SK r SrS\ R                  S\ R                  4S jrS\4S\ R                  S\ R                  S\ R                  S	\ R                  S
\ R                  S-  S\S\ R                  4S jjr\ R                  R                  SSS9S\4S\ R                  S\ R                  S\ R                  S	\ R                  S
\ R                  S-  S\S\ R                  4S jj5       r\R                  SS j5       r
g)    N@   packedreturnc                    U R                  [        R                  5      nUS-  nUS-	  S-  n[        R                  " X#/SS9nUR                  " / U R
                  SS QSP76 R                  [        R                  5      $ )zH(..., K//2) int8 storing two uint4 per byte -> (..., K) int8 in [0, 15].      )dimN)totorchint32stackreshapeshapeint8)r   x32lohistackeds        Z/home/wildlama/miniconda3/lib/python3.13/site-packages/comfy_kitchen/backends/eager/awq.py_unpack_uint4_row_majorr      sn    
))EKK
 C	tB
(d	Bkk2(+G??2FLL"-2r255ejjAA    xqweightwscaleswzerosbias
group_sizec                    U R                   nU R                  SUS   5      nUR                   u  pX-  S:w  a  [        SU	 SU 35      eUR                   u  pUS-  U	:w  a  [        SU SU	 35      eUR                  n[	        U5      R                  U5      nUR                  XU-  U5      nUR                  5       R                  S5      nUR                  5       R                  S5      nUS-
  U-  U-   R                  X5      nUR                  U5      UR                  5       -  nUb  UU-   nUR                  " / US	S QU
P76 $ )
a|  AWQ W4A16 GEMV / small-batch GEMM.

Args:
    x: (M, K) or (K,) fp16/bf16 input.
    qweight: (N, K // 2) int8 packed (kitchen-native row-major uint4).
    wscales: (K // group_size, N) fp.
    wzeros: (K // group_size, N) fp.
    bias: (N,) fp or None.
    group_size: quantization group size (default 64).

Returns:
    Output with the input's leading shape and trailing dim N.
r	   r   zK=z not divisible by group_size=   zqweight K//2=z inconsistent with x K=g       @N)	r   r   
ValueErrordtyper   r   viewt	unsqueeze)r   r   r   r   r   r   
orig_shapex2d_mknk_halfcompute_dtypew_uintw_groups	scales_ngzeros_ngw_fpouts                      r   gemv_awq_w4a16r3   "   sA   * J
))B
2
'CIIEB~2aS =j\JKKIAzQ=0GsKLLMMM %W-00?F{{1:oz:H		%%b)Ixxz##B'H^y(8399!?D
&&
$&&(
*CDj;;+
3B+++r   zcomfy_kitchen::gemv_awq_w4a16 )mutates_argsc                 J    SSK Jn  XX#XES.nUR                  SUS9nU" S0 UD6$ )Nr   )registry)r   r   r   r   r   r   r3   )kwargsr4   )comfy_kitchen.registryr7   get_implementation)	r   r   r   r   r   r   r7   r8   impls	            r   _op_gemv_awq_w4a16r<   V   s<     0 wF &&'7&GD>&>r   c                     / U R                   S S QUR                   S   P7n[        R                  " XbR                  U R                  S9$ )Nr	      )r"   device)r   r   emptyr"   r?   )r   r   r   r   r   r   	out_shapes          r   _op_gemv_awq_w4a16_fakerB   i   s=    1!''#2,1a 01I;;yahhGGr   )Nr   )r   _DEFAULT_GROUPTensorr   intr3   library	custom_opr<   register_fakerB   r4   r   r   <module>rI      sG  ( BELL BU\\ B !%$,,||,,\\,, \\,, LL	,,
 ,,
,, ,, \\,,h 8rJ !%$||\\ \\ LL	
 ,,
  \\ K$ !!H "Hr   