
    3j!                       S r SSKJr  SSKrSSKJr  SSKJr  SSKrSSK	r
SSKJr  SSKJrJrJrJr  \(       a  SS	KJr  \R&                  " \5      r " S
 S\5      r  S             SS jjrSS jr\" \R2                  R4                  R6                  R8                  \5      S 5       r\" \R2                  R4                  R<                  R8                  \5      S 5       r\" \R2                  R4                  R@                  R8                  \5      S 5       r!\" \R2                  R4                  RD                  R8                  \5      S 5       r#g)zDMXFP8 (Microscaling FP8) block quantization layout for tensor cores.    )annotationsN)	dataclass)TYPE_CHECKING)roundup   )BaseLayoutParamsQuantizedLayoutdequantize_argsregister_layout_opQuantizedTensorc                      \ rS rSrSrSr\" SS9 " S S\5      5       r\	    SS j5       r
\	SS	 j5       r\	SS
 j5       r\	SS j5       r\	SS j5       r\	SS j5       r\	SS j5       rSrg)TensorCoreMXFP8Layout   ah  MXFP8 block quantization with E8M0 (power-of-2) block scaling.

MXFP8 uses:
- FP8 E4M3 data (no packing)
- E8M0 block scales (pure power-of-2 exponents)
- Block size of 32

Auto-pads to 32x32 alignment for cuBLAS compatibility.

Note:
    Requires SM >= 10.0 (Blackwell) for hardware-accelerated matmul.
    Shape operations (view, reshape) are not supported.
)
   r   T)frozenc                  8    \ rS rSr% SrSrS\S'   S
S jrS rSr	g	)TensorCoreMXFP8Layout.Params&   zMXFP8 layout parameters.

Inherits scale, orig_dtype, orig_shape from BaseLayoutParams.
scale contains the E8M0 per-block scaling factors.
Fbool
transposedc                    S/$ )Nscale selfs    T/home/wildlama/miniconda3/lib/python3.13/site-packages/comfy_kitchen/tensor/mxfp8.py_tensor_fields+TensorCoreMXFP8Layout.Params._tensor_fields/   s
    9    c                    g Nr   r   s    r   _validate_tensor_fields4TensorCoreMXFP8Layout.Params._validate_tensor_fields2   s    r    r   N)returnz	list[str])
__name__
__module____qualname____firstlineno____doc__r   __annotations__r   r#   __static_attributes__r   r    r   Paramsr   &   s    	
 !
D 		r    r-   c                $   UR                  5       S:w  a  [        SUR                  5        S35      eUR                  n[        UR                  5      nU R                  U5      nXT:g  n[        R                  " XS9u  pxU R                  UUUS9n	Xy4$ )N   zMXFP8 requires 2D tensor, got D)pad_32x)r   
orig_dtype
orig_shape)	dim
ValueErrordtypetupleshapeget_padded_shapeckquantize_mxfp8r-   )
clstensorkwargsr2   r3   padded_shapeneeds_paddingqdatablock_scaleparamss
             r   quantizeTensorCoreMXFP8Layout.quantize5   s     ::<1=fjjl^1MNN\\
6<<(
++J7$2..vM!!  

 }r    c                X    [         R                  " XR                  UR                  5      $ r"   )r:   dequantize_mxfp8r   r2   r<   rA   rC   s      r   
dequantize TensorCoreMXFP8Layout.dequantizeM   s    ""5,,8I8IJJr    c                F    UR                   UR                  R                  4$ r"   )_qdata_paramsr   )r<   qtensors     r   get_plain_tensors'TensorCoreMXFP8Layout.get_plain_tensorsQ   s    ~~w4444r    c                    XR                   S.$ )N) _scale)r   rH   s      r   state_dict_tensors(TensorCoreMXFP8Layout.state_dict_tensorsU   s    \\22r    c                    [        U5      S:w  a  [        S[        U5       S35      eUu  p#[        US5      [        US5      4$ )Nr/   zMXFP8 requires 2D shape, got r0       )lenr5   r   )r<   r3   rowscolss       r   r9   &TensorCoreMXFP8Layout.get_padded_shapeY   sH    z?a<S_<MQOPP
b!74#455r    c                $    U R                  U5      $ r"   )r9   )r<   r3   s     r   get_storage_shape'TensorCoreMXFP8Layout.get_storage_shape`   s    ##J//r    c                    U$ r"   r   )r<   storage_shapes     r   get_logical_shape_from_storage4TensorCoreMXFP8Layout.get_logical_shape_from_storaged   s    r    r   N)r=   torch.Tensorr%   ztuple[torch.Tensor, Params])rA   rc   rC   r-   r%   rc   )rN   r   r%   z!tuple[torch.Tensor, torch.Tensor])rA   rc   rC   r-   r%   zdict[str, torch.Tensor])r3   tuple[int, ...]r%   rd   )r`   rd   r%   rd   )r&   r'   r(   r)   r*   MIN_SM_VERSIONr   r   r-   classmethodrD   rI   rO   rT   r9   r]   ra   r,   r   r    r   r   r      s     Nd!    
%	 . K K 5 5 3 3 6 6 0 0  r    r   c           	     2    [         R                  " XUUUUS9$ )z@MXFP8 scaled matmul: computes a @ b.T + bias (linear semantics).)block_scale_ablock_scale_bbias	out_dtype)r:   scaled_mm_mxfp8)a_qdatab_qdatascale_ascale_brj   rk   s         r   _mxfp8_scaled_mmrq   i   s&      r    c                h    U R                   S   U:w  d  U R                   S   U:w  a  U SU2SU24   $ U $ )z7Slice padded matmul output back to original dimensions.r   r   N)r8   )resultorig_morig_ns      r   _slice_to_original_shaperv   {   s>    ||A& FLLOv$=gvgww&''Mr    c                   SSK Jn  US   n[        XC5      (       d4  [        R                  R
                  R                  R                  " U0 UD6$ UR                  R                  n[        R                  UR                  R                  US   US   4UR                  R                  UR                  R                  (       + S9nU" UR                  SU5      $ )z2Handle transpose as a logical flag flip for MXFP8.r   r   r   )r2   r3   r   r   r   )baser   
isinstancetorchopsatentdefaultrM   r3   r   r-   r2   r   r   rL   )qtargsr>   r   input_tensor	old_shape
new_paramss          r   _handle_mxfp8_transposer      s     &7Ll44yy~~''888$$//I&--''22aL)A,/""((#++666	 . J <..0GTTr    c                   SSK Jn  US   US   pT[        XC5      (       a  [        XS5      (       d  [        R                  " [        U5      6 $ UR                  R                  5       S:w  a  [        R                  " [        U5      6 $ [        UR                  SS5      n[        UR                  SS5      nU(       d  U(       d  [        R                  " [        U5      6 $ [        R                  U5      u  p[        R                  U5      u  pUR                  SUR                  R                  5      n [        XXUS9n[        XR                  R                   S   UR                  R                   S   5      $ ! ["        [$        4 a>  n[&        R)                  S	U 35        [        R                  " [        U5      6 s S
nA$ S
nAff = f)zAMXFP8 mm: requires b to be logically transposed (from .t() call).r   r   r   r/   r   Frk   )rk   zMXFP8 mm failed: N)rx   r   ry   rz   mmr
   rL   r4   getattrrM   r   rO   getr2   rq   rv   r3   RuntimeError	TypeErrorloggerwarning)r   r   r>   r   aba_transposedb_transposedrm   ro   rn   rp   rk   rs   es                  r   _handle_mxfp8_mmr      s_    &7DGqq**z!/M/Mxx.//xx||~xx.//199lE:L199lE:L<xx.//,>>qAG,>>qAG

;		(<(<=I0!'GPYZ'		0D0DQ0GI]I]^_I`aa)$ 0*1#./xx.//0s   3AE8 8G3G;GGc                   SSK Jn  US   US   US   pen[        XS5      (       a  [        Xc5      (       d  [        R                  " [        XEU45      6 $ UR                  R                  5       S:w  a  [        R                  " [        XEU45      6 $ [        UR                  SS5      n[        UR                  SS5      nU(       d  U(       d  [        R                  " [        XEU45      6 $ [        R                  U5      u  p[        R                  U5      u  pUR                  R                  n [        XXXM5      nUR                  R                  S   nUR                  R                  S   n[        XU5      $ ! [         ["        4 a@  n[$        R'                  SU 35        [        R                  " [        XEU45      6 s SnA$ SnAff = f)	zJMXFP8 addmm: bias + input @ weight.T (decomposed from F.linear with bias).r   r   r   r/   r   FzMXFP8 addmm failed: N)rx   r   ry   rz   addmmr
   rL   r4   r   rM   r   rO   r2   rq   r3   rv   r   r   r   r   )r   r   r>   r   rj   mat1mat2input_transposedweight_transposedinput_qdataro   weight_qdatarp   rk   rs   rt   ru   r   s                     r   _handle_mxfp8_addmmr      s    &AwQaDt--*T2S2S{{OT,>?@@{{A{{OT,>?@@t||\5AlEB0{{OT,>?@@0BB4HK1CCDIL''IA!+Wt_((+((+'??)$ A-aS12{{OT,>?@@As   .A
E9 9G		5G>G	G	c                j   SSK Jn  US   US   pT[        U5      S:  a  US   OSn[        XC5      (       a  [        XS5      (       d2  [        R
                  R                  R                  " [        XEU45      6 $ UR                  R                  5       S:w  a2  [        R
                  R                  R                  " [        XEU45      6 $ [        UR                  SS5      (       d  [        UR                  SS5      (       a2  [        R
                  R                  R                  " [        XEU45      6 $ [        R                  U5      u  px[        R                  U5      u  pUR                  SUR                  R                   5      n [#        XyXXk5      n[%        XR                  R&                  S   UR                  R&                  S   5      $ ! [(        [*        4 aT  n[,        R/                  S	U 35        [        R
                  R                  R                  " [        XEU45      6 s SnA$ SnAff = f)
z&MXFP8 linear: input @ weight.T + bias.r   r   r   r/   Nr   Frk   zMXFP8 linear failed: )rx   r   rX   ry   rz   nn
functionallinearr
   rL   r4   r   rM   r   rO   r   r2   rq   rv   r3   r   r   r   r   )r   r   r>   r   r   weightrj   r   ro   r   rp   rk   rs   r   s                 r   _handle_mxfp8_linearr      s    &7DG&$i!m47D|55*V:]:]xx""))?LRV;W+XYY A%xx""))?LRV;W+XYY|##\599WV^^Uach=i=ixx""))?LRV;W+XYY0BB<PK1CCFKL

;(<(<(G(GHIZ!+Wt_'0D0D0O0OPQ0RTZTbTbTmTmnoTpqq)$ Z.qc23xx""))?LRV;W+XYYZs    AG H2A	H-'H2-H2)NN)rm   rc   rn   rc   ro   rc   rp   rc   rj   ztorch.Tensor | Nonerk   ztorch.dtype | Noner%   rc   )rs   rc   rt   intru   r   r%   rc   )$r*   
__future__r   loggingdataclassesr   typingr   rz   comfy_kitchenr:   comfy_kitchen.float_utilsr   rx   r   r	   r
   r   r   	getLoggerr&   r   r   rq   rv   r{   r|   r}   r~   r   r   r   r   r   r   r   r   r    r   <module>r      sf   J "  !     - X X%			8	$QO Qr !%$(  	
  " $ EIINN$$,,.CDU EU$ EIINN%%--/DE0 F0: EIINN((002GHA IA> EIINN))113HIZ JZr    