
    3jbD                       % S r SSKJr  SSKrSSKrSSKrSSKJrJr  SSKJ	r	  SSK
Jr  SSKJr  SSKrSSKr\R                   " \5      r0 rS$S jrS$S	 jr\" S
S9S%S j5       r\	" SS9 " S S5      5       r " S S\5      r " S S\R2                  5      rS rS rS rS rS&S jrS r S r!S r"S r#S r$\RJ                  RL                  RN                  RP                  \\RJ                  RL                  RR                  RP                  \\RJ                  RL                  RT                  RP                  \ \RJ                  RL                  RV                  RX                  \\RJ                  RL                  RV                  RZ                  \\RJ                  RL                  R\                  RP                  \!\RJ                  RL                  R^                  RP                  \"\RJ                  RL                  R`                  RP                  \#\RJ                  RL                  Rb                  RP                  \$\RJ                  RL                  Rd                  RP                  S 0
r30 r4S \5S!'   S'S" jr6S(S# jr7g))z@Base classes for quantized tensors with typed layout parameters.    )annotationsN)ABCabstractmethod)	dataclass)	lru_cache)Anyc                    U[         U '   g NLAYOUTS)nameclss     S/home/wildlama/miniconda3/lib/python3.13/site-packages/comfy_kitchen/tensor/base.pyregister_layout_classr      s    GDM    c                    [         U    $ r
   r   )r   s    r   get_layout_classr      s    4=r      )maxsizec                     [         R                  R                  5       (       d  g[         R                  R                  5       $ )z1Get CUDA compute capability (SM version), cached.N)torchcudais_availableget_device_capability r   r   get_cuda_capabilityr      s-     ::""$$::++--r   T)frozenc                  p    \ rS rSr% SrS\S'   S\S'   S\S'   S	 rS
 rSS jrSS jr	SS jr
SSS jjrSrg)BaseLayoutParams(   zBase dataclass for layout parameters with common functionality.

Subclasses should define additional fields and override _tensor_fields()
if they have additional tensor fields beyond 'scale'.
torch.Tensorscaletorch.dtype
orig_dtypetuple[int, ...]
orig_shapec                $    U R                  5         g r
   )_validate_tensor_fieldsselfs    r   __post_init__BaseLayoutParams.__post_init__3   s    $$&r   c                    g r
   r   r)   s    r   r(   (BaseLayoutParams._validate_tensor_fields6   s    r   c                    S/$ )zBReturn list of field names that are tensors. Override in subclass.r"   r   r)   s    r   _tensor_fieldsBaseLayoutParams._tensor_fields:   s
    yr   c                   [         R                  " U 5       Vs0 s H"  o"R                  [        XR                  5      _M$     nnU R	                  5        H  nX4   R                  US9X4'   M     [        U 5      " S0 UD6$ s  snf )z/Move all tensor fields to the specified device.devicer   )dataclassesfieldsr   getattrr0   totype)r*   r4   fkwargsfields        r   	to_deviceBaseLayoutParams.to_device>   su    9D9K9KD9QR9QA&&'$//9QR((*E"M,,F,;FM +Dz#F## Ss   )A?c                
   [         R                  " U 5       Vs0 s H"  oR                  [        XR                  5      _M$     nnU R	                  5        H  nX#   R                  5       X#'   M     [        U 5      " S0 UD6$ s  snf )zClone all tensor fields.r   )r5   r6   r   r7   r0   cloner9   )r*   r:   r;   r<   s       r   r@   BaseLayoutParams.cloneE   sp    9D9K9KD9QR9QA&&'$//9QR((*E"M//1FM +Dz#F## Ss   )B c                ,   [         R                  " U 5       Hz  n[        XR                  5      nUR                  U R	                  5       ;   a$  [        XR                  5      R                  XBS9  MZ  [        R                  XR                  U5        M|     g)zCopy tensor fields in-place from src, reusing existing memory.

Args:
    src: Source params to copy from (must be same type).
    non_blocking: If True, use non-blocking copy for tensors.
non_blockingN)r5   r6   r7   r   r0   copy_object__setattr__)r*   srcrD   r<   src_vals        r   	copy_fromBaseLayoutParams.copy_fromL   sj     !''-Ec::.GzzT0022jj)///S""4W= .r   r   N)returnz	list[str])r4   ztorch.devicerL   r   )rL   r   F)rH   r   rD   boolrL   None)__name__
__module____qualname____firstlineno____doc____annotations__r+   r(   r0   r=   r@   rJ   __static_attributes__r   r   r   r   r   (   s>    
 '$$> >r   r   c                      \ rS rSr% SrS\S'   SrS\S'   \\SS j5       5       r	\\SS	 j5       5       r
\\SS
 j5       5       r\\SS j5       5       r\SS j5       r\SS j5       rSrg)QuantizedLayout[   zNBase class for quantization layouts. Subclasses define inner Params dataclass.z	type[Any]ParamsNtuple[int, int] | NoneMIN_SM_VERSIONc                    [         er
   NotImplementedError)r   tensorr;   s      r   quantizeQuantizedLayout.quantizec   
     "!r   c                    [         er
   r^   r   qdataparamss      r   
dequantizeQuantizedLayout.dequantizeh   rc   r   c                    [         er
   r^   )r   qtensors     r   get_plain_tensors!QuantizedLayout.get_plain_tensorsm   rc   r   c                    [         er
   r^   re   s      r   state_dict_tensors"QuantizedLayout.state_dict_tensorsr   rc   r   c                V    U R                   c  g[        5       nUc  gXR                   :  $ )z@Check if fast quantized matmul is supported on current hardware.TF)r\   r   r   caps     r   supports_fast_matmul$QuantizedLayout.supports_fast_matmulw   s2     %!#;((((r   c                h    [        5       nU R                  U R                  UU R                  5       S.$ )z6Return hardware/software requirements for this layout.)layoutmin_sm_versioncurrent_sm_versionfast_matmul_supported)r   rP   r\   rt   rr   s     r   get_requirements QuantizedLayout.get_requirements   s5     "#ll!00"%%(%=%=%?	
 	
r   r   )r`   r!   rL   ztuple[torch.Tensor, Any])rf   r!   rg   r   rL   r!   )rk   QuantizedTensorrL   ztuple[torch.Tensor, ...])rf   r!   rg   r   rL   dict[str, torch.Tensor]rL   rN   )rL   zdict[str, Any])rP   rQ   rR   rS   rT   rU   r\   classmethodr   ra   rh   rl   ro   rt   r{   rV   r   r   r   rX   rX   [   s    X .2N*1"  " "  " "  " "  " ) ) 
 
r   rX   c                     \ rS rSrSr\      SS j5       r      SS jrSS jr\	SS j5       r
\	SS j5       r\	SS j5       r\	SS	 j5       r\	SS
 j5       r\	S S j5       r\	S!S j5       r\      S"S j5       r   S#       S$S jjrS rS rS rS%S jrS&S'S jjrS r\S 5       r\S(S j5       r\S 5       rSrg))r}      z
Quantized tensor with typed layout parameters.

Properties:
    shape: Original (unpadded) shape
    storage_shape: Actual shape of quantized data (may be padded)
    is_padded: True if storage_shape != original shape
c                    [         R                  R                  U UR                  UR                  UR
                  SS9$ )NF)r4   dtyperequires_grad)r   Tensor_make_wrapper_subclassr&   r4   r$   )r   rf   
layout_clsrg   s       r   __new__QuantizedTensor.__new__   s?     ||22<<## 3 
 	
r   c                V    [        U[        5      (       d   eXl        X l        X0l        g r
   )
isinstancestr_qdata_layout_cls_params)r*   rf   r   rg   s       r   __init__QuantizedTensor.__init__   s'     *c****%r   c           	         S[        U R                  5       SU R                   SU R                   SU R                  R
                   S3	$ )NzQuantizedTensor(shape=z, storage_shape=z	, layout=z, dtype=))tupleshapestorage_shaper   r   r$   r)   s    r   __repr__QuantizedTensor.__repr__   sV    $U4::%6$7 8!//0 1&&' (\\,,-Q0	
r   c                @    [        U R                  R                  5      $ r
   )r   r   r   r)   s    r   r   QuantizedTensor.storage_shape   s    T[[&&''r   c                .    U R                   R                  $ )zKThe dtype of the underlying quantized storage (e.g., float8_e4m3fn, uint8).)r   r   r)   s    r   storage_dtypeQuantizedTensor.storage_dtype   s     {{   r   c                .    U R                   R                  $ )zReturn the actual storage size in bytes.

Note: This returns the true memory footprint of the quantized data,
not the logical size based on orig_dtype. For example, an FP8 quantized
tensor with logical dtype bfloat16 returns the FP8 storage size.
)r   nbytesr)   s    r   r   QuantizedTensor.nbytes   s     {{!!!r   c                    [        U R                  5      n[        US5      (       a  UR                  U R                  5      $ U R                  $ )Nget_logical_shape_from_storage)r   r   hasattrr   r   )r*   r   s     r   padded_shapeQuantizedTensor.padded_shape   sC    %d&6&67
:?@@<<T=O=OPP!!!r   c                H    U R                   R                  U R                  :g  $ r
   )r   r&   r   r)   s    r   	is_paddedQuantizedTensor.is_padded   s    ||&&$*;*;;;r   c                ,    [        U R                  5      $ r
   )r   r   r)   s    r   r   QuantizedTensor.layout_cls   s     0 011r   c                    U R                   $ r
   )r   r)   s    r   rg   QuantizedTensor.params   s    ||r   c                P    [        U5      R                  " U40 UD6u  pEU " XBU5      $ r
   )r   ra   )r   r`   r   r;   rf   rg   s         r   
from_floatQuantizedTensor.from_float   s-     )4==fOO5f--r   Nc                    Uc-  U(       a  U R                   R                  5       OU R                   n[        Ub  UOU R                  U R                  U5      $ )ao  Create a copy with optionally modified qdata/params.

Args:
    qdata: New quantized data tensor. If None, uses self._qdata.
    params: New parameters. If None, uses self._params (cloned if clone_params=True).
    clone_params: If True and params is None, clone self._params. Set to False
        when you know params don't need cloning (e.g., they're already new).
)r   r@   r}   r   r   )r*   rf   rg   clone_paramss       r   
_copy_withQuantizedTensor._copy_with   sJ     >-9T\\'')t||F&EDKK
 	
r   c                6    U R                   R                  5       $ r
   )r   data_ptrr)   s    r   r   QuantizedTensor.data_ptr  s    {{##%%r   c                6    U R                   R                  5       $ r
   )r   	is_pinnedr)   s    r   r   QuantizedTensor.is_pinned  s    {{$$&&r   c                6    U R                   R                  5       $ r
   )r   storager)   s    r   r   QuantizedTensor.storage  s    {{""$$r   c                   U R                   R                  5       (       d  U R                   R                  5       OU R                   n[        U R                  SS5      nU(       a  U R                  R
                  S   U R                  R
                  S   4nU R                  R                  XR                  5      nUR                  S S U:w  a  [        S U 5       5      nXE   nUR                  5       $ U R                  R                  XR                  5      nU R                  R
                  nUR                  U:w  a  [        S U 5       5      nXE   $ U$ )N
transposedFr   r      c              3  :   #    U  H  n[        S U5      v   M     g7fr   Nslice.0ss     r   	<genexpr>-QuantizedTensor.dequantize.<locals>.<genexpr>  s     CNquQ{{N   c              3  :   #    U  H  n[        S U5      v   M     g7fr   r   r   s     r   r   r   !  s     515A;;r   )r   is_contiguous
contiguousr7   r   r&   r   rh   r   r   t)r*   rf   is_transposedphysical_shapefullslicesorigs          r   rh   QuantizedTensor.dequantize  s    150I0I0K0K&&(QUQ\Q\  lEB"ll55a8$,,:Q:QRS:TUN??--e\\BDzz"1~/CNCC|668O))%>||&&::555F<r   c                    U R                   R                  U R                  U R                  5      nUR	                  5        VVs0 s H  u  p4U U 3U_M     snn$ s  snnf r
   )r   ro   r   r   items)r*   prefixtensorssuffixr`   s        r   
state_dictQuantizedTensor.state_dict%  sN    //44T[[$,,OBI--/R/6(6(#V+/RRRs   Ac                   S/n0 n0 n[         R                  " U R                  5       H  n[        U R                  UR                  5      n[        U[        R                  5      (       aF  SUR                   3n[        R                  XU5        UR                  U5        XbUR                  '   M  XSUR                  '   M     UU R                  [        U R                  5      UUS.4$ )Nr   _param_)r   params_classtensor_fieldsnon_tensor_fields)r5   r6   r   r7   r   r   r   r   rF   rG   appendr   r9   )r*   inner_tensorsr   r   r<   value	attr_names          r   __tensor_flatten__"QuantizedTensor.__tensor_flatten__+  s    !
 ''5EDLL%**5E%..%ejj\2	""4E:$$Y/,5ejj)05%**- 6 ** .*!2	
 
 	
r   c                    [        US   5      nUS   R                  5        H  u  pVX   XE'   M     US   " S0 UD6n[        U S   US   U5      $ )Nr   r   r   r   r   r   )dictr   r}   )r   ctx
outer_sizeouter_strideparams_kwargs
field_namer   rg   s           r   __tensor_unflatten__$QuantizedTensor.__tensor_unflatten__A  sf    S!456%(%9%?%?%A!J(5(@M% &B ^$5}5}X6L8I6RRr   c                   U=(       d    0 nU(       a  US   OS n[         R                  U5      nUb	  U" XSU5      $ [        U5      nU(       a8  U[        ;   a.  [        U   nUR                   H  n	X;   d  M
  X   " XSU5      s  $    [
        R                  SU SU(       a  UR                  OS S35        U R                  XU5      $ )Nr   zUnhandled op z for unknownz, dequantizing)	_DISPATCH_TABLEget_get_layout_from_args_LAYOUT_DISPATCH_TABLE__mro__loggerdebugrP   _dequant_and_fallback)
r   functypesargsr;   qthandlerr   op_handlers
parent_clss
             r   __torch_dispatch__"QuantizedTensor.__torch_dispatch__L  s    2T!W$ "%%d+2V,, +40
$"8806K(00
,&22VDD 1
 	}TF%z
0C0CW`/aaopq((V<<r   c                6    U" [        U5      0 [        U5      D6$ r
   dequantize_args)r   r   r   r;   s       r   r   %QuantizedTensor._dequant_and_fallbackb  s    _T*Fof.EFFr   )r   r   r   )rf   r!   r   r   rg   r   )rL   r   )rL   r%   )rL   r#   )rL   intr   )rL   type[QuantizedLayout])rL   r   )r`   r!   r   r   rL   r}   )NNT)rf   ztorch.Tensor | Nonerg   z
Any | Noner   rN   rL   r}   )rL   r!   ) )r   r   rL   r~   )r   N)rP   rQ   rR   rS   rT   staticmethodr   r   r   propertyr   r   r   r   r   r   rg   r   r   r   r   r   r   rh   r   r   r   r  r   rV   r   r   r   r}   r}      s    

 
 	
 
		 	 		
 ( ( ! ! " " " " < < 2 2  
 .. .
 
. . &*!!	
"
 
 	

 

0&'%.S
, S S = =* G Gr   r}   c                J   [        U [        5      (       a  U R                  5       $ [        U [        5      (       a/  U R	                  5        VVs0 s H  u  pU[        U5      _M     snn$ [        U [        [        45      (       a  [        U 5      " S U  5       5      $ U $ s  snnf )zRecursively dequantize QuantizedTensors in args/kwargs.

Useful for fallback implementations that need to convert quantized
tensors back to regular tensors before calling the underlying op.
c              3  8   #    U  H  n[        U5      v   M     g 7fr
   r  )r   as     r   r   "dequantize_args.<locals>.<genexpr>t  s     ;d/!,,ds   )	r   r}   rh   r   r   r  listr   r9   )r   kvs      r   r  r  i  s     $((  	D$		26**,?,$!?1%%,??	D4-	(	(Dz;d;;;K @s   Bc                
   UR                  S5      nUR                  S5      nU SS  H  n[        U[        R                  5      (       a  UnM&  [        U[        R                  5      (       a  UnMI  [        U[
        5      (       d  M`  [        R                  " [        5         [        R                  " U5      nSSS5        M     [        U[
        5      (       a  [        R                  " U5      nX#4$ ! , (       d  f       M  = f)z.Extract device and dtype from .to() arguments.r4   r   r   N)	r   r   r   r4   r   r   
contextlibsuppress	Exception)r   r;   r4   r   args        r   _parse_to_argsr  z  s    ZZ!FJJwEABxc5<<((FU[[))ES!!$$Y/c* 0/  &#f%=	 0/s   "C33
D	c                P    U R                  U R                  R                  5       S9$ N)rf   )r   r   detachr   r   r;   s      r   _handle_detachr    s     ==ryy//1=22r   c                P    U R                  U R                  R                  5       S9$ r  )r   r   r@   r  s      r   _handle_cloner    s    ==ryy0=11r   c                2   [        X5      u  pEUS L=(       a    X@R                  R                  :g  nUS L=(       a    XPR                  R                  :g  nU(       d  U(       d	  U(       d  U $ U(       a5  U R                  R                  US9nU R                  R                  U5      n	OGU(       a  U R                  R                  5       OU R                  nU R                  R                  5       n	U(       a  [        R                  " XS9n	U R                  XSS9$ )Nr3   r$   Frf   rg   r   )r  r   r4   r   r$   r8   r=   r@   r5   replacer   )
r   r   r;   
force_copytarget_devicetarget_dtypeneeds_deviceneeds_dtype	new_qdata
new_paramss
             r   
_handle_tor+    s    "0">M ,R))BRBR1RLd*T|zz?T?T/TKJ	IILLL6	ZZ))-8
)3BIIOO%	ZZ%%'
 ((M
==y%=PPr   c                    [        XUSS9$ )NT)r$  )r+  r  s      r   _handle_to_copyr-    s    b488r   c                    U R                   R                  5       (       a  U $ U R                  U R                   R                  5       S9$ r  )r   r   r   r   r  s      r   _handle_contiguousr/    s8    	yy  	==ryy335=66r   c                6    U R                   R                  5       $ r
   )r   r   r  s      r   _handle_is_contiguousr1    s    99""$$r   c                D   US   US   pC[        U[        5      (       d"  [        S[        U5      R                   S35      eUR
                  UR
                  :w  a%  [        SUR
                   SUR
                   35      eUR                  R                  nUR                  S[        U5      S:  5      nUR                  R                  UR                  US	9  UR                  R                  UR                  US	9  [        R                  " UR                  US
9Ul        U$ )Nr   r   zCannot copy z to QuantizedTensorzLayout mismatch: z vs rD      rC   r!  )r   r}   	TypeErrorr9   rP   r   r   r$   r   lenr   rE   rJ   r5   r#  )r   r   r;   dstrH   dst_orig_dtyperD   s          r   _handle_copy_r8    s    AwQc?++,tCy'9'9&::MNOO
#//)+COO+<D@QRSS[[++N::nc$i1n=LJJSZZl;KK#++LA%%ckknMCKJr   c                0   UR                  SS 5      nUR                  S5      n[        R                  " U R                  US9nU R
                  R                  5       nUb  UR                  U5      nUb  [        R                  " XcS9nU R                  XVSS9$ )Nr   r4   r3   r!  Fr"  )popr   r   
empty_liker   r   r@   r=   r5   r#  r   )r   r   r;   r&  r%  r)  r*  s          r   _handle_empty_liker<    s    ::gt,LJJx(M  =AI!!#J ))-8
 ((M
==y%=PPr   c                    g)NTr   r  s      r   <lambda>r>    s    W[r   z+dict[Any, dict[type[QuantizedLayout], Any]]r   c                   ^ ^ UU 4S jnU$ )zDecorator to register a layout-specific operation handler.

Args:
    torch_op: PyTorch operation (e.g., torch.ops.aten.linear.default)
    layout_cls: Layout class (e.g., TensorCoreFP8Layout)
c                F   > T[         ;  a	  0 [         T'   U [         T   T'   U $ r
   )r   )handler_funcr   torch_ops    r   	decorator%register_layout_op.<locals>.decorator  s-    11/1"8,7Cx(4r   r   )rB  r   rC  s   `` r   register_layout_oprE    s    
 r   c                   U  H  n[        U[        5      (       a  [        UR                  5      s  $ [        U[        [
        45      (       d  ML  U H1  n[        U[        5      (       d  M  [        UR                  5      s  s  $    M     g)z.Extract layout class from operation arguments.N)r   r}   r   r   r  r   )r   r  items      r   r   r     sf    c?++#COO44dE]++dO44+D,<,<== 	  r   )r   r   )rL   r[   rM   )rB  r   r   r  )rL   ztype[QuantizedLayout] | None)8rT   
__future__r   r  r5   loggingabcr   r   r   	functoolsr   typingr   r   torch._dynamo	getLoggerrP   r   r   r   r   r   r   rX   r   r}   r  r  r  r  r+  r-  r/  r1  r8  r<  opsatenr  defaultr@   _to_copyr8   dtype_layoutr   r   r   rE   r;  !_has_compatible_shallow_copy_typer   r   rU   rE  r   r   r   r   <module>rU     s   F "    # !    			8	$
 1. . $/> /> />d/
c /
dWGell WGx""32Q,97% Q  
IINN!!>	IINN  -	IINN##_	IINN""J	IINNZ	IINN%%'9	IINN  ((*?	IINN  -	IINN%%'9	IINN44<<>[ GI C H	r   