
    
3jiR                     v   S r SSKrSSKJr  SSKJs  Jr   SSKrSSKJ	r
  SSKJr  SSKJr  SSKJr  SSKJrJrJr  SSKrSSKrSSKrSSKJr  SS	K J!r!  \RD                   S0S
\
RF                  S\
RF                  S\
RF                  S\
RF                  S\
RF                  4
S jj5       r$\RD                   S0S
\
RF                  S\
RF                  S\
RF                  S\
RF                  S\
RF                  4
S jj5       r%\RD                  S\
RF                  S
\
RF                  S\
RF                  4S j5       r&\RD                  S\
RF                  S
\
RF                  S\
RF                  S\
RF                  4S j5       r'\RP                  " S V VVs/ s H)  n S  H  nS  H  n\RS                  XS.US9PM     M!     M+     snnn SS/S9" \$5      r*\RP                  " S V Vs/ s H  n S  H  n\RS                  S
U 0US9PM     M!     snn SS/S9" \%5      r+\RP                  " S V Vs/ s H  n S  H  n\RS                  S
U 0US9PM     M!     snn S/SS /S!9" \&5      r,\RP                  " S V Vs/ s H  n S  H  n\RS                  S
U 0US9PM     M!     snn SS"/S9" \'5      r-S1S# jr.S1S$ jr/S2S% jr0 " S& S'\Rb                  5      r2 " S( S)\Rb                  5      r3 " S* S+\Rb                  5      r4S3S, jr5S4S- jr6 " S. S/\!Rn                  5      r8g! \ a  r\" S5      \eSrCff = fs  snnn f s  snn f s  snn f s  snn f )5a  Fused norm+SiLU Triton kernels for VAE decode/encode.

Two architectures are covered, auto-detected by build_object_patches:
- Wan video VAEs: channel-dim RMSNorm (F.normalize(x, dim=1) * sqrt(C) * gamma)
  followed by SiLU, wired as Sequential pairs.
- KL image VAEs (Flux2, SDXL, SD1.5, ...): GroupNorm followed by a shared swish
  module inside ResnetBlock, plus the GN-only norm_out heads.

The eager chains make 4-5 full memory passes per call; the fused kernels do one
read+write with fp32 accumulation. The GroupNorm kernel requires channels_last activations, which is
also what makes channels_last conv layout viable for KL VAEs at all: PyTorch's
own channels_last GroupNorm kernel is ~4x slower than NCHW, ours is ~4x faster.
    NzFPatchTritonVAE requires triton (pip install triton, or triton-windows))RMS_norm)ResnetBlock)	PixelNorm)ResnetBlock3DEncoderDecoder)CallbacksMP)ioBLOCK_SBLOCK_C	HAS_GAMMASILU
EPS_INSIDEc           	         [         R                  " S5      n[         R                  " S5      nX-  [         R                  " SU	5      -   nUU:  nXR                  [         R                  5      U-  -   n[         R
                  " U	/[         R                  S9n[        SX:5       H  nU[         R                  " SU
5      -   nUU:  nUUS S 2S 4   R                  [         R                  5      U-  -   US S S 24   -   n[         R                  " UUS S 2S 4   US S S 24   -  SS9R                  [         R                  5      nU[         R                  " UU-  SS9-  nM     U(       a   U[         R                  " UU-  U-   5      -  nO.U[         R                  " [         R                  " U5      U5      -  nX/R                  [         R                  5      U-  -   n[        SX:5       GH  nU[         R                  " SU
5      -   nUU:  nUS S 2S 4   US S S 24   -  nUUS S 2S 4   R                  [         R                  5      U-  -   US S S 24   -   n[         R                  " UUSS9R                  [         R                  5      nUUS S S 24   -  nU(       aB  [         R                  " UU-   USS9R                  [         R                  5      nUUS S 2S 4   -  nU(       a  U[         R                  " U5      -  n[         R                  " UUS S 2S 4   R                  [         R                  5      U-  -   US S S 24   -   UR                  UR                  R                  5      US9  GM     g )Nr      dtype        maskotheraxisr   )tl
program_idarangetoint64zerosfloat32rangeloadsumsqrtmaximumsigmoidstorer   
element_ty)x_ptrg_ptrout_ptrCSstride_bepsscalercountr   r   r   r   r   pid_spid_boffs_smask_sbaseaccc0offs_cmask_cptrsvinvout_basemygs                                 M/home/wildlama/comfy/ComfyUI/custom_nodes/ComfyUI-KJNodes/nodes/triton_vae.py_rms_silu_kernelrD   #   s    MM!EMM!E_ryyG44FaZF88BHH%00D
((G9BJJ
/CAq"bii7++!fQWo((2Q66aHGGDvaga@LOOPRPZPZ[rvva!e!$$ # bggcFlS011bjjs33"((+h66HAq"bii7++!1d7OfT1Wo-fQWo((2Q66aHGGDq,//

;D!GV3?BB2::NAAagJABJJqM!A
F1d7O..rxx81<<vdAgNPQPTPTU\UbUbUmUmPnuvw #    c                 6   [         R                  " S5      nX-  [         R                  " SU5      -   n[         R                  " SU	5      nX:  nX:  nUS S 2S 4   US S S 24   -  nXS S 2S 4   R                  [         R                  5      U-  -   US S S 24   -   n[         R
                  " UUSS9R                  [         R                  5      n[         R                  " UU-  SS9nU(       a   U[         R                  " UU-  U-   5      -  nO.U[         R                  " [         R                  " U5      U5      -  nUUS S 2S 4   -  nU
(       aA  [         R
                  " X-   USS9R                  [         R                  5      nUUS S S 24   -  nU(       a  U[         R                  " U5      -  n[         R                  " X.S S 2S 4   R                  [         R                  5      U-  -   US S S 24   -   UR                  UR                  R                  5      US9  g )Nr   r   r   r   r   r   )r   r   r   r   r   r#   r!   r$   r%   r&   r'   r(   r   r)   )r*   r+   r,   r-   ROWSr0   r1   r2   r   r   r   r   r   pidr5   r:   r6   r;   r@   r<   r=   r8   r>   rA   rB   s                            rC   _rms_silu_cl_kernelrI   G   s    --
C]RYYq'22FYYq'"F]FZFq$w&q/)A!T'?%%bhh/!33fT1WoED
1C(++BJJ7A
&&QQ
CbggcFlS011bjjs33	C4LAGGENs;>>rzzJ$'
N

1HHWag))"((3a77&q/I144PWP]P]PhPhKipqrrE   Gc                 V   [         R                  " S5      n[         R                  " S5      n[         R                  " SU5      n	[         R                  " U/[         R                  S9n
[         R                  " U/[         R                  S9nXu-  n[         R
                  " S5      U-  nX:  Ga  U[         R                  " SU5      -   nX:  S S 2S 4   nXR                  [         R                  5      U-  US S 2S 4   -   U-  -   U	S S S 24   -   n[         R                  " UUSS9R                  [         R                  5      n[         R                  " UXTXd-  45      nU
[         R                  " [         R                  " USS9SS9-  n
U[         R                  " [         R                  " UU-  SS9SS9-  nX-  nX:  a  GM  [         R                  " XU-  -   [         R                  " SU5      -   U
5        [         R                  " X(U-  -   [         R                  " SU5      -   U5        g )Nr   r   r   r   r      r   )r   r   r   r    r!   num_programsr   r   r#   reshaper$   
atomic_add)r*   sum_ptr	sumsq_ptrr.   rJ   r   r   rH   br:   acc1acc2s0stepr5   r@   ptrr=   v3s                      rC   _gn_stats_clrY   a   s    --
C
aAYYq'"F88QCrzz*D88QCrzz*D	B??1'D
&bii7++ZD!ttBHH~)F1d7O;wFFPTVWPWXGGCas+..rzz:ZZG56rvvbq)22rvvb2gA.Q77

 & MM'E/BIIaO3T:MM)!e#bii1o5t<rE   c                 ,   [         R                  " S5      n[         R                  " S5      nX-  nX-  [         R                  " SU
5      -   n[         R                  " SU5      nUU:  S S 2S 4   nXR                  [         R                  5      U-  US S 2S 4   -   U-  -   US S S 24   -   n[         R
                  " UUSS9R                  [         R                  5      n[         R
                  " XU	-  -   [         R                  " SU	5      -   5      U-  n[         R
                  " X.U	-  -   [         R                  " SU	5      -   5      U-  UU-  -
  n[         R                  " US5      nS[         R                  " UU-   5      -  n[         R                  " [         R                  " US S 2S 4   X45      U45      n[         R                  " [         R                  " US S 2S 4   X45      U45      n[         R
                  " UU-   5      R                  [         R                  5      n[         R
                  " UU-   5      R                  [         R                  5      nUUS S S 24   -
  US S S 24   -  US S S 24   -  US S S 24   -   nU(       a  U[         R                  " U5      -  n[         R                  " X^R                  [         R                  5      U-  US S 2S 4   -   U-  -   US S S 24   -   UR                  UR                  R                  5      US9  g )Nr   r   r   r         ?r   )r   r   r   r   r   r#   r!   r&   r%   rN   broadcast_tor'   r(   r   r)   )r*   rP   rQ   w_ptrbias_ptrr,   r.   countr0   rJ   r   r   r   rH   rR   CSr5   r:   r@   rW   r=   mean_gvar_grstd_gmean_crstd_cwbiasrA   s                                rC   _gn_silu_apply_clrh   y   sv    --
C
aA|B]RYYq'22FYYq'"F	!QWA
44>A%q$w77B
BVDRSG_
TC
!3'**2::6AWWW1u_ryyA67%?FGGIA%		!Q785@6F?REJJuc"E27753;''FZZq$w!AG:NFZZq$w!AG:NF
""2::.A778f$%((4D	
VD!G_	tQw/!D!G*<tD!G}LA

1HHWRXX*VAtG_<GG&QUWXQX/YTT'--**+!5rE   )      i   )    @   )      )r   r   )	num_warpsr-   r.   )configskey)rm   rn      rk   rl   rG   )rn   rr   rk   rl   rP   rQ   )rp   rq   reset_to_zeror_   c                   ^^ U R                   S   U R                   S   smnU R                  5       TU-  -  mU R                  5       n [        R                  " U 5      nUS LXVS.n	Ub  UOU n
U(       a$  UU4S jn[
        U   " X
XTUT-  X2SU-  4	0 U	D6  U$ [        [        R                  " TS5      T4   " X
XTUT-  X2SU-  4	SSSS	.U	D6  U$ )
Nr   r   r   r   r   c                 <   > [         R                  " TU S   5      T4$ Nr   tritoncdivmetaBr.   s    rC   <lambda> fused_rms_silu.<locals>.<lambda>   s    V[[DO<a@rE   r[   rj   rk   rn   r   r   ro   )	shapenumel
contiguoustorch
empty_like_rms_silu_kernel_tunedrD   ry   rz   )xgammar1   r0   autotunesilu
eps_insider-   outflagsrB   gridr}   r.   s               @@rC   fused_rms_silur      s    771:qwwqzDAq		a!eA	A


1
Ct+TTE"A@t$Q31a!eSqZTYZ J 	&++a-q121AE3WZ]^W^ 	b;>VW	b[`	bJrE   c                   ^ U R                   S   nU R                  5       U-  m[        R                  " U 5      n[        R
                  " U5      n	US LXVS.n
Ub  UOU nU(       a!  U4S jn[        U   " XXTX2SU-  4SU	0U
D6  U$ [        SSU	-  5      n[        [        R                  " TU5      4   " XXTX2SU-  4XSS.U
D6  U$ )	Nr   ru   c                 :   > [         R                  " TU S   5      4$ rw   rx   )r|   rowss    rC   r~   #fused_rms_silu_cl.<locals>.<lambda>   s    V[[tI?ArE   r[   r   i   rm   r   )
r   r   r   r   ry   next_power_of_2_rms_silu_cl_kernel_tunedmaxrI   rz   )r   r   r1   r0   r   r   r   r-   r   r   r   rB   r   r   r   s                 @rC   fused_rms_silu_clr      s    	
A779>D


1
C$$Q'Gt+TTE"AA!$'cdCajY`jdij
 J a)V[[w79:1s[^ab[b 	sCJgh	slq	sJrE   c                 V  ^^ U R                   u  mpxn	X-  m[        R                  " STU-  U R                  [        R                  S9n
[        R
                  " U 5      nTXs-  -  nU(       a@  UU4S jn[        U   " X
S   U
S   TX7S9  UU4S jn[        U   " X
S   U
S   XUTXX7US9  U$ [        SS	U-  5      n[        S
[        R                  " TU5      5      n[        UT4   " X
S   U
S   TX?USS9  [        [        R                  " TU5      T4   " X
S   U
S   XUTXX?XuSS9  U$ )NrL   )devicer   c                 P   > [        S[        R                  " TU S   5      5      T4$ )N   r   )minry   rz   r{   s    rC   r~   "fused_gn_silu_cl.<locals>.<lambda>   s     3tV[[DO-L#Mq"QrE   r   r   )rJ   r   c                 <   > [         R                  " TU S   5      T4$ rw   rx   r{   s    rC   r~   r      s    6;;q$y/#BA"FrE   )rJ   r   r   i    r   rn   )rJ   r   r   ro   )rJ   r   r   r   ro   )r   r   r    r   r!   r   _gn_stats_cl_tuned_gn_silu_apply_cl_tunedr   r   ry   rz   rY   rh   )r   weightrg   groupsr0   r   r   r-   HWsumsr   r_   
grid_stats
grid_applyr   nprogr}   r.   s                    @@rC   fused_gn_silu_clr      s>   JAqQ	A;;q!f*QXXU]]KD


1
CEQ
:&qq'47ASF

+AAwQsTUW\.4d	L J a#D&++a12eQZ GT!Wa6\]ijk6;;q'2A677DGV[^`ach:@[\st	vJrE   c                   2   ^  \ rS rSrSU 4S jjrS rSrU =r$ )FusedRMSSiLU   c                 r   > [         TU ]  5         UR                  U l        UR                  U l        X l        g N)super__init__r   r1   r   )selfrmsr   	__class__s      rC   r   FusedRMSSiLU.__init__   s)    YY
YY
 rE   c                    UR                   (       dR  [        R                  " [        R                  " USS9U R                  -  U R
                  R                  U5      -  5      $ U R
                  nUR                  UR                  :w  a  UR                  UR                  5      nUR                  S5      nUR                  5       (       d  UR                  5       nUR                  S:X  aV  UR                  [        R                  S9(       a4  UR                  5       (       d  [        XU R                  U R                  S9$ [!        XU R                  U R                  S9$ )Nr   )dim   memory_formatr   )is_cudaFr   	normalizer1   r   r   r   rN   is_contiguousr   ndimr   channels_last_3dr   r   r   )r   r   r   s      rC   forwardFusedRMSSiLU.forward   s    yy66!++aQ/$**<tzz}}Q?OOPP

<<188#HHQXX&Eb!""$$$$&E66Q;1??9O9O?PYZYhYhYjYj$QtzzDMMRRa

T]]KKrE   )r   r   r1   F__name__
__module____qualname____firstlineno__r   r   __static_attributes____classcell__r   s   @rC   r   r      s    !L LrE   r   c                   2   ^  \ rS rSrSU 4S jjrS rSrU =r$ )FusedPixelNorm   c                 \   > [         TU ]  5         UR                  U l        X l        X0l        g r   )r   r   r0   r   r   )r   pnr   r   r   s       rC   r   FusedPixelNorm.__init__   s#    66	 rE   c           
      2   UR                   (       a  UR                  S:X  ad  UR                  [        R                  S9(       aB  UR                  5       (       d-  [        US SU R                  U R                  U R                  SS9$ [        US SU R                  U R                  U R                  SS9$ U[        R                  " [        R                  " US-  SSS9U R                  -   5      -  nU R                  (       a  [        R                  " U5      nU$ )	Nr   r   r[   T)r   r   r   rL   r   )r   keepdim)r   r   r   r   r   r   r0   r   r   r   r%   meanr   )r   r   r   s      rC   r   FusedPixelNorm.forward   s    99vv{qU=S=ST]^]l]l]n]n(D#txx$--^b^g^gtxyy!!T34==W[W`W`mqrr%**UZZQAtDtxxOPP99&&+C
rE   )r   r0   r   TFr   r   s   @rC   r   r      s    ! rE   r   c                   2   ^  \ rS rSrSU 4S jjrS rSrU =r$ )FusedGNSiLUi  c                    > [         TU ]  5         UR                  U l        UR                  U l        UR                  U l        UR
                  U l        X l        X0l        g r   )r   r   r   rg   
num_groupsr0   r   r   )r   gnr   r   r   s       rC   r   FusedGNSiLU.__init__  sD    iiGG	--66	 rE   c           
         UR                   S   nU R                  nU R                  U R                  pTUR                  UR                  :w  a6  UR                  UR                  5      nUR                  UR                  5      nUR                  (       a  UR                  S:X  a  X"S-
  -  S:X  a  X3S-
  -  S:X  at  X#-  S:X  al  UR                  [        R                  S9(       aJ  UR                  5       (       d5  [        XXPR                  U R                  U R                  U R                  S9$ [        R                   " XR                  XEU R                  5      nU R                  (       a  [        R                  " U5      nU$ )Nr   rm   r   r   r   )r   r   r   rg   r   r   r   r   r   r   channels_lastr   r0   r   r   r   
group_norm)r   r   r-   rJ   r   rg   r   s          rC   r   FusedGNSiLU.forward  s   GGAJOO{{DII==AHH$YYqxx(F77188$D991!1u+!);!eQR@RWXW\`aWaOO%2E2EOFqO`O`#At__dhhPTPYPY-1]]< <ll1oovTXXF99&&+C
rE   )r   rg   r0   r   r   r   r   r   r   s   @rC   r   r     s    ! rE   r   c                    U R                  5        H  n[        U[        R                  5      (       a6  UR	                  U(       a  [
        R                  O[
        R                  S9  MX  [        U[        R                  5      (       d  My  UR	                  U(       a  [
        R                  O[
        R                  S9  M     g )Nr   )
modules
isinstancennConv3dr   r   r   contiguous_formatConv2dr   )modelr   mods      rC   convert_conv_layoutr      sp    }}c299%%FF=!7!7eNeNeFfRYY''FF!4!45KbKbFc	 rE   c                    0 nU R                  5        GH3  u  p4[        U[        R                  5      (       Ga!  [	        [        U5      S-
  5       GH  n[        XE   [        5      (       a  [        XES-      [        R                  5      (       a_  XE   R                  R                  S:X  aC  XE   R                  c4  [        XE   US9X# SU 3'   [        R                  " 5       X# SUS-    3'   M  [        XE   [        5      (       d  M  [        XES-      [        R                  5      (       d  M  XU   l        XE   X# SU 3'   XES-      X# SUS-    3'   GM     GMG  [        U[        5      (       a  [        US5      (       d  S He  n[!        XF5      n[        U[        R"                  5      (       a  [%        XqS9X# SU 3'   M?  [        U[$        5      (       d  MV  Xl        XrU SU 3'   Mg     [        R                  " 5       X# S3'   GM  UR'                  S5      (       aT  [        U[        R"                  5      (       a  [%        US	US
9X#'   GM:  [        U[$        5      (       a  Xl        XBU'   GM\  GM_  [        U[(        5      (       a  UR*                  (       + nS Hl  n[!        XF5      n[        U[,        5      (       a#  UR.                  S:X  a  [1        XxUS
9X# SU 3'   MF  [        U[0        5      (       d  M]  Xl        XrU SU 3'   Mn     U(       a)  U S3U;   a  [        R                  " 5       X# S3'   GM$  GM'  GM*  [        U[2        [4        45      (       d  GMH  [        U[2        5      =(       d    UR*                  (       + n[        UR6                  [,        5      (       a4  UR6                  R.                  S:X  a  [1        UR6                  XS
9X# S3'   O@[        UR6                  [0        5      (       a!  XR6                  l        UR6                  X# S3'   U(       d  GM  U S3U;   d  GM  [        R                  " 5       X# S3'   GM6     U$ )zObject patches for vae.patcher: applied at model load, reverted at unload.
Also matches already-fused modules so a loaded+patched model rebuilds cleanly.r   rm   r   .	temb_proj)norm1norm2z.swishnorm_outF)r   r   z.norm1z.non_linearityz.conv_norm_outz	.conv_act)named_modulesr   r   
Sequentialr"   lenr   SiLUr   r   rg   r   Identityr   r   hasattrgetattr	GroupNormr   endswithLTXResnetBlock3Dtimestep_conditioningr   r   r   
LTXEncoder
LTXDecoderconv_norm_out)	r   r   patchesnamer   i	norm_namenorm	fuse_silus	            rC   build_object_patchesr  (  s    G((*	c2==))3s8a<(cfh//Jsq5z2774S4SFLL--2sv{{7J-9#&8-TGfAaSM*13GfAa!eW-.55*SQZQSQ\Q\:]:]&.FO-0VGfAaSM*14UGfAa!eW-. ) [))'#{2K2K/	s.dBLL115@5YGfAi[12k22$,M59tfAi[12 0 (*{{}GfFO$]]:&&#r||,, +Ceh OC--' # . -..  555I/	s.dI..488q=5CDck5lGfAi[12n55$,M59tfAi[12 0 vV_735;;=&/0 8yj*566"3
3T3;T;T7TI#++Y77C<M<M<Q<QUV<V3A#BSBSZc3w&/0C--~>>-5!!*363D3D&/0yv^4?.0kkm&	*+_ +` NrE   c                   T    \ rS rSr\S 5       r\SS\R                  4S jj5       rSr	g)PatchTritonVAEi_  c                 ^   [         R                  " SSSSS[         R                  R                  S5      [         R                  R                  SSSS	9[         R                  R                  S
SSS	9[         R                  R                  SSSS	9/[         R                  R                  SS9/S9$ )Nr  zPatch Triton VAEzKJNodes/experimentalTa  Speeds up VAE decode/encode with fused Triton norm+SiLU kernels and channels_last conv layout. Supported VAEs (auto-detected): Wan 2.1/2.2 video VAEs incl. Qwen-Image (RMSNorm, ~1.4x/1.15x), KL image VAEs such as Flux/Flux2, SDXL and SD1.5 (GroupNorm, ~1.6-1.8x at 2048px), and LTXV/LTX2 video VAEs (PixelNorm; timestep-conditioned decoder blocks get norm-only fusion). Other architectures are not supported. Applied as object patches on a cloned patcher, so it only exists while this VAE is loaded.vaefuse_norm_siluzReplace norm+SiLU chains (RMSNorm for Wan, GroupNorm for KL VAEs) with fused Triton kernels (single pass, fp32 accumulation). Requires triton.)defaulttooltipr   zConvert conv weights to channels_last memory format, removing cuDNN layout transposes around every conv. Required for the fused GroupNorm kernel to engage on KL VAEs.r   FzBenchmark several kernel block-size configs on first use of each tensor shape and cache the fastest. Brief stutter per new resolution, usually a few percent faster after warmup.)display_name)node_idr  categoryis_experimentaldescriptioninputsoutputs)r
   SchemaVaeInputBooleanOutput)clss    rC   define_schemaPatchTritonVAE.define_schema`  s    yy$++ u U#

  !14  JZ   [

  $  Iq   r

  U  Ex   y	 51!
 	
rE   returnc                    [         R                   " U5      nUR                  nUR                  R                  5       (       a  UR	                  UR
                  5        [        R                  R                  XQR                  R                  UR                  R                  S9nUR                  Ul        Xal        S nUR                  R                  [        R                  SU5        OUR                  R                  5       Ul        U(       ar  [!        XTS9nU(       d  [#        S5      eUR%                  5        H   u  pUR                  R'                  X5        M"     [(        R*                  " S[-        U5       S35        U(       a/  S nUR                  R                  [        R                  S	U5        O
[/        US
S9  [0        R2                  " U5      $ )N)load_deviceoffload_devicec                     U R                   R                  5        H7  n[        US5      (       d  M  [        US5      (       a  M)  SUl        S Ul        M9     g )Ncomfy_cast_weightsprev_comfy_cast_weightsF)r   r   r   r"  _v_signature)patcher	device_tolowvram_model_memoryforce_patch_weights	full_loadr   s         rC   clear_dynamic_cast_flags8PatchTritonVAE.execute.<locals>.clear_dynamic_cast_flags  sD    "==002Cs$899'#OhBiBi16.+/( 3rE   wan_vae_fused_clear_castr   zzNo fusable norm layers found, this node supports Wan video VAEs, KL image VAEs (Flux2/SDXL/SD1.5) and LTXV/LTX2 video VAEszPatchTritonVAE: registered z fused norm object patchesc                 ,    [        U R                  SS9  g )NTr   )r   r   )r%  r&  r'  r(  r)  s        rC   reapply_channels_last5PatchTritonVAE.execute.<locals>.reapply_channels_last  s    #GMMFrE   wan_vae_fused_channels_lastFr.  )copyfirst_stage_modelr%  
is_dynamicr   	vae_dtypecomfymodel_patcherModelPatcherr  r   parentadd_callback_with_keyr	   ON_LOADcloner  RuntimeErroritemsadd_object_patchlogginginfor   r   r
   
NodeOutput)r  r
  r  r   r   r   new_patcherr*  r   r  objr/  s               rC   executePatchTritonVAE.executew  sb   iin%%;;!!##HHS]]#--::;;#:#:3;;KeKe ; gK "%K%K0 KK--k.A.AC]_wx++++-CK*5DG"  $`  a  a$]]_	,,T7 -LL6s7|nD^_`GKK--k.A.AC`bwxU;}}S!!rE    NTTF)
r   r   r   r   classmethodr  r
   rB  rE  r   rG  rE   rC   r  r  _  s6    
 
, %"VXVcVc %" %"rE   r  rH  )g-q=FTFr   )Tr   )9__doc__r   torch.nnr   torch.nn.functional
functionalr   ry   triton.languagelanguager   ImportErrorecomfy.ldm.wan.vaer   (comfy.ldm.modules.diffusionmodules.modelr   #comfy.ldm.lightricks.vae.pixel_normr   1comfy.ldm.lightricks.vae.causal_video_autoencoderr   r   r   r   r   r   r2  r@  comfy.model_patcherr6  comfy.patcher_extensionr	   comfy_api.latestr
   jit	constexprrD   rI   rY   rh   r   Configr   r   r   r   r   r   r   Moduler   r   r   r   r  	ComfyNoder  )bsbcrf   s   000rC   <module>r`     s      g  ' @ 9 N  N    / kp xY[YeYe  xprp|p|  x " x;=<< x\^\h\h x  xF nssUWUaUa slnlxlx s#%<<s>@lls_a_k_ks s2 =LL=+-<<=BD,,= =. 55025GI||5[][g[g5 56  &K&26a ]]r9Q]GCI H19 H&K	c
 %& 
 #OOCUfCUR_eZ[V]]Ir?a]8_e8CUf	f*,  __CRc?R\bWXV]]Ir?a]8\b8?c	i577CE  !//CRc?R\bWXV]]Ir?a]8\b8?c	g)+ 
 ",L299 L*RYY $")) 6d4n>"R\\ >"W
  g
^
_effgDK
 g d ds/   
L 	0L"&L)
 &L/
(&L5
L	LL