
    
3jA                         S SK Jr  S SKrS SKJr  SSKJrJr  SSKJrJ	r	  SSK
Jr  SS	KJrJrJr  SS
KJr  SSKJr  SSKJr  \	R,                  " \5      r\ " S S\5      5       r " S S\\5      r " S S\5      rg)    )	dataclassN)nn   )ConfigMixinregister_to_config)
BaseOutputlogging   )AttentionProcessor)-HunyuanCombinedTimestepTextSizeStyleEmbedding
PatchEmbedPixArtAlphaTextProjection)
ModelMixin)HunyuanDiTBlock   )zero_modulec                   :    \ rS rSr% \\R                     \S'   Srg)HunyuanControlNetOutput#   controlnet_block_samples N)	__name__
__module____qualname____firstlineno__tupletorchTensor__annotations____static_attributes__r       i/home/wildlama/miniconda3/lib/python3.13/site-packages/diffusers/models/controlnets/controlnet_hunyuan.pyr   r   #   s    #ELL11r!   r   c                   0  ^  \ rS rSr\                SS\S\S\S\S-  S\S-  S\S	\S
\S\S\S\S\S\S\4U 4S jjj5       r	\
S\\\4   4S j5       rS\\\\4   -  4S jr\ SS j5       r         SS\R$                  S\4S jjrSrU =r$ )HunyuanDiT2DControlNetModel(   Nconditioning_channelsnum_attention_headsattention_head_dimin_channels
patch_sizeactivation_fntransformer_num_layers	mlp_ratiocross_attention_dimcross_attention_dim_t5pooled_projection_dimtext_lentext_len_t5"use_style_cond_and_image_meta_sizec                 r  > [         TU ]  5         X l        X#-  U l        [	        UUS-  USS9U l        [        R                  " [        R                  " X-   U[        R                  S95      U l        [        UUUUUS S9U l        [        UUUUUS9U l        [        R                   " / 5      U l        [        R                   " [%        U	S-  S-
  5       Vs/ s HF  n['        U R                  U R(                  R*                  U[-        U R                  U
-  5      US	S
S9PMH     sn5      U l        [1        [        R2                  " X5      5      U l        [%        [7        U R.                  5      5       H?  n[        R2                  " X5      n[1        U5      nU R"                  R9                  U5        MA     g s  snf )N   	silu_fp32)in_featureshidden_sizeout_featuresact_fn)dtype)heightwidthr)   	embed_dimr*   pos_embed_type)r0   seq_lenr.   r3   r
   r   TF)dimr'   r+   ff_inner_dimr.   qk_normskip)super__init__	num_heads	inner_dimr   text_embedderr   	Parameterr   randnfloat32text_embedding_paddingr   	pos_embedr   time_extra_emb
ModuleListcontrolnet_blocksranger   configr'   intblocksr   Linearinput_blocklenappend)selfr&   r'   r(   r)   r*   r+   sample_sizer8   r,   r-   r.   r/   r0   r1   r2   r3   layer_controlnet_block	__class__s                       r"   rF   $HunyuanDiT2DControlNetModel.__init__)   s   ( 	,,A6..2,	
 ')llKK.0C5==Y'
# $#!!
 L"7 6/Q
 "$r!2 mm ##9Q#>#BC DE  (,(G(G"/!$T^^i%?!@(;  D
 'ryy'JKs4;;'(A!yyB*+;<""))*:; )s   AF4returnc                    ^ 0 nS[         S[        R                  R                  S[        [         [
        4   4U4S jjmU R                  5        H  u  p#T" X#U5        M     U$ )z
Returns:
    `dict` of attention processors: A dictionary containing all attention processors used in the model with
    indexed by its weight name.
namemodule
processorsc                    > [        US5      (       a  UR                  SS9X  S3'   UR                  5        H  u  p4T" U  SU 3XB5        M     U$ )Nget_processorT)return_deprecated_lora
.processor.)hasattrrg   named_children)rc   rd   re   sub_namechildfn_recursive_add_processorss        r"   ro   PHunyuanDiT2DControlNetModel.attn_processors.<locals>.fn_recursive_add_processors   s`    v//282F2F^b2F2c
V:./#)#8#8#:+tfAhZ,@%T $; r!   )strr   r   Moduledictr   rl   )rZ   re   rc   rd   ro   s       @r"   attn_processors+HunyuanDiT2DControlNetModel.attn_processorsu   sb     
	c 	588?? 	X\]`bt]tXu 	 !//1LD'jA 2 r!   	processorc           	      d  ^ [        U R                  R                  5       5      n[        U[        5      (       a-  [        U5      U:w  a  [        S[        U5       SU SU S35      eS[        S[        R                  R                  4U4S jjmU R                  5        H  u  p4T" X4U5        M     g)	a  
Sets the attention processor to use to compute attention.

Parameters:
    processor (`dict` of `AttentionProcessor` or only `AttentionProcessor`):
        The instantiated processor class or a dictionary of processor classes that will be set as the processor
        for **all** `Attention` layers. If `processor` is a dict, the key needs to define the path to the
        corresponding cross attention processor. This is strongly recommended when setting trainable attention
        processors.
z>A dict of processors was passed, but the number of processors z0 does not match the number of attention layers: z. Please make sure to pass z processor classes.rc   rd   c                 
  > [        US5      (       aJ  [        U[        5      (       d  UR                  U5        O#UR                  UR	                  U  S35      5        UR                  5        H  u  p4T" U  SU 3XB5        M     g )Nset_processorri   rj   )rk   
isinstancers   ry   poprl   )rc   rd   rv   rm   rn   fn_recursive_attn_processors        r"   r|   SHunyuanDiT2DControlNetModel.set_attn_processor.<locals>.fn_recursive_attn_processor   ss    v//!)T22((3(($z7J)KL#)#8#8#:+tfAhZ,@%S $;r!   N)rX   rt   keysrz   rs   
ValueErrorrq   r   r   rr   rl   )rZ   rv   countrc   rd   r|   s        @r"   set_attn_processor.HunyuanDiT2DControlNetModel.set_attn_processor   s     D((--/0i&&3y>U+BPQTU^Q_P` a005w6QRWQXXkm 
	Tc 	T588?? 	T !//1LD'i@ 2r!   c                    UR                   nUR                  nUR                  nUR                  nUR                  n	UR
                  n
UR                  nUR                  nUR                  nUR                  nUR                  nUR                  nUR                  nUnU=(       d    UR                  nU " UUUUUU	U
UUUUUUUS9nU(       a9  UR                  UR                  5       SS9n[         R#                  SUS    35        U$ )N)r&   r,   r+   r(   r.   r/   r8   r)   r-   r'   r*   r[   r1   r2   F)strictz0controlnet load from Hunyuan-DiT. missing_keys: r   )rS   r+   r(   r.   r/   r8   r)   r-   r'   r*   r[   r1   r2   r,   load_state_dict
state_dictloggerwarning)clstransformerr&   r,   load_weights_from_transformerrS   r+   r(   r.   r/   r8   r)   r-   r'   r*   r[   r1   r2   
controlnetkeys                       r"   from_transformer,HunyuanDiT2DControlNetModel.from_transformer   s"    ##,,#66$88!'!>!>(((($$	$88&&
((??(( 5!7!X6;X;X"7#9'1 3#9## 3!##

  ),,[-C-C-Ee,TCNNMcRSfXVWr!   controlnet_condconditioning_scalec                 T   UR                   SS u  pU R                  U5      nXR                  U R                  U5      5      -   nU R                  X'XUR                  S9nUR                   u  nnnU R                  UR                  SUR                   S   5      5      nUR                  UUS5      n[        R                  " XW/SS9n[        R                  " Xh/SS9nUR                  S5      R                  5       n[        R                  " XeU R                  5      nSn[        U R                  5       H  u  nnU" UUUUS	9nUU4-   nM     Sn[        UU R                   5       H  u  nnU" U5      nUU4-   nM     U Vs/ s H  nUU-  PM
     nnU(       d  U4$ [#        US
9$ s  snf )  
The [`HunyuanDiT2DControlNetModel`] forward method.

Args:
hidden_states (`torch.Tensor` of shape `(batch size, dim, height, width)`):
    The input tensor.
timestep ( `torch.LongTensor`, *optional*):
    Used to indicate denoising step.
controlnet_cond ( `torch.Tensor` ):
    The conditioning input to ControlNet.
conditioning_scale ( `float` ):
    Indicate the conditioning scale.
encoder_hidden_states ( `torch.Tensor` of shape `(batch size, sequence len, embed dims)`, *optional*):
    Conditional embeddings for cross attention layer. This is the output of `BertModel`.
text_embedding_mask: torch.Tensor
    An attention mask of shape `(batch, key_tokens)` is applied to `encoder_hidden_states`. This is the output
    of `BertModel`.
encoder_hidden_states_t5 ( `torch.Tensor` of shape `(batch size, sequence len, embed dims)`, *optional*):
    Conditional embeddings for cross attention layer. This is the output of T5 Text Encoder.
text_embedding_mask_t5: torch.Tensor
    An attention mask of shape `(batch, key_tokens)` is applied to `encoder_hidden_states`. This is the output
    of T5 Text Encoder.
image_meta_size (torch.Tensor):
    Conditional embedding indicate the image sizes
style: torch.Tensor:
    Conditional embedding indicate the style
image_rotary_emb (`torch.Tensor`):
    The image rotary embeddings to apply on query and key tensors during attention calculation.
return_dict: bool
    Whether to return a dictionary.
N)hidden_dtyper   )rA   r
   r   )tembencoder_hidden_statesimage_rotary_emb)r   )shaperN   rW   rO   r;   rI   viewr   cat	unsqueezeboolwhererM   	enumeraterU   ziprQ   r   )rZ   hidden_statestimestepr   r   r   text_embedding_maskencoder_hidden_states_t5text_embedding_mask_t5image_meta_sizestyler   return_dictr<   r=   r   
batch_sizesequence_lengthr]   block_res_samplesr\   blockcontrolnet_block_res_samplesblock_res_sampler^   samples                             r"   forward#HunyuanDiT2DControlNetModel.forward   s   ^ &++BC0}5 &(8(89X(YY""U]UcUc # 

 *B)G)G&
OQ#'#5#5$))".F.L.LR.PQ$
  $<#@#@_^`#a  %		+@*[ab c#ii)<(U[]^1;;A>CCE %,?X\XsXs t%dkk2LE5!&;!1	M !2]4D D 3 (*$256GI_I_2`../0@A+GK[J]+]( 3a
 So'oRn1C(CRn$'o022&@\]] (ps   F%)	rU   rQ   rH   rW   rG   rN   rI   rM   rO   )r      X   NNzgelu-approximate    i  r%   g      @   i   r   M      T)r   NT	g      ?NNNNNNNT)r   r   r   r   r   rT   rq   floatr   rF   propertyrs   r   rt   r   classmethodr   r   r   r   r    __classcell__r_   s   @r"   r$   r$   (   s    &'#%"$"&!%/&(#'&*%)37#I<"I< !I<  	I<
 4ZI< $JI< I< !$I< I< !I< !$I<  #I< I<  !I<" -1#I< I<V c+=&=!>  .A,>cK]F]A^,^ A@ nr' '\ %(" !%#]^ 	]^
 "]^ ]^r!   r$   c                   h   ^  \ rS rSrSrU 4S jr         SS\R                  S\4S jjr	Sr
U =r$ )	 HunyuanDiT2DMultiControlNetModeli7  a  
`HunyuanDiT2DMultiControlNetModel` wrapper class for Multi-HunyuanDiT2DControlNetModel

This module is a wrapper for multiple instances of the `HunyuanDiT2DControlNetModel`. The `forward()` API is
designed to be compatible with `HunyuanDiT2DControlNetModel`.

Args:
    controlnets (`list[HunyuanDiT2DControlNetModel]`):
        Provides additional conditioning to the unet during the denoising process. You must set multiple
        `HunyuanDiT2DControlNetModel` as a list.
c                 X   > [         TU ]  5         [        R                  " U5      U l        g )N)rE   rF   r   rP   nets)rZ   controlnetsr_   s     r"   rF   )HunyuanDiT2DMultiControlNetModel.__init__D  s    MM+.	r!   r   r   c                     [        [        X4U R                  5      5       HQ  u  nu  pnU" UUUUUUUUU	U
UUS9nUS:X  a  UnM$  [        WS   US   5       VVs/ s H  u  nnUU-   PM     nnnU4nMS     W$ s  snnf )r   )r   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   r   )rZ   r   r   r   r   r   r   r   r   r   r   r   r   iimagescaler   block_samplescontrol_block_samplescontrol_block_sampleblock_samples                        r"   r   (HunyuanDiT2DMultiControlNetModel.forwardH  s    \ .7s?`d`i`i7j-k)A)j&+! %#(&;$7)A'= /!1'M  Av(5% ?BBWXYBZ\ijk\l>m)>m:,l )<7>m & ) *?(@%1 .l4 %$)s   A7)r   r   )r   r   r   r   __doc__rF   r   r   r   r   r    r   r   s   @r"   r   r   7  sP    
/ %(" !%#H% 	H%
 "H% H%r!   r   )dataclassesr   r   r   configuration_utilsr   r   utilsr   r	   attention_processorr   
embeddingsr   r   r   modeling_utilsr   #transformers.hunyuan_transformer_2dr   r   r   
get_loggerr   r   r   r$   r   r   r!   r"   <module>r      s|    "   B ( 4 
 ( A # 
		H	% 2j 2 2L^*k L^^Y%z Y%r!   