
    
3jA                    l   S SK Jr  S SKJr  S SKJr  S SKrS SKJrJr  SSK	J
r
Jr  SSKJrJr  SS	KJr  S
SKJr  S
SKJrJrJrJrJrJr  S
SKJrJr  S
SKJr  S
SKJ r J!r!J"r"J#r#J$r$J%r%J&r&  S
SK'J(r(  SSK)J*r*  \RV                  " \,5      r-\ " S S\5      5       r. " S S\R^                  5      r0 " S S\R^                  5      r1 " S S\R^                  5      r2        S<S\3S\3S\3S\3S\3S \3S-  S!\3\4\3   -  S-  S"\3S-  S#\3S-  S$\5S%\5S-  S&\5S-  4S' jjr6       S=S(\3S)\3S\3S-  S \3S-  S!\3S"\3S-  S#\3S-  S%\5S&\54S* jjr7S+\3S,\3S-\8\3   4S. jr9 " S/ S0\\\
5      r: " S1 S2\\\
5      r; " S3 S4\R^                  5      r< " S5 S6\R^                  5      r= " S7 S8\R^                  5      r>S>S9 jr?S: r@S; rAg)?    )	dataclass)gcd)AnyN)Tensornn   )ConfigMixinregister_to_config)
BaseOutputlogging)apply_freeu   )AttentionMixin)ADDED_KV_ATTENTION_PROCESSORSCROSS_ATTENTION_PROCESSORS	AttentionAttnAddedKVProcessorAttnProcessorFusedAttnProcessor2_0)TimestepEmbedding	Timesteps)
ModelMixin)CrossAttnDownBlock2DCrossAttnUpBlock2DDownsample2DResnetBlock2DTransformer2DModelUNetMidBlock2DCrossAttn
Upsample2D)UNet2DConditionModel   )ControlNetConditioningEmbeddingc                   (    \ rS rSr% SrSr\\S'   Srg)ControlNetXSOutput3   a%  
The output of [`UNetControlNetXSModel`].

Args:
    sample (`Tensor` of shape `(batch_size, num_channels, height, width)`):
        The output of the `UNetControlNetXSModel`. Unlike `ControlNetOutput` this is NOT to be added to the base
        model output, but is already the final output.
Nsample )	__name__
__module____qualname____firstlineno____doc__r&   r   __annotations____static_attributes__r'       d/home/wildlama/miniconda3/lib/python3.13/site-packages/diffusers/models/controlnets/controlnet_xs.pyr$   r$   3   s     FFr/   r$   c                      ^  \ rS rSrSr  SS\R                  S\R                  S\R                  S\R                  S-  S\R                  S-  4
U 4S	 jjjrS
r	U =r
$ )DownBlockControlNetXSAdapterA   zyComponents that together with corresponding components from the base model will form a
`ControlNetXSCrossAttnDownBlock2D`Nresnetsbase_to_ctrlctrl_to_base
attentionsdownsamplerc                 ^   > [         TU ]  5         Xl        X l        X0l        X@l        XPl        g N)super__init__r4   r5   r6   r7   downsamplers)selfr4   r5   r6   r7   r8   	__class__s         r0   r<   %DownBlockControlNetXSAdapter.__init__E   s,     	(($'r/   )r7   r5   r6   r=   r4   )NN)r(   r)   r*   r+   r,   r   
ModuleListConv2dr<   r.   __classcell__r?   s   @r0   r2   r2   A   sh    * ,0(,(( mm( mm	(
 MMD(( YY%( (r/   r2   c                   d   ^  \ rS rSrSrS\S\R                  S\R                  4U 4S jjrSr	U =r
$ )MidBlockControlNetXSAdapterU   zxComponents that together with corresponding components from the base model will form a
`ControlNetXSCrossAttnMidBlock2D`midblockr5   r6   c                 F   > [         TU ]  5         Xl        X l        X0l        g r:   )r;   r<   rH   r5   r6   )r>   rH   r5   r6   r?   s       r0   r<   $MidBlockControlNetXSAdapter.__init__Y   s     ((r/   )r5   r6   rH   )r(   r)   r*   r+   r,   r   r   rA   r<   r.   rC   rD   s   @r0   rF   rF   U   s3    ))!8 ) )egerer ) )r/   rF   c                   H   ^  \ rS rSrSrS\R                  4U 4S jjrSrU =r	$ )UpBlockControlNetXSAdapter`   zwComponents that together with corresponding components from the base model will form a `ControlNetXSCrossAttnUpBlock2D`r6   c                 .   > [         TU ]  5         Xl        g r:   )r;   r<   r6   )r>   r6   r?   s     r0   r<   #UpBlockControlNetXSAdapter.__init__c   s    (r/   r6   )
r(   r)   r*   r+   r,   r   rA   r<   r.   rC   rD   s   @r0   rL   rL   `   s     B)R]] ) )r/   rL   base_in_channelsbase_out_channelsctrl_in_channelsctrl_out_channelstemb_channelsmax_norm_num_groupstransformer_layers_per_blocknum_attention_headscross_attention_dimadd_downsampleupcast_attentionuse_linear_projectionc                 F   Sn/ n/ n/ n/ n[        U[        5      (       a  U/U-  n[        U5       H  nUS:X  a  U OUn US:X  a  UOUnUR                  [	        X 5      5        UR                  [        X -   UU[        X -   US9[        X5S9SS95        U(       a+  UR                  [        UX8-  UUU   U	UU[        X5S9S95        UR                  [	        X15      5        M     U
(       aC  UR                  [	        X5      5        [        X1-   SUSS	9nUR                  [	        X15      5        OS n[        [        R                  " U5      [        R                  " U5      [        R                  " U5      S
9nU(       a  [        R                  " U5      Ul        Ub  UUl        U$ )Nr   r   
max_factorh㈵>in_channelsout_channelsrU   groups
groups_outepsrb   
num_layersrY   r\   r[   norm_num_groupsTopuse_convrc   name)r4   r5   r6   )
isinstanceintrangeappendmake_zero_convr   find_largest_factorr   r   r2   r   rA   r7   r=   )rQ   rR   rS   rT   rU   rV   has_crossattnrW   rX   rY   rZ   r[   r\   rh   r4   r7   r6   r5   ir=   down_block_componentss                        r0   get_down_block_adapterrw   h   s    JGJLL.44(D'E
'R$:/0Av+;L/0Av+;L 	N+;NO,?.+*+;+N[no./@a		
 "'%< 1;A>(;*?%5$78I$j	 	N+<PQG J  	N+<PQ#1DO`gk
 	N+<PQ8g&]]<0]]<0 +-==+D(-9*  r/   base_channelsctrl_channelsc	                     [        X 5      n	[        UX-   UU[        [        XU -   5      U5      UUUUS9	n
[        X5      n[	        XUS9$ )N	rW   rb   rc   rU   resnet_groupsrY   rX   r\   r[   )r5   rH   r6   )rr   r   rs   r   rF   )rx   ry   rU   rV   rW   rX   rY   r[   r\   r5   rH   r6   s               r0   get_mid_block_adapterr}      sb     "-?L&%A!1"#)#m]=Z*[]pq//3)H "-?L&Lbnoor/   rc   prev_output_channelctrl_skip_channelsc                     / nSn[        U5       H*  nUS:X  a  UOU nUR                  [        X%   U5      5        M,     [        [        R
                  " U5      S9$ )Nr   r   rP   )rp   rq   rr   rL   r   rA   )rc   r~   r   r6   rh   ru   resnet_in_channelss          r0   get_up_block_adapterr      s\    
 LJ:45F0N+=+@BTUV  &2==3NOOr/   c                    D  ^  \ rS rSrSr\               SS\S\S\\   S\	S\
S\\\   -  S	\\   S
\\   S\S\\   S\S-  S\\\   -  S\
S\S\
4U 4S jjj5       r\        SS\S\	S-  S	\\   S-  S\\   S-  S\
S\S\S\S\\   4S jj5       rS rSrU =r$ )ControlNetXSAdapter   a  
A `ControlNetXSAdapter` model. To use it, pass it into a `UNetControlNetXSModel` (together with a
`UNet2DConditionModel` base model).

This model inherits from [`ModelMixin`] and [`ConfigMixin`]. Check the superclass documentation for it's generic
methods implemented for all models (such as downloading or saving).

Like `UNetControlNetXSModel`, `ControlNetXSAdapter` is compatible with StableDiffusion and StableDiffusion-XL. It's
default parameters are compatible with StableDiffusion.

Parameters:
    conditioning_channels (`int`, defaults to 3):
        Number of channels of conditioning input (e.g. an image)
    conditioning_channel_order (`str`, defaults to `"rgb"`):
        The channel order of conditional image. Will convert to `rgb` if it's `bgr`.
    conditioning_embedding_out_channels (`tuple[int]`, defaults to `(16, 32, 96, 256)`):
        The tuple of output channels for each block in the `controlnet_cond_embedding` layer.
    time_embedding_mix (`float`, defaults to 1.0):
        If 0, then only the control adapters's time embedding is used. If 1, then only the base unet's time
        embedding is used. Otherwise, both are combined.
    learn_time_embedding (`bool`, defaults to `False`):
        Whether a time embedding should be learned. If yes, `UNetControlNetXSModel` will combine the time
        embeddings of the base model and the control adapter. If no, `UNetControlNetXSModel` will use the base
        model's time embedding.
    num_attention_heads (`list[int]`, defaults to `[4]`):
        The number of attention heads.
    block_out_channels (`list[int]`, defaults to `[4, 8, 16, 16]`):
        The tuple of output channels for each block.
    base_block_out_channels (`list[int]`, defaults to `[320, 640, 1280, 1280]`):
        The tuple of output channels for each block in the base unet.
    cross_attention_dim (`int`, defaults to 1024):
        The dimension of the cross attention features.
    down_block_types (`list[str]`, defaults to `["CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "DownBlock2D"]`):
        The tuple of downsample blocks to use.
    sample_size (`int`, defaults to 96):
        Height and width of input/output sample.
    transformer_layers_per_block (`int | tuple[int]`, defaults to 1):
        The number of transformer blocks of type [`~models.attention.BasicTransformerBlock`]. Only relevant for
        [`~models.unet_2d_blocks.CrossAttnDownBlock2D`], [`~models.unet_2d_blocks.UNetMidBlock2DCrossAttn`].
    upcast_attention (`bool`, defaults to `True`):
        Whether the attention computation should always be upcasted.
    max_norm_num_groups (`int`, defaults to 32):
        Maximum number of groups in group normal. The actual number will be the largest divisor of the respective
        channels, that is <= max_norm_num_groups.
conditioning_channelsconditioning_channel_order#conditioning_embedding_out_channelstime_embedding_mixlearn_time_embeddingrX   block_out_channelsbase_block_out_channelsrY   down_block_typessample_sizeNrW   r[   rV   r\   c                 2  > [         T!U ]  5         US   nUS   S-  nUS;  a  [        SU 35      e[        U5      [        U
5      :w  a  [        SU SU
 S35      e[	        U[
        [        45      (       d  U/[        U
5      -  n[	        U	[
        [        45      (       d  U	/[        U
5      -  n	[	        U[
        [        45      (       d  U/[        U
5      -  n[        U5      [        U
5      :w  a  [        SU SU
 S35      e[        US   UUS	9U l        U(       a  [        UU5      U l
        OS U l
        [        R                  " / 5      U l        [        R                  " / 5      U l        [        R                  " SUS   S
SS9U l        [#        US   US   5      U l        US   nUS   n['        U
5       Hf  u  nnUnUU   nUnUU   nSU;   nU[        U
5      S-
  :H  nU R                  R)                  [+        UUUUUUUUU   UU   U	U   U(       + UUS95        Mh     [-        US   US   UUS   US   U	S   UUS9U l        US   /n['        U5       H1  u  nnU[        U5      S-
  :  a  S
OSnUR1                  U/U-  5        M3     [        [3        U5      5      nUS   n[5        [        U
5      5       HW  nUnUU   n[5        S
5       Vs/ s H  nUR7                  5       PM     n nU R                  R)                  [9        UUU S95        MY     g s  snf )Nr      )rgbbgrz&unknown `conditioning_channel_order`: zbMust provide the same number of `block_out_channels` as `down_block_types`. `block_out_channels`: z. `down_block_types`: .zdMust provide the same number of `num_attention_heads` as `down_block_types`. `num_attention_heads`: conditioning_embedding_channelsr   r   r   r!   kernel_sizepadding	CrossAttn)rQ   rR   rS   rT   rU   rV   rt   rW   rX   rY   rZ   r[   r\   )rx   ry   rU   rW   rX   rY   r[   r\   r   )rc   r~   r   )r;   r<   
ValueErrorlenrn   listtupler"   controlnet_cond_embeddingr   time_embeddingr   rA   down_blocksup_connectionsrB   conv_inrr   control_to_base_for_conv_in	enumeraterq   rw   r}   	mid_blockextendreversedrp   popr   )"r>   r   r   r   r   r   rX   r   r   rY   r   r   rW   r[   rV   r\   time_embedding_input_dimtime_embedding_dimrR   rT   ru   down_block_typerQ   rS   rt   is_final_blockr   rc   number_of_subblocks reversed_base_block_out_channelsprev_base_output_channel_ctrl_skip_channels_r?   s"                                    r0   r<   ControlNetXSAdapter.__init__#  s   0 	#:1#= 4Q7!; &^;EF`Eabcc!"c*:&;;t  vH  uI  I_  `p  _q  qr  s  6uFF,H+ICP`La+a(-e}==#6"7#>N:O"O-e}==#6"7#>N:O"O"#s+;'<<v  xK  wL  Lb  cs  bt  tu  v 
 *I,>q,AB"7*
&  "34LN`"aD"&D==, mmB/ yy$6q$9qRST+9:LQ:OQhijQk+l( 4A6.q1"+,<"=A0 7 :0 21 5'?:M#&6"7!";;N##&%5&7%5&7"4(;"/1Ma1P(;A(>(;A(>'5#5%5*? #>6 /1"5,R0,)Eb)I 3B 7 3B 7-"7	
 134();<OA|/0144!   %%|n7J&JK	  = ,09P0Q+R(<Q?s+,-A'8$ @ CEJ1X"NX#5#9#9#;X"N&&$!2(@': . #Os   Lunet
size_ratioc
                    USLn
USLnX-  (       d  [        S5      eU=(       d2    UR                  R                   Vs/ s H  n[        X-  5      PM     snnUc  UR                  R                  nU " UUU	UUUUUR                  R                  UR                  R
                  UR                  R                  UR                  R                  UR                  R                  UR                  R                  UR                  R                  UR                  R                  S9nUR                  UR                  5        U$ s  snf )ax  
Instantiate a [`ControlNetXSAdapter`] from a [`UNet2DConditionModel`].

Parameters:
    unet (`UNet2DConditionModel`):
        The UNet model we want to control. The dimensions of the ControlNetXSAdapter will be adapted to it.
    size_ratio (float, *optional*, defaults to `None`):
        When given, block_out_channels is set to a fraction of the base model's block_out_channels. Either this
        or `block_out_channels` must be given.
    block_out_channels (`list[int]`, *optional*, defaults to `None`):
        Down blocks output channels in control model. Either this or `size_ratio` must be given.
    num_attention_heads (`list[int]`, *optional*, defaults to `None`):
        The dimension of the attention heads. The naming seems a bit confusing and it is, see
        https://github.com/huggingface/diffusers/issues/2011#issuecomment-1547958131 for why.
    learn_time_embedding (`bool`, defaults to `False`):
        Whether the `ControlNetXSAdapter` should learn a time embedding.
    time_embedding_mix (`float`, defaults to 1.0):
        If 0, then only the control adapter's time embedding is used. If 1, then only the base unet's time
        embedding is used. Otherwise, both are combined.
    conditioning_channels (`int`, defaults to 3):
        Number of channels of conditioning input (e.g. an image)
    conditioning_channel_order (`str`, defaults to `"rgb"`):
        The channel order of conditional image. Will convert to `rgb` if it's `bgr`.
    conditioning_embedding_out_channels (`tuple[int]`, defaults to `(16, 32, 96, 256)`):
        The tuple of output channel for each block in the `controlnet_cond_embedding` layer.
NzePass exactly one of `block_out_channels` (for absolute sizing) or `size_ratio` (for relative sizing).)r   r   r   r   r   rX   r   r   rY   r   r   rW   r[   rV   r\   )r   configr   ro   attention_head_dimrY   r   r   rW   r[   ri   r\   todtype)clsr   r   r   rX   r   r   r   r   r   
fixed_sizerelative_sizebmodels                 r0   	from_unetControlNetXSAdapter.from_unet  s   R (t3
"$.*w 
 0pQUQ\Q\QoQo3pQoAC4GQo3p&"&++"@"@"7'A0S1!5 31$(KK$B$B $ ? ?![[99//)-)Q)Q![[99 $ ; ;"&++"C"C
& 	5 4qs   Ec                     [        S5      e)NzA ControlNetXSAdapter cannot be run by itself. Use it together with a UNet2DConditionModel to instantiate a UNetControlNetXSModel.)r   )r>   argskwargss      r0   forwardControlNetXSAdapter.forward  s     Q
 	
r/   )r   r   r   r   r   r   r   )r   r          rM            ?Fr   r      r   r   i@  i     r      r   r   r   DownBlock2DrM   r!   Tr   T)NNNFr   r   r   r   )r(   r)   r*   r+   r,   r
   ro   strr   floatboolr<   classmethodr    r   r   r   r.   rC   rD   s   @r0   r   r      s   ,\  &'*/:K$'%*01)7.D#'(
 #%9:!%#%&*+F"F %(F .33Z	F
 "F #F !5:-F "#JF "'sF !F  *F" 4Z#F$ '*E#J&6%F& 'F( !)F*  $+F FP  $(/304%*"%%&*/:KJ"J DLJ !I,	J
 "#Y-J #J  J  #J %(J .33ZJ JX
 
r/   r   c            .         ^  \ rS rSrSrSr\                      S:S\S-  S\\	   S\\	   S\\   S	\S-  S
\\\   -  S\\\   -  S\\\   -  S\	S-  S\S-  S\
S\
S\S-  S\S-  S\S\S\\   S\	S\
S\\   S\\\   -  S\4,U 4S jjj5       r\     S;S\S\S-  S\S-  S\\   S-  S\S-  S\S-  4S  jj5       rS<S" jrS# rS$\S%\S&\S'\4S( jrS) rS* rS+ r         S=S,\S-\R2                  \-  \-  S.\R2                  S/\R2                  S-  S0\S-  S1\R2                  S-  S2\R2                  S-  S3\R2                  S-  S4\\	\4   S-  S5\\	\R2                  4   S-  S6\
S7\
S!\\-  4S8 jjrS9rU =r$ )>UNetControlNetXSModeli  a  
A UNet fused with a ControlNet-XS adapter model

This model inherits from [`ModelMixin`] and [`ConfigMixin`]. Check the superclass documentation for it's generic
methods implemented for all models (such as downloading or saving).

`UNetControlNetXSModel` is compatible with StableDiffusion and StableDiffusion-XL. It's default parameters are
compatible with StableDiffusion.

It's parameters are either passed to the underlying `UNet2DConditionModel` or used exactly like in
`ControlNetXSAdapter` . See their documentation for details.
TNr   r   up_block_typesr   ri   rY   rW   rX   addition_embed_typeaddition_time_embed_dimr[   r\   time_cond_proj_dim%projection_class_embeddings_input_dimr   ctrl_conditioning_channels(ctrl_conditioning_embedding_out_channelsctrl_conditioning_channel_orderctrl_learn_time_embeddingctrl_block_out_channelsctrl_num_attention_headsctrl_max_norm_num_groupsc                   > [         T0U ]  5         US:  d  US:  a  [        S5      eUS:  a  U(       d  [        S5      eU	b  U	S:w  a  [        S5      e[        U[        [
        45      (       d  U/[        U5      -  n[        U[        [
        45      (       d  U/[        U5      -  n[        U[        [
        45      (       d  U/[        U5      -  n[        U[        [
        45      (       d  U/[        U5      -  nUnSU l        [        R                  " SUS   SSS	9U l
        [        US   UUS
9U l        [        R                  " SUS   SSS	9U l        [        US   US   5      U l        US   nUS   S-  n[!        US   SSS9U l        [%        UUUS9U l        U(       a  [%        UUS9U l        OS U l        U	c  S U l        S U l        O![!        U
SSS9U l        [%        UU5      U l        / nUS   nUS   n[/        U5       Ha  u  nnUnUU   nUn UU   nSU;   n!U[        U5      S-
  :H  n"UR1                  [3        UUU UUUUU!UU   UU   UU   UU   U"(       + UUS95        Mc     [5        US   US   UUUUS   US   US   US   UUS9U l        / n#[	        [9        U5      5      n$[	        [9        U5      5      n%[	        [9        U5      5      n&US   /n'[/        U5       H1  u  nn(U[        U5      S-
  :  a  SOSn)U'R;                  U(/U)-  5        M3     [	        [9        U5      5      n*U*S   n([/        U5       H  u  nn+U(n,U*U   n(U*[=        US-   [        U5      S-
  5         n-[?        S5       V.s/ s H  n.U'RA                  5       PM     n/n.SU+;   n!U[        U5      S-
  :H  n"U#R1                  [C        U-U(U,U/UUU!U$U   U%U   U&U   U"(       + UUUS95        M     [        RD                  " U5      U l#        [        RD                  " U#5      U l$        [        RJ                  " US   US9U l&        [        RN                  " 5       U l(        [        R                  " US   SSSS	9U l)        g s  sn.f )Nr   r!   z1`time_embedding_mix` needs to be between 0 and 1.zKTo use `time_embedding_mix` < 1, `ctrl_learn_time_embedding` must be `True`	text_timezAs `UNetControlNetXSModel` currently only supports StableDiffusion and StableDiffusion-XL, `addition_embed_type` must be `None` or `'text_time'`.r   r   r   r   T)flip_sin_to_cosdownscale_freq_shift)cond_proj_dim)rb   time_embed_dimr   rQ   rR   rS   rT   rU   ri   r   rt   rW   base_num_attention_headsr   rY   rZ   r[   r\   r   rx   ry   rU   ri   r   rW   r   r   rY   r[   r\   r   )rb   rc   r~   r   rU   resolution_idxrt   rW   rX   rY   add_upsampler[   ri   r\   )num_channels
num_groups)*r;   r<   r   rn   r   r   r   rb   r   rB   base_conv_inr"   r   ctrl_conv_inrr   r   r   base_time_projr   base_time_embeddingctrl_time_embeddingbase_add_time_projbase_add_embeddingr   rq    ControlNetXSCrossAttnDownBlock2DControlNetXSCrossAttnMidBlock2Dr   r   r   minrp   r   ControlNetXSCrossAttnUpBlock2DrA   r   	up_blocks	GroupNormbase_conv_norm_outSiLUbase_conv_actbase_conv_out)1r>   r   r   r   r   ri   rY   rW   rX   r   r   r[   r\   r   r   r   r   r   r   r   r   r   r   r   time_embed_input_dimr   r   rR   rT   ru   r   rQ   rS   rt   r   r    rev_transformer_layers_per_blockrev_num_attention_headsrev_cross_attention_dimr   rc   r   reversed_block_out_channelsup_block_typer~   rb   r   r   r?   s1                                                   r0   r<   UNetControlNetXSModel.__init__  s?   B 	!%7!%;PQQ!*Cjkk*/Bk/Q d  6uFF,H+ICP`La+a(-e}==#6"7#>N:O"O-e}==#6"7#>N:O"O2T5MBB(@'ACHXDY'Y$#6  IIa);A)>AWXY)H,CA,FG"<*
&
 IIa)@)CQR\]^+9:QRS:TVhijVk+l(  2!4+A.2'(:1(=tjkl#4 ,$
 
 %'80(D$ (,D$&&*D#&*D#&/0GY]tu&vD#&78]_m&nD# .q13A6"+,<"=A0 21 50 7 :'?:M#&6"7!";;N0%5&7%5&7"0$3-E"/1Ma1P-Ea-H-Ea-H(;A(>'5#5%5*? #>: 9,R01"5(+%=)Eb)I%=b%A%=b%A 3B 7-"7
 	+/9U0V+W("&x0H'I"J"&x0C'D"E 6a89()@AOA|4599q   %%|n7J&JK	  B '+84F+G&H#215 ). 9A}".6q9L5c!a%EWAX[\A\6]^KEJ1X"NX#5#9#9#;X"N'=8M#&8"9A"==N. +!-(;':"0#$"/1QRS1T(?(B(?(B%3!3%5$3*? !:8 ==5y1"$,,<Nq<Q^m"nWWYYY'9!'<aQXYZ; #Os   3Q)r   
controlnetr   ctrl_optional_kwargsc                    Uc  [         R                  " XU40 UD6nO$[        S X4XV4 5       5      (       a  [        S5      e/ SQnUR                  R                  5        VV	s0 s H  u  pX;   d  M  X_M     nnn	UR                  R                  US'   / SQn
UR                  R                  5        VV	s0 s H  u  pX;   d  M  SU-   U	_M     n
nn	UR                  R                  U
S'   U R                  0 UEU
E5      n/ SQnU H8  n[        US	U-   5      R                  [        X5      R                  5       5        M:     S
S/nU HX  n[        X5      (       d  M  [        X5      c  M#  [        US	U-   5      R                  [        X5      R                  5       5        MZ     UR                  R                  UR                  R                  5       5        UR                  R                  UR                  R                  5       5        UR                   b3  UR"                  R                  UR                   R                  5       5        UR$                  R                  UR$                  R                  5       5        [&        R(                  " S [+        UR,                  UR,                  5       5       5      Ul        [.        R1                  UR2                  UR2                  5      Ul        [&        R(                  " S [+        UR4                  UR6                  5       5       5      Ul        UR9                  UR:                  5        U$ s  sn	nf s  sn	nf )a^  
Instantiate a [`UNetControlNetXSModel`] from a [`UNet2DConditionModel`] and an optional [`ControlNetXSAdapter`]
.

Parameters:
    unet (`UNet2DConditionModel`):
        The UNet model we want to control.
    controlnet (`ControlNetXSAdapter`):
        The ControlNet-XS adapter with which the UNet will be fused. If none is given, a new ControlNet-XS
        adapter will be created.
    size_ratio (float, *optional*, defaults to `None`):
        Used to construct the controlnet if none is given. See [`ControlNetXSAdapter.from_unet`] for details.
    ctrl_block_out_channels (`list[int]`, *optional*, defaults to `None`):
        Used to construct the controlnet if none is given. See [`ControlNetXSAdapter.from_unet`] for details,
        where this parameter is called `block_out_channels`.
    time_embedding_mix (`float`, *optional*, defaults to None):
        Used to construct the controlnet if none is given. See [`ControlNetXSAdapter.from_unet`] for details.
    ctrl_optional_kwargs (`Dict`, *optional*, defaults to `None`):
        Passed to the `init` of the new controlnet if no controlnet was given.
c              3   (   #    U  H  oS Lv   M
     g 7fr:   r'   ).0os     r0   	<genexpr>2UNetControlNetXSModel.from_unet.<locals>.<genexpr>  s      'v!'vs   zWhen a controlnet is passed, none of these parameters should be passed: size_ratio, ctrl_block_out_channels, time_embedding_mix, ctrl_optional_kwargs.)r   r   r   r   ri   rY   rW   r   r   r[   r\   r   r   rX   )r   r   r   r   r   rX   rV   ctrl_r   )r   r   conv_norm_outconv_outbase_add_time_projadd_embeddingc              3   P   #    U  H  u  p[         R                  X5      v   M     g 7fr:   )r   from_modulesr  r   cs      r0   r  r  6  s'      *
E -99!??E   $&c              3   P   #    U  H  u  p[         R                  X5      v   M     g 7fr:   )r   r  r  s      r0   r  r  ;  s'      (
F +77==Fr  )r   r   anyr   r   itemsr   r   from_configgetattrload_state_dict
state_dicthasattrr   r   r   r   r   r   r   rA   zipr   r   r  r   r   r   r   r   )r   r   r
  r   r   r   r  params_for_unetkvparams_for_controlnetr   modules_from_unetmoptional_modules_from_unets                  r0   r   UNetControlNetXSModel.from_unet  s   < ,66"9=QJ  (2M_'v   ! m 

 -1KK,=,=,?X,?DA1CW414,?X151O1O-.!
 =G<M<M<S<S<U t<UDAYZYs1a<U t6@6G6G6Z6Z23  L? L6K LM
 #AE7Q;'778H8S8S8UV # &
" ,AtGD$4$@w{+;;GD<L<W<W<YZ ,
 	''77
8\8\8g8g8ij**:+=+=+H+H+JK$$0%%55j6O6O6Z6Z6\]))99*:`:`:k:k:mn MM *
D,,j.D.DE*
 
 :FFt~~WaWkWkl-- (
DNNJ,E,EF(
 
 	w Y !us   "M1M4M
Mreturnc                    U R                  5        H
  nSUl        M     / SQnU Vs/ s H  n[        X5      c  M  [        X5      PM     nnU H!  nUR                  5        H
  nSUl        M     M#     U R                   H  nUR	                  5         M     U R
                  R	                  5         U R                   H  nUR	                  5         M     gs  snf )Freeze the weights of the parts belonging to the base UNet2DConditionModel, and leave everything else unfrozen for fine
tuning.T)r   r   r   r   r   r   r  r  NF)
parametersrequires_gradr!  r   freeze_base_paramsr   r   )r>   param
base_partspartdus         r0   freeze_unet_params(UNetControlNetXSModel.freeze_unet_paramsE  s     __&E"&E '	

 7AdjdGDDW)gd)j
dD*&+# +  !!A  " "))+A  "   es
   CCc           	      ~   [        S U R                  R                  5        5       5      (       a  [        5       nOr[        S U R                  R                  5        5       5      (       a  [	        5       nO8[        S[        [        U R                  R                  5       5      5       35      eU R                  U5        g)zU
Disables custom attention processors and sets the default attention implementation.
c              3   F   #    U  H  oR                   [        ;   v   M     g 7fr:   )r?   r   r  procs     r0   r  CUNetControlNetXSModel.set_default_attn_processor.<locals>.<genexpr>g  s     iKh4~~!>>Kh   !c              3   F   #    U  H  oR                   [        ;   v   M     g 7fr:   )r?   r   r=  s     r0   r  r?  i  s     hJg$#==Jgr@  zOCannot call `set_default_attn_processor` when attention processors are of type N)	allattn_processorsvaluesr   r   r   nextiterset_attn_processor)r>   	processors     r0   set_default_attn_processor0UNetControlNetXSModel.set_default_attn_processorc  s     i4K_K_KfKfKhiii,.Ih$J^J^JeJeJghhh%Iabfgklp  mA  mA  mH  mH  mJ  hK  cL  bM  N  		*r/   s1s2b1b2c                     [        U R                  5       H9  u  pV[        USU5        [        USU5        [        USU5        [        USU5        M;     g)a  Enables the FreeU mechanism from https://huggingface.co/papers/2309.11497.

The suffixes after the scaling factors represent the stage blocks where they are being applied.

Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of values that
are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.

Args:
    s1 (`float`):
        Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
        mitigate the "oversmoothing effect" in the enhanced denoising process.
    s2 (`float`):
        Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
        mitigate the "oversmoothing effect" in the enhanced denoising process.
    b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
    b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
rK  rL  rM  rN  N)r   r   setattr)r>   rK  rL  rM  rN  ru   upsample_blocks          r0   enable_freeu"UNetControlNetXSModel.enable_freeus  sJ    $ "+4>>!:AND"-ND"-ND"-ND"-	 ";r/   c                     1 Skn[        U R                  5       H9  u  p#U H.  n[        X45      (       d  [        X4S5      c  M"  [	        X4S5        M0     M;     g)zDisables the FreeU mechanism.>   rM  rN  rK  rL  N)r   r   r$  r!  rP  )r>   
freeu_keysru   rQ  r'  s        r0   disable_freeu#UNetControlNetXSModel.disable_freeu  sH    -
!*4>>!:A>--D1Q1]Nt4   ";r/   c                    SU l         U R                  R                  5        H3  u  pS[        UR                  R
                  5      ;   d  M*  [        S5      e   U R                  U l         U R                  5        H)  n[        U[        5      (       d  M  UR                  SS9  M+     U R                  [        5       5        g)u   
Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query, key, value)
are fused. For cross-attention modules, key and value projection matrices are fused.

> [!WARNING] > This API is 🧪 experimental.
NAddedzQ`fuse_qkv_projections()` is not supported for models having added KV projections.T)fuse)original_attn_processorsrC  r  r   r?   r(   r   modulesrn   r   fuse_projectionsrG  r   )r>   r   attn_processormodules       r0   fuse_qkv_projections*UNetControlNetXSModel.fuse_qkv_projections  s     )-%!%!5!5!;!;!=A#n66??@@ !tuu "> )-(<(<%llnF&),,''T'2 % 	 5 78r/   c                 V    U R                   b  U R                  U R                   5        gg)u^   Disables the fused QKV projection if enabled.

> [!WARNING] > This API is 🧪 experimental.

N)r[  rG  )r>   s    r0   unfuse_qkv_projections,UNetControlNetXSModel.unfuse_qkv_projections  s)     ((4##D$A$AB 5r/   r&   timestepencoder_hidden_statescontrolnet_condconditioning_scaleclass_labelstimestep_condattention_maskcross_attention_kwargsadded_cond_kwargsreturn_dictapply_controlc                 
   U R                   R                  S:X  a  [        R                  " US/S9nUb2  SUR	                  UR
                  5      -
  S-  nUR                  S5      nUn[        R                  " U5      (       d  UR                  R                  S:H  nUR                  R                  S:H  n[        U[        5      (       a/  U(       d  U(       a  [        R                  O[        R                  nO.U(       d  U(       a  [        R                  O[        R                  n[        R                   " U/UUR                  S9nO7[#        UR$                  5      S	:X  a  US   R	                  UR                  5      nUR'                  UR$                  S	   5      nU R)                  U5      nUR	                  UR
                  S
9nU R                   R*                  (       aS  U(       aL  U R-                  UU5      nU R/                  UU5      nU R                   R0                  S-  nUU-  USU-
  -  -   nOU R/                  U5      nSnU R                   R2                  c  GOU R                   R2                  S:X  a  SU
;  a  [5        U R6                   S35      eU
R9                  S5      nSU
;  a  [5        U R6                   S35      eU
R9                  S5      nU R;                  UR=                  5       5      nUR?                  UR$                  S	   S45      n[        R@                  " UU/SS9nUR	                  UR
                  5      nU RC                  U5      nO#[5        SU R                   R2                   S35      eUb  UU-   OUnUnU=nn/ / nnU RE                  U5      n U RG                  U5      nU RI                  U5      nU b  UU -  nU(       a  UU RK                  U5      U-  -   nURM                  U5        URM                  U5        U RN                   H7  n!U!" UUUUUU	UUS9u  nnn"n#URQ                  U"5        URQ                  U#5        M9     U RS                  UUUUUU	UUS9u  nnU RT                   H>  n$[#        U$RV                  5      n%UU%* S n&UU%* S n'USU%*  nUSU%*  nU$" UU&U'UUUU	UUS9	nM@     U RY                  U5      nU R[                  U5      nU R]                  U5      nU(       d  U4$ [_        US9$ )ap  
The [`ControlNetXSModel`] forward method.

Args:
    sample (`Tensor`):
        The noisy input tensor.
    timestep (`torch.Tensor | float | int`):
        The number of timesteps to denoise an input.
    encoder_hidden_states (`torch.Tensor`):
        The encoder hidden states.
    controlnet_cond (`Tensor`):
        The conditional input tensor of shape `(batch_size, sequence_length, hidden_size)`.
    conditioning_scale (`float`, defaults to `1.0`):
        How much the control model affects the base model outputs.
    class_labels (`torch.Tensor`, *optional*, defaults to `None`):
        Optional class labels for conditioning. Their embeddings will be summed with the timestep embeddings.
    timestep_cond (`torch.Tensor`, *optional*, defaults to `None`):
        Additional conditional embeddings for timestep. If provided, the embeddings will be summed with the
        timestep_embedding passed through the `self.time_embedding` layer to obtain the final timestep
        embeddings.
    attention_mask (`torch.Tensor`, *optional*, defaults to `None`):
        An attention mask of shape `(batch, key_tokens)` is applied to `encoder_hidden_states`. If `1` the mask
        is kept, otherwise if `0` it is discarded. Mask will be converted into a bias, which adds large
        negative values to the attention scores corresponding to "discard" tokens.
    cross_attention_kwargs (`dict[str]`, *optional*, defaults to `None`):
        A kwargs dictionary that if specified is passed along to the `AttnProcessor`.
    added_cond_kwargs (`dict`):
        Additional conditions for the Stable Diffusion XL UNet.
    return_dict (`bool`, defaults to `True`):
        Whether or not to return a [`~models.controlnets.controlnet.ControlNetOutput`] instead of a plain
        tuple.
    apply_control (`bool`, defaults to `True`):
        If `False`, the input is run only through the base model.

Returns:
    [`~models.controlnetxs.ControlNetXSOutput`] **or** `tuple`:
        If `return_dict` is `True`, a [`~models.controlnetxs.ControlNetXSOutput`] is returned, otherwise a
        tuple is returned where the first element is the sample tensor.
r   r!   )dimsNg     mpsnpu)r   devicer   )r   g333333?r   text_embedsz has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `text_embeds` to be passed in `added_cond_kwargs`time_idsz has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`r   dimzgControlNet-XS currently only supports StableDiffusion and StableDiffusion-XL, so addition_embed_type = z is currently not supported.)hidden_states_basehidden_states_ctrltembrf  rh  rl  rk  ro  )	hidden_statesres_hidden_states_tuple_baseres_hidden_states_tuple_ctrlr{  rf  rh  rl  rk  ro  )r&   )0r   r   torchflipr   r   	unsqueeze	is_tensorrt  typern   r   float32float64int32int64tensorr   shapeexpandr   r   r   r   r   r   r   r?   getr   flattenreshapeconcatr   r   r   r   r   rq   r   r   r   r   r4   r   r  r  r$   )(r>   r&   re  rf  rg  rh  ri  rj  rk  rl  rm  rn  ro  	timestepsis_mpsis_npur   t_emb	ctrl_temb	base_tembinterpolation_paramr{  aug_embru  rv  time_embeds
add_embedscembh_ctrlh_basehs_basehs_ctrlguided_hintdownresidual_hbresidual_hcup	n_resnetsskips_hbskips_hcs(                                           r0   r   UNetControlNetXSModel.forward  s   p ;;66%?#jjsCO %."3"3FLL"AAXMN+55a8N 	y)) ]]''50F]]''50F(E***0F(.&u{{i[fmmTI!Q&!$**6==9I $$V\\!_5	##I.
 v||,;;00]00FI00FI"&++"@"@#"E22Y!FYBY5ZZD++E2D ;;**2[[,,;$55 ~~&  '{  |  ,//>K!22 ~~&  'x  y  ),,Z8H11(2B2B2DEK%--{/@/@/CR.HIK{K&@bIJ#tzz2J--j9Gyz~  {F  {F  {Z  {Z  z[  [w  x  ")!4tg~$ % ! r 44_E ""6*""6*"k!Fd>>vFI[[[Fvv$$D7;#)#)&*#5'=-+	84FFK NN;'NN;' % %%"&1#9)' ( 	
 ..BBJJI	z{+H	z{+Hk	z*Gk	z*G$-5-5&*#5'=-+
F !& ((0##F+##F+9!00r/   )r   r   r  r   r   r  r   r   r   r   r   r   r   rb   r   r[  r   )rM   r   )	UpBlock2Dr   r   r   r   r   r   r!   r   NNTTNNr   r   r   r   Fr   r   r   )NNNNNr.  N)	Nr   NNNNNTT) r(   r)   r*   r+   r,    _supports_gradient_checkpointingr
   ro   r   r   r   r   r<   r   r    r   r   dictr   r9  rI  rR  rV  r`  rc  r   r  r   r$   r   r.   rC   rD   s   @r0   r   r     s    (,$ #%(
 &u)?&(049:01*..2!%&*)-<@$'*+?P/4*/.<56(*=|[ 4Z|[  *	|[ c
|[ "#J|[ t|[ !5:-|[ '*E#J&6|[ !5:-|[  !4Z!|[" "%t#|[$ %|[&  $'|[(  $J)|[* 03Tz+|[. "/|[0 %(1|[2 38*3|[4 *-5|[6 $(7|[8 "'s9|[: #&c
"2;|[< #&=|[ |[|  26#'6:+/,0t"t ($.t DL	t
 "&et!3t "DLt #Tkt tl#<+ .u .% .U . .259,C 04+.,0-1.28<<@ "J1J1 ,,&,J1  %||	J1
 ,J1 "DLJ1 llT)J1 ||d*J1 t+J1 !%S#X 5J1  U\\ 12T9J1 J1 J1 
e	#J1 J1r/   r   c                   f  ^  \ rS rSr          S#S\S\S\S\S\S\S\S	\\\   -  S
-  S\S
-  S\S
-  S\S
-  S\S\S
-  S\S
-  4U 4S jjjr\S\	S\
4S j5       rS$S jr       S%S\S\S\S
-  S\S
-  S\S
-  S\S
-  S\\\4   S
-  S\S
-  S\S\\\\\S 4   \\S 4   4   4S! jjrS"rU =r$ )&r   i  rQ   rR   rS   rT   rU   ri   r   rW   Nr   r   rY   rZ   r[   r\   c                   > [         TU ]  5         / n/ n/ n/ n/ n/ nSn[        U	[        5      (       a  U	/U-  n	[	        U5       H  nUS:X  a  UOUnUS:X  a  UOUnUR                  [        X5      5        UR                  [        UUUUS95        UR                  [        X1-   UU[        X1-   US9[        XGS9SS95        U(       aO  UR                  [        U
X*-  UU	U   UUUUS95        UR                  [        UXK-  UU	U   UUU[        XGS9S95        UR                  [        XB5      5        M     U(       aY  UR                  [        X"5      5        [        USUS	S
9U l        [        XB-   SUS	S
9U l        UR                  [        XB5      5        OS U l        S U l        [        R                  " U5      U l        [        R                  " U5      U l        U(       a  [        R                  " U5      OS /U-  U l        U(       a  [        R                  " U5      OS /U-  U l        [        R                  " U5      U l        [        R                  " U5      U l        SU l        g )Nr   r   rb   rc   rU   rd   r^   r`   ra   rg   Trj   rk   F)r;   r<   rn   ro   rp   rq   rr   r   rs   r   r   base_downsamplersctrl_downsamplersr   rA   base_resnetsctrl_resnetsbase_attentionsctrl_attentionsr5   r6   gradient_checkpointing)r>   rQ   rR   rS   rT   rU   ri   r   rt   rW   r   r   rY   rZ   r[   r\   r  r  r  r  r6   r5   rh   ru   r?   s                           r0   r<   )ControlNetXSCrossAttnDownBlock2D.__init__  s   $ 	
2C88,H+IJ+V(z"A346/?P346/?P /? RS 0!2"/*	  0 C!2"/.(;H`  33Dj	 &&&0)E$5#?#B,?.C)9(7	  &&&0)E$5#?#B,?.C)9(;<M(s	 /@ TUs #v  /@ TU%1!D?PW[&D" &2!5Sdko&D" /@ TU%)D"%)D"MM,7MM,7ANr}}_=UYTZ]gTgANr}}_=UYTZ]gTgMM,7MM,7&+#r/   base_downblockctrl_downblockc                    S nUR                   S   R                  nUR                   S   R                  nUR                   S   R                  U-
  nUR                   S   R                  nUR                   S   R                  R                  nUR                   S   R
                  R                  n	UR                   S   R
                  R                  n
[        US5      (       a  Sn[        UR                  S   R                  5      nU" U5      R                  nU" U5      R                  nU" U5      R                  nU" U5      R                  nUR                  S   R                  nOSnS nS nS nS nS nS nUR                  S LnU " UUUUUU	U
UUUUUUUUS9nUR                   R#                  UR                   R%                  5       5        UR&                  R#                  UR                   R%                  5       5        U(       af  UR(                  R#                  UR                  R%                  5       5        UR*                  R#                  UR                  R%                  5       5        U(       ai  UR,                  R#                  UR                  S   R%                  5       5        UR.                  R#                  UR                  R%                  5       5        UR0                  R#                  UR0                  R%                  5       5        UR2                  R#                  UR2                  R%                  5       5        U$ )Nc                 N    U R                   S   R                  S   R                  $ Nr   r7   transformer_blocksattn2blocks    r0   get_first_cross_attentionPControlNetXSCrossAttnDownBlock2D.from_modules.<locals>.get_first_cross_attention  $    ##A&99!<BBBr/   r   r7   TFr   )r4   rb   rc   time_emb_projin_featuresnorm1r   r$  r   r7   r  headsrY   r[   r\   r=   r  r"  r#  r  r  r  r  r  r5   r6   )r   r  r  r  rQ   rR   rS   rT   rU   r   ctrl_num_groupsrt   rW   r   r   rY   r[   r\   rZ   r   s                       r0   r  -ControlNetXSCrossAttnDownBlock2D.from_modules  s   	C *11!4@@*2215BB""1%114DD 	 +2215BB&..q1??KK#++A.44??
(00399DD><00 M+.~/H/H/K/^/^+_('@'P'V'V$'@'P'V'V$";N"K"_"_8HYY$2$=$=a$@$V$V!!M+/('+$'+$"&#$(!'44D@ -/-/'&%4')E%=%= 3)-"7
& 	**>+A+A+L+L+NO**>+A+A+L+L+NO!!11.2K2K2V2V2XY!!11.2K2K2V2V2XY##33N4O4OPQ4R4]4]4_`##33N4O4O4Z4Z4\]**>+F+F+Q+Q+ST**>+F+F+Q+Q+STr/   r.  c                    U R                  5        H
  nSUl        M     U R                  /n[        U R                  [
        R                  5      (       a  UR                  U R                  5        U R                  b  UR                  U R                  5        U H!  nUR                  5        H
  nSUl        M     M#     gr0  TNF)	r1  r2  r  rn   r  r   rA   rq   r  r>   r4  r5  r6  s       r0   r3  3ControlNetXSCrossAttnDownBlock2D.freeze_base_params:  s     __&E"&E ' ''(
d**BMM::d223!!-d445D*&+# + r/   ry  r{  rf  rz  rh  rk  rl  encoder_attention_maskro  .c
           
         Ub(  UR                  SS 5      b  [        R                  S5        Un
UnSnSn[        [	        U R
                  U R                  5      5      n[        [	        U R                  U R                  5      5      n[	        XU R                  U R                  5       GH  u  u  nnu  nnnnU	(       a  [        R                  " UU" U
5      /SS9n[        R                  " 5       (       a$  U R                  (       a  U R                  UX5      n
OU" X5      n
Ub  U" U
UUUUSS9S   n
U	(       aW  [        R                  " 5       (       a$  U R                  (       a  U R                  UX5      nOU" X5      nUb  U" UUUUUSS9S   nU	(       a  U
U" U5      U-  -   n
X4-   nX4-   nGM
     U R                   b  U R                  S	   nU R                  S	   nU	(       a  [        R                  " UU" U
5      /SS9nU R!                  U
5      n
U	(       a  U R#                  U5      nU	(       a  U
U" U5      U-  -   n
X4-   nX4-   nXX4$ )
NscaleSPassing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.r'   r!   rw  Frf  rl  rk  r  rn  r   r   )r  loggerwarningr   r%  r  r  r  r  r5   r6   r  catis_grad_enabledr  _gradient_checkpointing_funcr  r  )r>   ry  r{  rf  rz  rh  rk  rl  r  ro  r  r  base_output_statesctrl_output_statesbase_blocksctrl_blocksb_resb_attnc_resc_attnb2cc2bs                         r0   r   (ControlNetXSCrossAttnDownBlock2D.forwardK  si    "-%))'48Dtu##3t00$2F2FGH3t00$2F2FGH:=d&7&79J9J;
6OUF_eVc3 FCK#8a@ $$&&4+F+F::5&Ov,!*?+A#1+A %  ((**t/J/J!>>ufSF"60F%#.C/E'5/E$) F #f+0B"BB!3i!?!3i!?Y;
\ !!-##B'C##B'C FCK#8a@++F3F//7#f+0B"BB!3i!?!3i!?1EEr/   )	r  r  r  r5   r  r  r  r6   r  )
r   r   Tr!   r!   r!   r   TFTr  )NNr   NNNT)r(   r)   r*   r+   ro   r   r   r<   r   r   r2   r  r3  r   r   r  r   r   r   r.   rC   rD   s   @r0   r   r     s     "(*@A/0/0*.#(--1!t,t, t, 	t,
 t, t, t, #&t, '*E#J&6&=t, #&*t, #&*t, !4Zt, t, +t,   $d{!t, t,l ?*> ?Pl ? ?B,* 04,0+.(,8<04"ZF"ZF ZF  &}	ZF
 #TMZF "DLZF ZF !%S#X 5ZF !'ZF ZF 
vvuVS[153EE	FZF ZFr/   r   c                   &  ^  \ rS rSr         SS\S\S\S-  S\S\S\S	\S-  S
\S-  S\S-  S\S\S-  4U 4S jjjr\S\S\	4S j5       r
S S jr      S!S\S\S\S\S-  S\S-  S\\\4   S-  S\S-  S\S-  S\S\\\4   4S jjrSrU =r$ )"r   i  Nrx   ry   rU   ri   r   rW   r   r   rY   r[   r\   c                    > [         TU ]  5         [        X5      U l        [	        UUUUU	UUU
S9U l        [	        UX!-   UU[        [        X"U-   5      U5      U	UUU
S9	U l        [        X!5      U l	        SU l
        g )N)rW   rb   rU   r|   rY   rX   r\   r[   r{   F)r;   r<   rr   r5   r   base_midblockrs   r   ctrl_midblockr6   r  )r>   rx   ry   rU   ri   r   rW   r   r   rY   r[   r\   r?   s               r0   r<   (ControlNetXSCrossAttnMidBlock2D.__init__  s     	 +=H4)E%') 3 8"7-	
 5)E%5&'-M=#@AC[ !4 8"7-
" +=H&+#r/   r  r  c                    UR                   nUR                  nUR                  nS nUR                  nUR                  n[        UR                  S   R                  5      nUR                  S   R                  R                  n	UR                  S   R                  R                  n
UR                  S   R                  R                  nU" U5      R                  nU" U5      R                  nU" U5      R                  nU" U5      R                  nUR                  S   R                   nU " UUU	U
UUUUUUUS9nUR                   R#                  UR%                  5       5        UR&                  R#                  UR%                  5       5        UR(                  R#                  UR%                  5       5        UR                  R#                  UR%                  5       5        U$ )Nc                 N    U R                   S   R                  S   R                  $ r  r  )rH   s    r0   r  OControlNetXSCrossAttnMidBlock2D.from_modules.<locals>.get_first_cross_attention  s$    &&q)<<Q?EEEr/   r   r   )r5   r6   rH   rc   rb   r   r7   r  r4   r  r  r  r   r  rY   r[   r\   r"  r#  r  r  )r   r  r  r5   r6   r  rx   ry   rW   rU   r   r  r   r   rY   r[   r\   r   s                     r0   r  ,ControlNetXSCrossAttnMidBlock2D.from_modules  s    %11$11%..	F %11$00'*=+C+CA+F+Y+Y'Z$%--a0>>JJ"**1-33>>
'//288CC#<]#K#Q#Q #<]#K#Q#Q 7FZZ4]CTT - 8 8 ; Q Q '''&%4)E%=%= 3-"7
 	**<+B+B+DE++M,D,D,FG++M,D,D,FG**<+B+B+DEr/   r.  c                     U R                  5        H
  nSUl        M     U R                  R                  5        H
  nSUl        M     g)r0  TFN)r1  r2  r  )r>   r4  s     r0   r3  2ControlNetXSCrossAttnMidBlock2D.freeze_base_params  s@     __&E"&E ' ''224E"'E 5r/   ry  r{  rf  rz  rh  rl  rk  r  ro  c
                 T   Ub(  UR                  SS 5      b  [        R                  S5        Un
UnUUUUUS.nU	(       a%  [        R                  " XR                  U
5      /SS9nU R                  " U
40 UD6n
U	(       a)  U R                  " U40 UD6nXR                  U5      U-  -   n
X4$ )Nr  r  )r{  rf  rk  rl  r  r!   rw  )	r  r  r  r  r  r5   r  r  r6   )r>   ry  r{  rf  rz  rh  rl  rk  r  ro  r  r  
joint_argss                r0   r   'ControlNetXSCrossAttnMidBlock2D.forward  s     "-%))'48Dtu## %:,&<&<

 YY(9(9&(ABJF##F9j9''=*=F//7:LLLF~r/   )r  r5   r  r6   r  )	Nr   r   r!   r!   r!   r   FTr  )Nr   NNNT)r(   r)   r*   r+   ro   r   r<   r   r   rF   r  r3  r   r   r  r   r   r   r   r.   rC   rD   s   @r0   r   r     s}   
 %)!(*,-/0/0*.!&-12,2, 2, Tz	2,
 2, #&2, '*2, #&*2, #&*2, !4Z2, 2,  $d{2, 2,h ... 3. .`	(  -1+.8<(,04"""" "  &	"
 #TM" "DL" !%S#X 5" " !'" " 
vv~	" "r/   r   c                   <  ^  \ rS rSr         S$S\S\S\S\\   S\S\S	\S-  S
\S\S\S\S\S\S-  4U 4S jjjr\S\	S\
4S j5       rS%S jr       S&S\S\\S4   S\\S4   S\S\S-  S\S-  S\\\4   S-  S\S-  S\S-  S \S-  S!\S\4S" jjrS#rU =r$ )'r   i>  Nrb   rc   r~   r   rU   ri   r   rW   rX   rY   r   r[   r\   c                   > [         TU ]  5         / n/ n/ nSnXl        Xl        [	        U	[
        5      (       a  U	/U-  n	[        U5       H  nUUS-
  :X  a  UOUnUS:X  a  UOUnUR                  [        UU   U5      5        UR                  [        UU-   UUUS95        U(       d  M_  UR                  [        U
X*-  UU	U   UUUUS95        M     [        R                  " U5      U l        U(       a  [        R                  " U5      OS /U-  U l        [        R                  " U5      U l        U(       a  [!        USUS9U l        OS U l        SU l        Xpl        g )	Nr   r!   r   r  rg   T)rl   rc   F)r;   r<   has_cross_attentionrX   rn   ro   rp   rq   rr   r   r   r   rA   r4   r7   r6   r   
upsamplersr  r   )r>   rb   rc   r~   r   rU   ri   r   rt   rW   rX   rY   r   r[   r\   r4   r7   r6   rh   ru   res_skip_channelsr   r?   s                         r0   r<   'ControlNetXSCrossAttnUpBlock2D.__init__?  sd   " 	

#0 #6 2C88,H+IJ+V(z"A01Z!^0C,89Q!4L/A!/DFX YZNN 25F F!-"/*	 }!!&+$;$0#?#B,?.C)9(7	! #: }}W-7D"--
34&S]J]MM,7(S_`DO"DO&+#,r/   base_upblockctrl_upblockc                    UR                   nS nUR                  S   R                  nUR                  S   R                  U-
  nUR                  S   R                  U-
  nU Vs/ s H  oR                  PM     n	nUR                  S   R                  R
                  n
UR                  S   R                  R                  nUR                  n[        US5      (       at  Sn[        UR                  S   R                  5      nU" U5      R                  nU" U5      R                  nU" U5      R                  nUR                  S   R                   nOSnS nS nS nS nS nUR"                  S LnU " UUUU	U
UUUUUUUUUS9nUR                  R%                  UR                  R'                  5       5        U(       a3  UR                  R%                  UR                  R'                  5       5        U(       a6  UR"                  R%                  UR"                  S   R'                  5       5        UR                   R%                  UR'                  5       5        U$ s  snf )Nc                 N    U R                   S   R                  S   R                  $ r  r  r  s    r0   r  NControlNetXSCrossAttnUpBlock2D.from_modules.<locals>.get_first_cross_attention  r  r/   r   r   r7   TF)rb   rc   r~   r   rU   ri   r   rt   rW   rX   rY   r   r[   r\   )r6   r4   rc   rb   r  r  r  r   r   r$  r   r7   r  r  rY   r[   r\   r  r"  r#  )r   r  r  ctrl_to_base_skip_connectionsr  rc   rb   prev_output_channelsr  ctrl_skip_channelssrU   r   r   rt   rW   rX   rY   r[   r\   r   r   s                        r0   r  +ControlNetXSCrossAttnUpBlock2D.from_modules  s>   (4(A(A%	C $++A.;;"**2.::\I+33A6BB\Q6ST6S}}6ST$,,Q/==II!))!,22==
%44<.. M+.|/F/Fq/I/\/\+](";L"I"O"O";L"I"]"]8FWW$0$;$;A$>$T$T!!M+/("&"&#$(!#..d: #% 42'&)')E 3 3%-"7
$ 	%%l&:&:&E&E&GH,,\-D-D-O-O-QR,,\-D-DQ-G-R-R-TU**+H+S+S+UV] Us   %Ir.  c                    U R                  5        H
  nSUl        M     U R                  /n[        U R                  [
        R                  5      (       a  UR                  U R                  5        U R                  b  UR                  U R                  5        U H!  nUR                  5        H
  nSUl        M     M#     gr  )	r1  r2  r4   rn   r7   r   rA   rq   r  r  s       r0   r3  1ControlNetXSCrossAttnUpBlock2D.freeze_base_params  s     __&E"&E ' ll^
door}}55doo.??&doo.D*&+# + r/   r|  r}  .r~  r{  rf  rh  rl  rk  upsample_sizer  ro  c           
        ^ ^ Ub(  UR                  SS 5      b  [        R                  S5        [        T SS 5      =(       a5    [        T SS 5      =(       a!    [        T SS 5      =(       a    [        T SS 5      mUU 4S jn[	        T R
                  T R                  T R                  [        U5      [        U5      5       H  u  pnnnU(       a  X" U5      U-  -  nU" UU5      u  nn[        R                  " UU/SS	9n[        R                  " 5       (       a$  T R                  (       a  T R                  XU5      nOU" X5      nUc  M  U" UUUUU
S
S9S   nM     T R                  b  T R                  X5      nU$ )Nr  r  rK  rL  rM  rN  c           
         > T(       aA  [        TR                  U UTR                  TR                  TR                  TR
                  S9$ X4$ )N)rK  rL  rM  rN  )r   r   rK  rL  rM  rN  )r|  
res_h_baseis_freeu_enabledr>   s     r0   maybe_apply_freeu_to_subblockMControlNetXSCrossAttnUpBlock2D.forward.<locals>.maybe_apply_freeu_to_subblock  sI    "''!wwwwwwww  %00r/   r!   rw  Fr  r   )r  r  r  r!  r%  r4   r7   r6   r   r  r  r  r  r  r  )r>   r|  r}  r~  r{  rf  rh  rl  rk  r  r  ro  r  resnetattnr  r  
res_h_ctrlr  s   `                 @r0   r   &ControlNetXSCrossAttnUpBlock2D.forward  s{    "-%))'48Dtu D$% *dD)*dD)* dD)	 		1 :=LLOO1212:
5F#z: Z3E!EE(EmU_(`%M:!II}j&AqIM$$&&4+F+F $ A A&Y] ^ &} ; $!*?+A#1+A %! !':
8 ??& OOMIMr/   )r7   r6   r  r  rX   r4   r   r  )	r   NTr!   r!   r   TFTr  )Nr   NNNNT)r(   r)   r*   r+   ro   r   r   r<   r   r   rL   r  r3  r   r   r   r  r   r   r   r.   rC   rD   s   @r0   r   r   >  s     "%),-#$#'!!&-1E-E- E- !	E-
 !IE- E- E- d
E- '*E- !E- !E- E- E-  $d{E- E-N 8(: 8Jd 8 8t,. 04+.8<(,$(04"GG ',FCK&8G ',FCK&8	G
 G  &}G "DLG !%S#X 5G G TzG !'G G 
G Gr/   r   c           	      @    [        [        R                  " XSSS95      $ )Nr!   r   )r   )zero_moduler   rB   )rb   rc   s     r0   rr   rr     s    ryyAqIJJr/   c                 r    U R                  5        H"  n[        R                  R                  U5        M$     U $ r:   )r1  r   initzeros_)r_  ps     r0   r  r     s*     
q !Mr/   c                 V    UnX :  a  U $ US:w  a  X-  nUS:X  a  U$ US-  nUS:w  a  M  g g )Nr   r!   r'   )numberr_   factorresiduals       r0   rs   rs   &  s>    F
A+?q=M!	 A+r/   )r   Tr!   r!   r   TFT)Nr   r!   r!   r   FTr:   )Bdataclassesr   mathr   typingr   r  r   r   configuration_utilsr	   r
   utilsr   r   utils.torch_utilsr   	attentionr   attention_processorr   r   r   r   r   r   
embeddingsr   r   modeling_utilsr   unets.unet_2d_blocksr   r   r   r   r   r   r   unets.unet_2d_conditionr    r
  r"   
get_loggerr(   r  r$   Moduler2   rF   rL   ro   r   r   rw   r}   r   r   r   r   r   r   r   rr   r  rs   r'   r/   r0   <module>r$     s   "     B ( , &  6 '   ; 7 
		H	% 
 
 
(299 (()")) )) ) ')<=&'&*$))-X!X!X! X! 	X!
 X! tX! #&c
"2T"9X! tX! tX! X! TkX!  $;X!| !%&(()&'&*""& p p p : p t	 p
 #& p t p t p  p   pFPPP S	PH
*nk H
V	1J 	1DdFryy dFN	Sbii Sl[RYY [|Kr/   