
    
3j_                        S SK r S SKJr  S SKrS SKJr  SSKJrJr  SSK	J
r
  SSKJr  \
R                  " \5      r " S S	\5      r " S
 S\\5      r " S S\R$                  5      r " S S\R$                  5      r " S S\R$                  5      r " S S\R$                  5      r " S S\R$                  5      r " S S\R$                  5      r " S S\R$                  5      rg)    N)Callable   )ConfigMixinregister_to_config)logging   )
ModelMixinc                     ^  \ rS rSrSrS\S   4U 4S jjrSS\R                  S\\	   S-  S	\\R                     4S
 jjr
    SS\\R                  -  S\S\S\S\S-  4
S jjr\S\\R                  -  S-  4S j5       rSrU =r$ )MultiAdapter   a}  
MultiAdapter is a wrapper model that contains multiple adapter models and merges their outputs according to
user-assigned weighting.

This model inherits from [`ModelMixin`]. Check the superclass documentation for common methods such as downloading
or saving.

Args:
    adapters (`list[T2IAdapter]`, *optional*, defaults to None):
        A list of `T2IAdapter` model instances.
adapters
T2IAdapterc                 &  > [         [        U ]  5         [        U5      U l        [
        R                  " U5      U l        [        U5      S:X  a  [        S5      e[        U5      S:X  a  [        S5      eUS   R                  nUS   R                  n[        S[        U5      5       H\  nX   R                  U:w  d  X   R                  U:w  d  M)  [        SU SU SU SX   R                   SU S	X   R                   35      e   X l	        X0l
        g )
Nr   zExpecting at least one adapterr   zQFor a single adapter, please use the `T2IAdapter` class instead of `MultiAdapter`zjExpecting all adapters to have the same downscaling behavior, but got:
adapters[0].total_downscale_factor=z
adapters[0].downscale_factor=z

adapter[`z`].total_downscale_factor=z`].downscale_factor=)superr   __init__lennum_adapternn
ModuleListr   
ValueErrortotal_downscale_factordownscale_factorrange)selfr   $first_adapter_total_downscale_factorfirst_adapter_downscale_factoridx	__class__s        R/home/wildlama/miniconda3/lib/python3.13/site-packages/diffusers/models/adapter.pyr   MultiAdapter.__init__)   s$   lD*,x=h/x=A=>>x=Apqq 08{/Q/Q,)1!)E)E&CM*C448\\=115SS ::^9_ `44R3S T  #u$>x}?c?c>d e  #u$89W9W8X	Z  + 'K# >    Nxsadapter_weightsreturnc                    Uc2  [         R                  " SU R                  -  /U R                  -  5      nO[         R                  " U5      nSn[        XU R                  5       He  u  pEnU" U5      nUc(  Un[        [        U5      5       H  nXSU   -  X8'   M     M9  [        [        U5      5       H  nX8==   XWU   -  -  ss'   M     Mg     U$ )a2  
Args:
    xs (`torch.Tensor`):
        A tensor of shape (batch, channel, height, width) representing input images for multiple adapter
        models, concatenated along dimension 1(channel dimension). The `channel` dimension should be equal to
        `num_adapter` * number of channel per image.

    adapter_weights (`list[float]`, *optional*, defaults to None):
        A list of floats representing the weights which will be multiplied by each adapter's output before
        summing them together. If `None`, equal weights will be used for all adapters.
Nr   )torchtensorr   zipr   r   r   )	r   r"   r#   accume_statexwadapterfeaturesis	            r   forwardMultiAdapter.forwardL   s     "#llA0@0@,@+ADDTDT+TUO#ll?;O dmmDMA'qzH#'s<01A&'q/&9LO 2 s8}-A OqA;6O . E r!   save_directoryis_main_processsave_functionsafe_serializationvariantc           	      r    SnUnU R                    H#  nUR                  UUUUUS9  US-  nUSU 3-   nM%     g)a  
Save a model and its configuration file to a specified directory, allowing it to be re-loaded with the
`[`~models.adapter.MultiAdapter.from_pretrained`]` class method.

Args:
    save_directory (`str` or `os.PathLike`):
        The directory where the model will be saved. If the directory does not exist, it will be created.
    is_main_process (`bool`, optional, defaults=True):
        Indicates whether current process is the main process or not. Useful for distributed training (e.g.,
        TPUs) and need to call this function on all processes. In this case, set `is_main_process=True` only
        for the main process to avoid race conditions.
    save_function (`Callable`):
        Function used to save the state dictionary. Useful for distributed training (e.g., TPUs) to replace
        `torch.save` with another method. Can also be configured using`DIFFUSERS_SAVE_MODE` environment
        variable.
    safe_serialization (`bool`, optional, defaults=True):
        If `True`, save the model using `safetensors`. If `False`, save the model with `pickle`.
    variant (`str`, *optional*):
        If specified, weights are saved in the format `pytorch_model.<variant>.bin`.
r   )r2   r3   r4   r5   r   _N)r   save_pretrained)	r   r1   r2   r3   r4   r5   r   model_path_to_saver,   s	            r   r8   MultiAdapter.save_pretrainedi   s[    8 +}}G##" /+#5 $  1HC!3#i!? %r!   pretrained_model_pathc                    Sn/ nUn[         R                  R                  U5      (       a[  [        R                  " U40 UD6nUR                  U5        US-  nUSU 3-   n[         R                  R                  U5      (       a  M[  [        R                  [        U5       SU S35        [        U5      S:X  a2  [        S[         R                  R                  U5       SUS-    S35      eU " U5      $ )	ag  
Instantiate a pretrained `MultiAdapter` model from multiple pre-trained adapter models.

The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated). To train
the model, set it back to training mode using `model.train()`.

Warnings:
    *Weights from XXX not initialized from pretrained model* means that the weights of XXX are not pretrained
    with the rest of the model. It is up to you to train those weights with a downstream fine-tuning. *Weights
    from XXX not used in YYY* means that the layer XXX is not used by YYY, so those weights are discarded.

Args:
    pretrained_model_path (`os.PathLike`):
        A path to a *directory* containing model weights saved using
        [`~diffusers.models.adapter.MultiAdapter.save_pretrained`], e.g., `./my_model_directory/adapter`.
    torch_dtype (`torch.dtype`, *optional*):
        Override the default `torch.dtype` and load the model under this dtype.
    output_loading_info(`bool`, *optional*, defaults to `False`):
        Whether or not to also return a dictionary containing missing keys, unexpected keys and error messages.
    device_map (`str` or `dict[str, int | str | torch.device]`, *optional*):
        A map that specifies where each submodule should go. It doesn't need to be refined to each
        parameter/buffer name, once a given module name is inside, every submodule of it will be sent to the
        same device.

        To have Accelerate compute the most optimized `device_map` automatically, set `device_map="auto"`. For
        more information about each option see [designing a device
        map](https://hf.co/docs/accelerate/main/en/usage_guides/big_modeling#designing-a-device-map).
    max_memory (`Dict`, *optional*):
        A dictionary mapping device identifiers to their maximum memory. Default to the maximum memory
        available for each GPU and the available CPU RAM if unset.
    low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`):
        Speed up model loading by not initializing the weights and only loading the pre-trained weights. This
        also tries to not use more than 1x model size in CPU memory (including peak memory) while loading the
        model. This is only supported when torch version >= 1.9.0. If you are using an older version of torch,
        setting this argument to `True` will raise an error.
    variant (`str`, *optional*):
        If specified, load weights from a `variant` file (*e.g.* pytorch_model.<variant>.bin). `variant` will
        be ignored when using `from_flax`.
    use_safetensors (`bool`, *optional*, defaults to `None`):
        If `None`, the `safetensors` weights will be downloaded if available **and** if`safetensors` library is
        installed. If `True`, the model will be forcibly loaded from`safetensors` weights. If `False`,
        `safetensors` is not used.
r   r   r7   z adapters loaded from .zNo T2IAdapters found under z. Expected at least _0)ospathisdirr   from_pretrainedappendloggerinfor   r   dirname)clsr;   kwargsr   r   model_path_to_loadr,   s          r   rB   MultiAdapter.from_pretrained   s   Z 
 3ggmm.// 001CNvNGOOG$1HC!61SE!B ggmm.// 	s8}o%;<Q;RRSTUx=A-bggoo>S.T-UUij  CG  kG  jH  HI  J  8}r!   )r   r   r   r   N)TNTN)__name__
__module____qualname____firstlineno____doc__listr   r&   Tensorfloatr/   strr?   PathLikeboolr   r8   classmethodrB   __static_attributes____classcell__r   s   @r   r   r      s    
!?l!3 !?F%,, et9K W[\a\h\hWi @ !%"&#'"(@bkk)(@ (@  	(@
 !(@ t(@T AC"++4E4L A Ar!   r   c                      ^  \ rS rSrSr\S/ SQSSS4S\S	\\   S
\S\S\4
U 4S jjj5       r	S\
R                  S\\
R                     4S jr\S 5       r\S 5       rSrU =r$ )r      a  
A simple ResNet-like model that accepts images containing control signals such as keyposes and depth. The model
generates multiple feature maps that are used as additional conditioning in [`UNet2DConditionModel`]. The model's
architecture follows the original implementation of
[Adapter](https://github.com/TencentARC/T2I-Adapter/blob/686de4681515662c0ac2ffa07bf5dda83af1038a/ldm/modules/encoders/adapter.py#L97)
 and
 [AdapterLight](https://github.com/TencentARC/T2I-Adapter/blob/686de4681515662c0ac2ffa07bf5dda83af1038a/ldm/modules/encoders/adapter.py#L235).

This model inherits from [`ModelMixin`]. Check the superclass documentation for the common methods, such as
downloading or saving.

Args:
    in_channels (`int`, *optional*, defaults to `3`):
        The number of channels in the adapter's input (*control image*). Set it to 1 if you're using a gray scale
        image.
    channels (`list[int]`, *optional*, defaults to `(320, 640, 1280, 1280)`):
        The number of channels in each downsample block's output hidden state. The `len(block_out_channels)`
        determines the number of downsample blocks in the adapter.
    num_res_blocks (`int`, *optional*, defaults to `2`):
        Number of ResNet blocks in each downsample block.
    downscale_factor (`int`, *optional*, defaults to `8`):
        A factor that determines the total downscale factor of the Adapter.
    adapter_type (`str`, *optional*, defaults to `full_adapter`):
        Adapter type (`full_adapter` or `full_adapter_xl` or `light_adapter`) to use.
   @       ra   r      full_adapterin_channelschannelsnum_res_blocksr   adapter_typec                    > [         TU ]  5         US:X  a  [        XX45      U l        g US:X  a  [	        XX45      U l        g US:X  a  [        XX45      U l        g [        SU S35      e)Nrc   full_adapter_xllight_adapterzUnsupported adapter_type: 'zH'. Choose either 'full_adapter' or 'full_adapter_xl' or 'light_adapter'.)r   r   FullAdapterr,   FullAdapterXLLightAdapterr   )r   rd   re   rf   r   rg   r   s         r   r   T2IAdapter.__init__   sq     	>)&{n_DL..(aDL_,'~`DL-l^ <8 8 r!   r*   r$   c                 $    U R                  U5      $ )aj  
This function processes the input tensor `x` through the adapter model and returns a list of feature tensors,
each representing information extracted at a different scale from the input. The length of the list is
determined by the number of downsample blocks in the Adapter, as specified by the `channels` and
`num_res_blocks` parameters during initialization.
r,   r   r*   s     r   r/   T2IAdapter.forward
  s     ||Ar!   c                 .    U R                   R                  $ rK   )r,   r   r   s    r   r   !T2IAdapter.total_downscale_factor  s    ||222r!   c                 B    U R                   R                  R                  $ )zThe downscale factor applied in the T2I-Adapter's initial pixel unshuffle operation. If an input image's dimensions are
not evenly divisible by the downscale_factor then an exception will be raised.
)r,   	unshuffler   rt   s    r   r   T2IAdapter.downscale_factor  s    
 ||%%666r!   rp   )rL   rM   rN   rO   rP   r   intrQ   rT   r   r&   rR   r/   propertyr   r   rX   rY   rZ   s   @r   r   r      s    4  4 !* s) 	
   , $u||*<  3 3 7 7r!   r   c            	          ^  \ rS rSrSrS/ SQSS4S\S\\   S	\S
\4U 4S jjjrS\R                  S\\R                     4S jr
SrU =r$ )rk   i"  *
See [`T2IAdapter`] for more information.
r]   r^   r   rb   rd   re   rf   r   c                   > [         TU ]  5         XS-  -  n[        R                  " U5      U l        [        R
                  " XS   SSS9U l        [        R                  " [        US   US   U5      /[        S[        U5      5       Vs/ s H  n[        X%S-
     X%   USS9PM     snQ5      U l        US[        U5      S-
  -  -  U l        g s  snf Nr   r   r]   r   kernel_sizepaddingTdown)r   r   r   PixelUnshufflerw   Conv2dconv_inr   AdapterBlockr   r   bodyr   r   rd   re   rf   r   r.   r   s         r   r   FullAdapter.__init__'  s     	!a$77**+;<yyqkqRSTMMXa[(1+~F #1c(m44 !a%(+~TXY4
	 '7s8}q?P9Q&Q#s   
C
r*   r$   c                     U R                  U5      nU R                  U5      n/ nU R                   H  nU" U5      nUR                  U5        M     U$ )a  
This method processes the input tensor `x` through the FullAdapter model and performs operations including
pixel unshuffling, convolution, and a stack of AdapterBlocks. It returns a list of feature tensors, each
capturing information at a different stage of processing within the FullAdapter model. The number of feature
tensors in the list is determined by the number of downsample blocks specified during initialization.
rw   r   r   rC   r   r*   r-   blocks       r   r/   FullAdapter.forwardA  sN     NN1LLOYYEaAOOA  r!   r   r   r   rw   rL   rM   rN   rO   rP   ry   rQ   r   r&   rR   r/   rX   rY   rZ   s   @r   rk   rk   "  su     4 !RR s)R 	R
 R R4 $u||*<  r!   rk   c            	          ^  \ rS rSrSrS/ SQSS4S\S\\   S	\S
\4U 4S jjjrS\R                  S\\R                     4S jr
SrU =r$ )rl   iT  r|   r]   r^   r      rd   re   rf   r   c           
      j  > [         TU ]  5         XS-  -  n[        R                  " U5      U l        [        R
                  " XS   SSS9U l        / U l        [        [        U5      5       H  nUS:X  a/  U R                  R                  [        X%S-
     X%   U5      5        M8  US:X  a.  U R                  R                  [        X%S-
     X%   USS95        Ml  U R                  R                  [        X%   X%   U5      5        M     [        R                  " U R                  5      U l        US-  U l        g r~   )r   r   r   r   rw   r   r   r   r   r   rC   r   r   r   r   s         r   r   FullAdapterXL.__init__Y  s     	!a$77**+;<yyqkqRST	s8}%AAv		  h1uox{N![\a		  h1uox{Nae!fg		  hk8;!WX & MM$)),	&6&:#r!   r*   r$   c                     U R                  U5      nU R                  U5      n/ nU R                   H  nU" U5      nUR                  U5        M     U$ )z
This method takes the tensor x as input and processes it through FullAdapterXL model. It consists of operations
including unshuffling pixels, applying convolution layer and appending each block into list of feature tensors.
r   r   s       r   r/   FullAdapterXL.forwardu  sN    
 NN1LLOYYEaAOOA  r!   r   r   rZ   s   @r   rl   rl   T  so     4 ";; s); 	;
 ; ;8 $u||*<  r!   rl   c            	       ~   ^  \ rS rSrSrSS\S\S\S\4U 4S jjjrS\R                  S	\R                  4S
 jr
SrU =r$ )r   i  a  
An AdapterBlock is a helper model that contains multiple ResNet-like blocks. It is used in the `FullAdapter` and
`FullAdapterXL` models.

Args:
    in_channels (`int`):
        Number of channels of AdapterBlock's input.
    out_channels (`int`):
        Number of channels of AdapterBlock's output.
    num_res_blocks (`int`):
        Number of ResNet blocks in the AdapterBlock.
    down (`bool`, *optional*, defaults to `False`):
        If `True`, perform downsampling on AdapterBlock's input.
rd   out_channelsrf   r   c                 @  > [         TU ]  5         S U l        U(       a  [        R                  " SSSS9U l        S U l        X:w  a  [        R                  " XSS9U l        [        R                  " [        U5       Vs/ s H  n[        U5      PM     sn6 U l
        g s  snf )Nr   Tr   stride	ceil_moder   r   )r   r   
downsampler   	AvgPool2din_convr   
Sequentialr   AdapterResnetBlockresnets)r   rd   r   rf   r   r7   r   s         r   r   AdapterBlock.__init__  s}     llqdSDO&99[ANDL}}8=n8MN8M1 .8MN
Ns   <Br*   r$   c                     U R                   b  U R                  U5      nU R                  b  U R                  U5      nU R                  U5      nU$ )z
This method takes tensor x as input and performs operations downsampling and convolutional layers if the
self.downsample and self.in_conv properties of AdapterBlock model are specified. Then it applies a series of
residual blocks to the input tensor.
r   r   r   rq   s     r   r/   AdapterBlock.forward  sE     ??&"A<<#QALLOr!   r   FrL   rM   rN   rO   rP   ry   rV   r   r&   rR   r/   rX   rY   rZ   s   @r   r   r     sO    
C 
s 
C 
W[ 
 
 %,,  r!   r   c                   n   ^  \ rS rSrSrS\4U 4S jjrS\R                  S\R                  4S jr	Sr
U =r$ )	r   i  z
An `AdapterResnetBlock` is a helper model that implements a ResNet-like block.

Args:
    channels (`int`):
        Number of channels of AdapterResnetBlock's input and output.
re   c                    > [         TU ]  5         [        R                  " XSSS9U l        [        R
                  " 5       U l        [        R                  " XSS9U l        g )Nr]   r   r   r   r   r   r   r   block1ReLUactblock2r   re   r   s     r   r   AdapterResnetBlock.__init__  sA    ii1M779iiBr!   r*   r$   c                 l    U R                  U R                  U5      5      nU R                  U5      nX!-   $ )z
This method takes input tensor x and applies a convolutional layer, ReLU activation, and another convolutional
layer on the input tensor. It returns addition with the input tensor.
r   r   r   r   r*   hs      r   r/   AdapterResnetBlock.forward  .     HHT[[^$KKNur!   r   rL   rM   rN   rO   rP   ry   r   r&   rR   r/   rX   rY   rZ   s   @r   r   r     s6    C C	 	%,, 	 	r!   r   c            	          ^  \ rS rSrSrS/ SQSS4S\S\\   S	\S
\4U 4S jjjrS\R                  S\\R                     4S jr
SrU =r$ )rm   i  r|   r]   )r_   r`   ra      rb   rd   re   rf   r   c                   > [         TU ]  5         XS-  -  n[        R                  " U5      U l        [        R
                  " [        XS   U5      /[        [        U5      S-
  5       Vs/ s H  n[        X%   X%S-      USS9PM     snQ[        US   US   USS9P5      U l	        US[        U5      -  -  U l
        g s  snf )Nr   r   r   Tr   )r   r   r   r   rw   r   LightAdapterBlockr   r   r   r   r   s         r   r   LightAdapter.__init__  s     	!a$77**+;<MM!+{NK #3x=1#455 &hk8E?NY]^5 "(2,nSWX	
	 '7!s8}:L&M#s   *B=
r*   r$   c                     U R                  U5      n/ nU R                   H  nU" U5      nUR                  U5        M     U$ )z
This method takes the input tensor x and performs downscaling and appends it in list of feature tensors. Each
feature tensor corresponds to a different level of processing within the LightAdapter.
)rw   r   rC   r   s       r   r/   LightAdapter.forward  sA    
 NN1YYEaAOOA  r!   )r   r   rw   r   rZ   s   @r   rm   rm     su     . !NN s)N 	N
 N N4 $u||*<  r!   rm   c            	       ~   ^  \ rS rSrSrSS\S\S\S\4U 4S jjjrS\R                  S	\R                  4S
 jr
SrU =r$ )r   i  a  
A `LightAdapterBlock` is a helper model that contains multiple `LightAdapterResnetBlocks`. It is used in the
`LightAdapter` model.

Args:
    in_channels (`int`):
        Number of channels of LightAdapterBlock's input.
    out_channels (`int`):
        Number of channels of LightAdapterBlock's output.
    num_res_blocks (`int`):
        Number of LightAdapterResnetBlocks in the LightAdapterBlock.
    down (`bool`, *optional*, defaults to `False`):
        If `True`, perform downsampling on LightAdapterBlock's input.
rd   r   rf   r   c                 f  > [         TU ]  5         US-  nS U l        U(       a  [        R                  " SSSS9U l        [        R
                  " XSS9U l        [        R                  " [        U5       Vs/ s H  n[        U5      PM     sn6 U l
        [        R
                  " XRSS9U l        g s  snf )Nr   r   Tr   r   r   )r   r   r   r   r   r   r   r   r   LightAdapterResnetBlockr   out_conv)r   rd   r   rf   r   mid_channelsr7   r   s          r   r   LightAdapterBlock.__init__  s    #q( llqdSDOyyJ}}V[\jVk&lVkQR'>|'LVk&lm		,!L 'ms   5B.r*   r$   c                     U R                   b  U R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU$ )z
This method takes tensor x as input and performs downsampling if required. Then it applies in convolution
layer, a sequence of residual blocks, and out convolutional layer.
)r   r   r   r   rq   s     r   r/   LightAdapterBlock.forward  sI    
 ??&"ALLOLLOMM!r!   )r   r   r   r   r   r   rZ   s   @r   r   r     sU    
MC 
Ms 
MC 
MW[ 
M 
M %,,  r!   r   c                   n   ^  \ rS rSrSrS\4U 4S jjrS\R                  S\R                  4S jr	Sr
U =r$ )	r   i.  z
A `LightAdapterResnetBlock` is a helper model that implements a ResNet-like block with a slightly different
architecture than `AdapterResnetBlock`.

Args:
    channels (`int`):
        Number of channels of LightAdapterResnetBlock's input and output.
re   c                    > [         TU ]  5         [        R                  " XSSS9U l        [        R
                  " 5       U l        [        R                  " XSSS9U l        g )Nr]   r   r   r   r   s     r   r    LightAdapterResnetBlock.__init__8  sC    ii1M779ii1Mr!   r*   r$   c                 l    U R                  U R                  U5      5      nU R                  U5      nX!-   $ )z
This function takes input tensor x and processes it through one convolutional layer, ReLU activation, and
another convolutional layer and adds it to input tensor.
r   r   s      r   r/   LightAdapterResnetBlock.forward>  r   r!   r   r   rZ   s   @r   r   r   .  s6    N N	 	%,, 	 	r!   r   )r?   typingr   r&   torch.nnr   configuration_utilsr   r   utilsr   modeling_utilsr	   
get_loggerrL   rD   r   r   Modulerk   rl   r   r   rm   r   r    r!   r   <module>r      s    
    A  & 
		H	%y: yxD7[ D7T/")) /d/BII /d-299 -` <,299 ,^(		 (Vbii r!   