
    +jB                         d dl Z d dlZd dlmZ d dlmZmZmZmZ d dl	m
Z
mZmZmZ d dlmZ erd dlmZ e G d d                      ZdS )	    N)	dataclass)TYPE_CHECKINGLiteralOptionalUnion)DeepSpeedSequenceParallelConfigDistributedTypeTorchContextParallelConfigTorchTensorParallelConfig)is_torch_version)Acceleratorc                      e Zd ZU dZdZee         ed<   dZee         ed<   dZ	ee         ed<   dZ
ee         ed<   dZed         ed<   dZee         ed	<   dZed
         ed<   dZedef         ed<   dZedef         ed<   dZedef         ed<   dZd Zd Zed             Zed             Zed             Zed             Zed             Zed             Zed             Z ed             Z!ed             Z"ed             Z#ed             Z$ed             Z%ed             Z&ed             Z'de(fd Z)d,dee(         fd!Z*d"e+e+ed#f         e+e(d#f         f         fd$Z,d% Z-d&e(d'efd(Z.d-d+Z/dS ).ParallelismConfiga  
    A dataclass to configure parallelisms applied to the model. Inspired by torchtitan's `ParallelDims`
    https://github.com/pytorch/torchtitan/blob/main/torchtitan/distributed/parallel_dims.py

    Args:
        dp_replicate_size (`int`, defaults to `1`):
            The size of the data parallel group. If `dp_replicate_size` is set to 1, the data parallel replication
            group will not be used.
        dp_shard_size (`int`, defaults to `1`):
            The size of the model shard group. If `dp_replicate_size > 1` and `tp_size > 1`, `dp_shard_size` must also
            be greater than 1, as composing DDP + TP is currently not supported.
        tp_size (`int`, defaults to `1`):
            The size of the tensor parallel group. If `tp_size` is set to `1`, the tensor parallel group will not be
            used.
        tp_handler (`~utils.TorchTensorParallelConfig`, defaults to `None`):
            The handler for the tensor parallel group.
        cp_size (`int`, defaults to `1`):
            The size of the context parallel group. Currently not supported, but reserved for future use and enabled
            for downstream libraries.
        cp_backend (`str`, defaults to `torch`):
            Which CP backend to use: `torch` (FSDP2)
        sp_size (`int`, defaults to `1`):
            The size of the sequence parallel group.
        sp_backend (`str`, defaults to `deepspeed`):
            Which SP backend to use:`deepspeed` (ALST/Ulysses)

    You may obtain different distributed data parallel paradigms by configuring `dp_replicate_size` and `dp_shard_size`
    together:
        - `dp_replicate_size == 1` and `dp_shard_size > 1`, we obtain Fully Sharded Data Parallel (FSDP).
        - `dp_replicate_size > 1` and `dp_shard_size > 1`, we obtain Hybrid Sharded Data Parallel (HSDP).
        - `dp_replicate_size > 1` and `dp_shard_size == 1` is an invalid configuration, to use pure DP, use
          `DistributedDataParallelKwargs` instead.

    Ndp_replicate_sizedp_shard_sizetp_sizecp_sizetorch
cp_backendsp_size	deepspeed
sp_backend
tp_handler
cp_handler
sp_handlerc                     d| j          d| j         d| j         d| j         d| j         d| j         d| j         d| j         d	| j         d
| j	         dS )Nz'ParallelismConfig(
 	dp_replicate_size=z,
	dp_shard_size=z,
	tp_size=z,
	cp_size=z,
	cp_backend=z,
	sp_size=z,
	sp_backend=z,
	total_size=z
	tp_handler=z,
	cp_handler=z)
)
r   r   r   r   r   r   r   
total_sizer   r   selfs    `/home/wildlama/comfy/ComfyUI/.venv/lib/python3.11/site-packages/accelerate/parallelism_config.py__repr__zParallelismConfig.__repr__U   s    
1#'#9
1 
1#1
1 
1 
1 
1 	
1 
1
 !O
1 
1 
1 
1 !O
1 
1 !O
1 
1 !O
1 
1 !O
1 
1 
1	
    c                     dd l dg                    fd| j                                        D                        d S )Nr   device_meshc                 x    i | ]6\  }}|v	|t          |d           r                    |j                  n|7S )__dict__)hasattrdeepcopyr&   ).0kv_non_serializable_fieldscopys      r    
<dictcomp>z-ParallelismConfig.to_json.<locals>.<dictcomp>j   sW       Aq444 :0F0FM4==,,,A444r"   )r-   r(   r&   items)r   r,   r-   s    @@r    to_jsonzParallelismConfig.to_jsond   sn    $1?      M//11  	
 	
 	
 	
 	
r"   c                 >    g }| j         r|dgz  }| j        r|dgz  }|S )zENames of enabled dimensions across which data parallelism is applied.dp_replicatedp_shard)dp_replicate_enableddp_shard_enabledr   dimss     r    dp_dim_nameszParallelismConfig.dp_dim_namesq   s=     $ 	%^$$D  	!ZL Dr"   c                 X    g }| j         r|dgz  }| j        r|dgz  }| j        r|dgz  }|S )z]Names of enabled dimensions which will receive the same batch (non-data parallel dimensions).tpcpsp)
tp_enabled
cp_enabled
sp_enabledr6   s     r    non_dp_dim_namesz"ParallelismConfig.non_dp_dim_names{   sM     ? 	TFND? 	TFND? 	TFNDr"   c                 >    g }| j         r|dgz  }| j        r|dgz  }|S )zlNames of enabled dimensions which will be flattened into a joint mesh across which is model sharded in FSDP.r3   r;   )r5   r>   r6   s     r    dp_shard_cp_dim_namesz'ParallelismConfig.dp_shard_cp_dim_names   s:       	!ZL D? 	TFNDr"   c                 X    g }| j         r|dgz  }| j        r|dgz  }| j        r|dgz  }|S )z@Names of enabled dimensions across which loss should be averagedr2   r3   r;   )r4   r5   r>   r6   s     r    dp_cp_dim_namesz!ParallelismConfig.dp_cp_dim_names   sR     $ 	%^$$D  	!ZL D? 	TFNDr"   c                 0    g }| j         r|dgz  }|dgz  }|S )z^Names of enabled dimensions across which FSDP is applied, including data parallel replication.r2   dp_shard_cp)r4   r6   s     r    fsdp_dim_namesz ParallelismConfig.fsdp_dim_names   s2     $ 	%^$$Dr"   c                 P    | j         | j        z  | j        z  | j        z  | j        z  S )zSThe total size of the parallelism configuration, which is the product of all sizes.)r   r   r   r   r   r   s    r    r   zParallelismConfig.total_size   s,     %(::T\IDLX[_[gggr"   c                 0    | j         | j        z  | j        z  S )zhThe size of the non-data parallel dimensions, which is the product of tensor and context parallel sizes.)r   r   r   r   s    r    non_data_parallel_sizez(ParallelismConfig.non_data_parallel_size   s     |dl*T\99r"   c                      | j         | j        z  S )z_The size of the data parallel dimensions, which is the product of data parallel replication and)r   r   r   s    r    data_parallel_sizez$ParallelismConfig.data_parallel_size   s     %(:::r"   c                     | j         dk    S )zKTrue if data parallel replication is enabled, i.e. `dp_replicate_size > 1`.   )r   r   s    r    r4   z&ParallelismConfig.dp_replicate_enabled   s     %))r"   c                     | j         dk    S )zDTrue if data parallel sharding is enabled, i.e. `dp_shard_size > 1`.rN   )r   r   s    r    r5   z"ParallelismConfig.dp_shard_enabled   s     !A%%r"   c                     | j         dk    S )z:True if tensor parallelism is enabled, i.e. `tp_size > 1`.rN   )r   r   s    r    r=   zParallelismConfig.tp_enabled        |ar"   c                     | j         dk    S )z;True if context parallelism is enabled, i.e. `cp_size > 1`.rN   )r   r   s    r    r>   zParallelismConfig.cp_enabled   rQ   r"   c                     | j         dk    S )z;True if context parallelism is enabled, i.e. `sp_size > 1`.rN   )r   r   s    r    r?   zParallelismConfig.sp_enabled   rQ   r"   c                      | j         | j        z   S )z$Names of all active mesh dimensions.)r8   r@   r   s    r    active_mesh_dimsz"ParallelismConfig.active_mesh_dims   s      4#888r"   device_typec                    | j         dk    r| j        dk    rdS t          dd          rddlm} nt          d          |                                 }t          |          dk    rdS |\  }} ||||	          }| j        r || j                 	                    d
           | j
        r || j
                 	                    d           | j        r || j                 	                    d           |S )a!  Builds a device mesh for the given device type based on the parallelism configuration.
        This method will also create required joint meshes (e.g. `dp_shard_cp`, `dp_cp`, `dp`).

        Args:
            device_type (`str`): The type of device for which to build the mesh, e
        r   rN   Nz>=z2.2.0r   )init_device_meshz4Building a device_mesh requires to have torch>=2.2.0)mesh_dim_namesdprF   dp_cp)r   r   r   torch.distributed.device_meshrX   RuntimeError	_get_meshlenr8   _flattenrB   rD   )r   rV   rX   meshrY   
mesh_shaper$   s          r    build_device_meshz#ParallelismConfig.build_device_mesh   s%    ?k))dlQ.>.>4D'** 	WFFFFFFFUVVV~~t99>>4%)"
&&)
 
 

  	:)*33D999% 	L23<<]KKK 	@,-66w???r"   c                     | j         ,||                     |          | _         nAt          d          |0| j         j        |k    r t          d| j         j         d| d          | j         S )Nz@You need to pass a device_type e.g cuda to build the device meshz4The device_mesh is already created with device type z@. However, you are trying to get a device mesh with device_type z<. Please check if you correctly initialized your device_mesh)r$   rc   
ValueErrorrV   )r   rV   s     r    get_device_meshz!ParallelismConfig.get_device_mesh   s    #&#'#9#9+#F#F   !cddd&#/;>>$ wtO_Ok  w  w  ny  w  w  w   r"   return.c                       fd j         D             }g dt          |                                fd          }t          t	          |           S )zQGenerate mesh shape and dimension names for torch.distributed.init_device_mesh().c                 ,    i | ]}|j         |         S  )_sizes)r)   parallelismr   s     r    r.   z/ParallelismConfig._get_mesh.<locals>.<dictcomp>  s"    ddd{[$+k":dddr"   )r2   r3   r;   r<   r:   c                 :                         | d                   S )Nr   )index)x
mesh_orders    r    <lambda>z-ParallelismConfig._get_mesh.<locals>.<lambda>  s    :++AaD11 r"   )key)rU   sortedr/   tuplezip)r   	mesh_dimssorted_itemsrp   s   `  @r    r^   zParallelismConfig._get_mesh  ss     eddddNcddd	 DCC
OO2222
 
 
 S,'(((r"   c           
         | j         2t          t          j                            dd                    | _         | j        2t          t          j                            dd                    | _        | j        2t          t          j                            dd                    | _        | j        2t          t          j                            dd                    | _        | j        %t          j                            dd          | _        | j	        2t          t          j                            dd                    | _	        | j
        %t          j                            d	d
          | _
        | j        dk    r| j        t                      | _        | j        dk    r| j        t                      | _        not          t                    }t!          | j        || j                           s:t#          d| j         d|| j                  dt%          | j                             | j	        dk    r| j        t)                      | _        | j         dk     rt#          d| j                    | j        dk     rt#          d| j                   | j        dk     rt#          d| j                   | j        dk     rt#          d| j                   dg}| j        |vrt#          d| d| j                   | j	        dk     rt#          d| j	                   d
g}| j
        |vrt#          d| d| j
                   | j        dk    r+| j	        dk    r t#          d| j         d| j	         d          | j        dk    s| j        dk    r%| j         dk    r| j        dk    rt#          d          | j         | j        | j        | j        | j	        d| _        d S )N$PARALLELISM_CONFIG_DP_REPLICATE_SIZE1 PARALLELISM_CONFIG_DP_SHARD_SIZEPARALLELISM_CONFIG_TP_SIZEPARALLELISM_CONFIG_CP_SIZEPARALLELISM_CONFIG_CP_BACKENDr   PARALLELISM_CONFIG_SP_SIZEPARALLELISM_CONFIG_SP_BACKENDr   rN   )r   zParallelismConfig's cp_backend=z
 requires z, but cp_handler was set to z.dp_replicate_size must be at least 1, but got z*dp_shard_size must be at least 1, but got z$tp_size must be at least 1, but got z$cp_size must be at least 1, but got zcp_backend must be one of z
, but got z$sp_size must be at least 1, but got zsp_backend must be one of z[Context Parallelism (CP) and Sequence Parallelism (SP) are mutually exclusive. Got cp_size=z and sp_size=z+. Please set either cp_size=1 or sp_size=1.aC  Tensor/Context parallelism (tp/cp_size > 1) cannot be used with pure data parallelism (dp_replicate_size > 1 and dp_shard_size == 1). Please set dp_shard_size > 1 and dp_replicate_size == 1 to compose FSDP + TP/CP for 2D parallel, or set dp_replicate_size == 1 and dp_shard_size > 1 to compose HSDP + TP/CP for 3D parallel.)r2   r3   r:   r;   r<   )r   intosenvirongetr   r   r   r   r   r   r   r   r   r
   dict
isinstancere   typer   r   rk   )r   cp_backends_config_mapvalid_cp_backendsvalid_sp_backendss       r    __post_init__zParallelismConfig.__post_init__  s(   !)%(8^`c)d)d%e%eD"%!$RZ^^4VX[%\%\!]!]D<rz~~.JCPPQQDL<rz~~.JCPPQQDL?" jnn-LgVVDO<rz~~.JCPPQQDL?" jnn-LkZZDO<!&";"="=<!&"<">">)-4* * *& "$/3I$/3Z[[ $ r$/  r  rUklpl{U|  r  r  [_  `d  `o  [p  [p  r  r   <!&"A"C"C!A%%fdNdffggg!!^$J\^^___<!RDLRRSSS<!RDLRRSSS$I?"333h:KhhW[Wfhhiii<!RDLRRSSS(M?"333h:KhhW[Wfhhiii <!q 0 0<#|< <:>,< < <   L1q 0 0d6Lq6P6PUYUgklUlUlo   !2*,,,
 
r"   rl   sizec                     || j                                         v s$J d| j                                                      || j         |<   t          | | d|           d S )NzParallelism must be one of _size)rk   keyssetattr)r   rl   r   s      r    	_set_sizezParallelismConfig._set_size^  so    dk..000002dPTP[P`P`PbPb2d2d000#'K +++T22222r"   acceleratorr   c                    t                      }|j        s| j        dk    rd S | j        dk    r|                     d|j                   | j        dk    r| j        dk    rn0| j        |j        k    r t          d| j         d|j         d          | j        dk    r;|j        s4|j        s-|j	        t          j        k    st          d|j	         d          | j                                        D ];\  }}|dk    r0t          | | d	d           |                    d
| d| d           <|r9|j        r4t#          j        dd                    |          z   t(                     d S d S d S )NrN   r2   r   zParallelismConfig total_size (z ) does not match num_processes (zJ). Please adjust dp_replicate_size/ dp_shard_size/tp_size/cp_size/sp_size.zParallelismConfig is only compatible DistributedType.FSDP (version 2) or DistributedType.Multi{Device} or DistributedType.DEEPSPEED, but got ._handlerzParallelismConfig.z_handler is set, but z0_size is set to 1. This handler will be ignored.z.ParallelismConfig has the following warnings:

)setmulti_devicer   r   num_processesr   r   re   is_fsdp2distributed_typer	   	DEEPSPEEDrk   r/   getattraddis_main_processwarningswarnjoinUserWarning)r   r   	_warningsrl   r   s        r    _validate_acceleratorz'ParallelismConfig._validate_acceleratorc  s   EE	' 	DOq,@,@F ?aNN>;+DEEE ?k))dlQ.>.>_ 999: : :"-";: : :   ?Q  '  +/HHH B  cn  c  B  B  B   "&!2!2!4!4 	 	KqyyWTk+C+C+CTJJV I  I  I;  I  I  I    	4 	MADIIiDXDXX    	 	 	 	r"   )N)r   r   )0__name__
__module____qualname____doc__r   r   r   __annotations__r   r   r   r   r   r   r   r   r   r   r   r
   r   r   r$   r!   r0   propertyr8   r@   rB   rD   rG   r   rJ   rL   r4   r5   r=   r>   r?   rU   strrc   rf   rt   r^   r   r   r   rj   r"   r    r   r   !   sc        ! !F (,x}+++#'M8C='''!GXc]!!!!GXc]!!!#'J '''!GXc]!!!'+J$+++ :>Jd556===:>Jd667>>>?CJd;;<CCCK
 
 

 
 
   X 	 	 X	   X 	 	 X	   X h h Xh : : X: ; ; X; * * X* & & X&     X      X      X  9 9 X9!S ! ! ! !F   8C=        )5sCx%S/!AB ) ) ) )J
 J
 J
X3S 3 3 3 3 3
+ + + + + +r"   r   )r   r   dataclassesr   typingr   r   r   r   accelerate.utils.dataclassesr   r	   r
   r   accelerate.utils.versionsr   
accelerater   r   rj   r"   r    <module>r      s   
			  ! ! ! ! ! ! : : : : : : : : : : : :            7 6 6 6 6 6  '&&&&&& l l l l l l l l l lr"   