
    
9j                    X    S SK Jr  S SKrS SKJr  S SKJr  S SKJr  S\0r	SSSSS.S	 jr
g)
    )annotationsN)nccl)_store)NCCLBackendr   F)backendhostportuse_mpic                  U S::  a  [        SU  35      eSUs=::  a  U :  d  O  [        SU SU  35      eU[        ;  a  [        U S35      eUS:X  a   [        R                  (       d  [	        S5      eUc.  [
        R                  R                  S[        R                  5      nUc7  [        [
        R                  R                  S	[        R                  5      5      n[        U   " XX4U5      $ )
a	  Start `cupyx.distributed` and obtain a communicator.

This call initializes the distributed environment, it needs to be
called for every process that is involved in the communications.

A single device per returned communication is only allowed. It is the user
responsibility of setting the appropiated gpu to be used before creating
and using the communicator.

Currently the user needs to specify each process rank and the total
number of processes, and start all the processes in different hosts
manually.

The process with rank 0 will spawn a TCP server using a
subprocess that listens in the port indicated by
the env var `CUPYX_DISTRIBUTED_PORT`, the rank 0 must be executed
in the host determined by the env var `CUPYX_DISTRIBUTED_HOST`.
In case their values are not specified, `'127.0.0.1'` and `13333` will be
used by default.

Note that this feature is expected to be used within a trusted cluster
environment.

Example:

    >>> import cupy
    >>> def process_0():
    ...     import cupyx.distributed
    ...     cupy.cuda.Device(0).use()
    ...     comm = cupyx.distributed.init_process_group(2, 0)
    ...     array = cupy.ones(1)
    ...     comm.broadcast(array, 0)
    ...
    >>> def process_1():
    ...     import cupyx.distributed
    ...     cupy.cuda.Device(1).use()
    ...     comm = cupyx.distributed.init_process_group(2, 1)
    ...     array = cupy.zeros(1)
    ...     comm.broadcast(array, 0)
    ...     cupy.equal(array, cupy.ones(1))

Args:
    n_devices (int): Total number of devices that will be used in the
        distributed execution.
    rank (int): Unique id of the GPU that the communicator is associated to
        its value needs to be `0 <= rank < n_devices`.
    backend (str): Backend to use for the communications. Optional,
        defaults to `"nccl"`.
    host (str): host address for the process rendezvous on initialization
        defaults to `None`.
    port (int): port for the process rendezvous on initialization
        defaults to `None`.
    use_mpi (bool): if ``False``, it avoids using MPI for synchronization
        and uses the provided TCP server for exchanging CPU only
        information.
        defaults to `False`.
Returns:
    Backend: object used to perform communications, adheres to the
        :class:`~cupyx.distributed.Backend` specification:
r   zInvalid number of devices zInvalid number of rank  z is not supportedr   zNCCL is not availableCUPYX_DISTRIBUTED_HOSTCUPYX_DISTRIBUTED_PORT)
ValueError	_backendsr   	availableRuntimeErrorosenvirongetr   _DEFAULT_HOSTint_DEFAULT_PORT)	n_devicesrankr   r   r	   r
   s         Q/home/wildlama/miniconda3/lib/python3.13/site-packages/cupyx/distributed/_init.pyinit_process_groupr      s    ~ A~5i[ABB!	!24&)EFFiG9$5677&233|zz~~68L8LM|2::>>$f&:&:< = Wit7CC    )
__future__r   r   	cupy.cudar   cupyx.distributedr   cupyx.distributed._nccl_commr   r   r    r   r   <module>r#      s3    " 	  $ 4 [!	 %+DMDr   