
    
9j                        S SK Jr  S SKrS SKrS SKrS SKr	S SK
Jr   " S S5      rS0 S4SS\R                  SS	.S
 jjrS rg)    )annotationsN)runtimec                  p    \ rS rSrSrS rSS jr\SS j5       r\SS j5       r	\
S 5       rSS jrS	 rS
rg)_PerfCaseResult   a  An obscure object encompassing timing results recorded by
:func:`~cupyx.profiler.benchmark`. Simple statistics can be obtained by
converting an instance of this class to a string.

.. warning::
    This API is currently experimental and subject to change in future
    releases.

c                    UR                   S:X  d   eUR                  S   [        U5      S-   :X  d   eUR                  S   S:  d   eXl        X l        X0l        g )N   r      )ndimshapelenname_ts_devices)selfr   tsdevicess       N/home/wildlama/miniconda3/lib/python3.13/site-packages/cupyx/profiler/_time.py__init___PerfCaseResult.__init__   sT    ww!||xx{c'lQ....xx{Q	    c                     U R                  SS9$ )ziReturns a string representation of the object.

Returns:
    str: A string representation of the object.
Tshow_gputo_strr   s    r   __repr___PerfCaseResult.__repr__   s     {{D{))r   c                     U R                   S   $ )zA :class:`numpy.ndarray` of shape ``(n_repeat,)``, holding times spent
on CPU in seconds.

These values are delta of the host-side performance counter
(:func:`time.perf_counter`) between each repeat step.
r   r   r   s    r   	cpu_times_PerfCaseResult.cpu_times'   s     xx{r   c                     U R                   SS $ )zA :class:`numpy.ndarray` of shape ``(len(devices), n_repeat)``,
holding times spent on GPU in seconds.

These values are measured using ``cudaEventElapsedTime`` with events
recorded before/after each repeat step.
r
   Nr!   r   s    r   	gpu_times_PerfCaseResult.gpu_times1   s     xx|r   c                6   UR                   S:X  d   eUR                  S:  d   eUS-  nSR                  XR                  5       5      nUR                  S:  a@  USR                  UR	                  5       UR                  5       UR                  5       5      -  nU$ )Nr
   r   g    .Az    {}: {:9.03f} usz2   +/- {:6.03f} (min: {:9.03f} / max: {:9.03f}) us)r   sizeformatmeanstdminmax)device_namett_usss       r   _to_str_per_item _PerfCaseResult._to_str_per_item;   s    vv{{vvzz3w!((iikB66A:ELL
DHHJ
4 4Ar   c           	     b   U R                  SU R                  S   5      /nU(       a^  [        U R                  5       HE  u  p4UR	                  U R                  SR                  U5      U R                  SU-      5      5        MG     SR                  U R                  SR                  U5      5      $ )NCPUr   zGPU-{}r
   z
{:<20s}:{} )r2   r   	enumerater   appendr)   r   join)r   r   resultsids        r   r   _PerfCaseResult.to_strG   s    ((<=!$--0))(//!*<*.((1q5/;< 1 ""499chhw.?@@r   c                     U R                  SS9$ )NTr   r   r   s    r   __str___PerfCaseResult.__str__P   s    {{D{))r   )r   r   r   N)returnstr)rA   z_numpy.ndarray)F)__name__
__module____qualname____firstlineno____doc__r   r   propertyr"   r%   staticmethodr2   r   r?   __static_attributes__ r   r   r   r      sX     *     	 	A*r   r   rK   i'  
   )r   n_warmupmax_durationr   c          
        Uc  U R                   nUc  [        R                  R                  5       4n[	        U 5      (       d  [        S5      e[        U[        5      (       d  [        S5      e[        U[        5      (       d  [        S5      e[        U[        5      (       d  [        S5      e[        U[        5      (       d  [        S5      e[        U[        5      (       d  [        S5      e[        R                  " U5      (       d  [        S5      e[        U[        5      (       d  [        S5      e[        XX#XEXg5      $ )	a  Timing utility for measuring time spent by both CPU and GPU.

This function is a very convenient helper for setting up a timing test. The
GPU time is properly recorded by synchronizing internal streams. As a
result, to time a multi-GPU function all participating devices must be
passed as the ``devices`` argument so that this helper knows which devices
to record. A simple example is given as follows:

.. code-block:: py

    import cupy as cp
    from cupyx.profiler import benchmark

    def f(a, b):
        return 3 * cp.sin(-a) * b

    a = 0.5 - cp.random.random((100,))
    b = cp.random.random((100,))
    print(benchmark(f, (a, b), n_repeat=1000))

.. note::
    For IPython/Jupyter users, the ``%gpu_timeit`` magic provides
    a convenient way to use this function. See the documentation
    for :doc:`/user_guide/performance` for details.


Args:
    func (callable): a callable object to be timed.
    args (tuple): positional arguments to be passed to the callable.
    kwargs (dict): keyword arguments to be passed to the callable.
    n_repeat (int): number of times the callable is called. Increasing
        this value would improve the collected statistics at the cost
        of longer test time.
    name (str): the function name to be reported. If not given, the
        callable's ``__name__`` attribute is used.
    n_warmup (int): number of times the callable is called. The warm-up
        runs are not timed.
    max_duration (float): the maximum time (in seconds) that the entire
        test can use. If the taken time is longer than this limit, the test
        is stopped and the statistics collected up to the breakpoint is
        reported.
    devices (tuple): a tuple of device IDs (int) that will be timed during
        the timing test. If not given, the current device is used.

Returns:
    :class:`~cupyx.profiler._time._PerfCaseResult`:
        an object collecting all test results.

z#`func` should be a callable object.z`args` should be of tuple type.z `kwargs` should be of dict type.z `n_repeat` should be an integer.z`name` should be a string.z `n_warmup` should be an integer.z)`max_duration` should be given in secondsz!`devices` should be of tuple type)rC   _cupycudaget_device_idcallable
ValueError
isinstancetupledictintrB   _numpyisreal_repeat)funcargskwargsn_repeatr   rM   rN   r   s           r   	benchmarkr`   T   s   j |}}::++-/D>>>??dE"":;;fd##;<<h$$;<<dC  566h$$;<<==&&DEEgu%%<==FdlM Mr   c                T   / n/ n	U H  n
[         R                  " 5       n [         R                  " U
5        UR                  [        R
                  R                  R                  5       5        U	R                  [        R
                  R                  R                  5       5        [         R                  " U5        M     [        U5       H  n
U " U0 UD6  M     [        X5       Hg  u  p[         R                  " 5       n [         R                  " U5        UR                  5         [         R                  " U5        UR                  5         Mi     / nU V
s/ s H  n
/ PM     nn
Sn[        U5       GH  n
[        X5       HW  u  p[         R                  " 5       n [         R                  " U5        UR                  5         [         R                  " U5        MY     [        R                  " 5       nU " U0 UD6  [        R                  " 5       nUU-
  nUR                  U5        [        X5       HW  u  p[         R                  " 5       n [         R                  " U5        UR                  5         [         R                  " U5        MY     [        X5       HW  u  p[         R                  " 5       n [         R                  " U5        UR                  5         [         R                  " U5        MY     [        [        X5      5       H?  u  n
u  nn[        R
                  R                  UU5      S-  nX   R                  U5        MA     U[        R                  " 5       U-
  -  nUU:  d  GM    O   [         R"                  " U/U-   [         R$                  S9n['        UUUS9$ ! [         R                  " U5        f = f! [         R                  " U5        f = fs  sn
f ! [         R                  " U5        f = f! [         R                  " U5        f = f! [         R                  " U5        f = f)Nr   gMbP?)dtype)r   )r   	getDevice	setDevicer8   rP   rQ   streamEventrangeziprecordsynchronize_timeperf_counterr7   get_elapsed_timerY   asarrayfloat64r   )r\   r]   r^   r_   r   rM   rN   r   events_1events_2r;   prev_deviceeventdevicer"   r%   durationt1t2cpu_timeev1ev2gpu_timer   s                           r   r[   r[      sB    HH'')	+a OOEJJ--3356OOEJJ--3356k*  8_df  X/'')	+f%LLNk* 0 I%&XXI&H8_ 3ME!++-K/!!&)!!+. 4 !df!7" 3ME!++-K/!!&)!!+. 4 !3ME!++-K/!!&)!!#!!+. 4 's8'>?MAzSzz223<tCHL) @ 	E&&(2--l"K N 
i/v~~	FB4W55s k* k* ' !!+.  !!+. !!+.sB   BN>&N9O&O&O4-&PN69OO14PP')
__future__r   math_mathtimerk   numpyrY   cupyrP   cupy_backends.cuda.apir   r   infr`   r[   rK   r   r   <module>r      sO    "     *E* E*R b5MMBUYYMM`F6r   