
    3j                        S SK r S SKJrJr  S SKJrJr  S SKJr  SSK	J
r
  \ R                  R                  r\" \ R                  \ R                  \ R                  \ R                   /5      r\" / \R$                  P\R&                  P\R(                  P\R*                  P\R,                  P\R.                  P\R0                  P\R2                  P\R4                  P\R6                  P\R8                  P\R:                  P\R<                  P\R>                  P\R@                  P\RB                  P\RD                  P\RF                  P\RH                  P\RJ                  P\RL                  P\RN                  P\RP                  P\RR                  P\RT                  P\RV                  P\RX                  P\RZ                  P\R\                  P\R^                  P\R`                  P5      r1\" \Rd                  \Rf                  \Rh                  \Rj                  \Rl                  \Rn                  \Rp                  \Rr                  \Rt                  /	5      r;\1\;-  r<S\=4S jr>S	\ R~                  S\@4S
 jrAS\=4S jrBg)    N)get_device_tflopsget_gpu_dram_gbps)optimization_hintstatically_known_true)
OrderedSet   )flop_registryreturnc                     U [         ;   ae  [        U5      S:w  a  [        SU SU  35      eUR                  5       n[	        U5      S-  nSnXv-  n[         U    n	U	" U0 UDSU0D6S-  n
X-  S-  nU$ g	)
aO  
Estimates the compute time of an aten operator.

Args:
    func_packet: The operator overload packet.
    args: The arguments to the operator.
    kwargs: The keyword arguments to the operator.
    out: The output of the operator.
    out_dtypes: The output data types.

Returns:
    float: The estimated compute time in nanoseconds.
r   z"Only support single out dtype got z for g  4&kCg      ?out_val   g    eAg        )r	   lenAssertionErrorpopr   )func_packetargskwargsout
out_dtypesdtypepeak_gpu_flopsfactorpeak_empirical_flopsflop_count_func
flop_countcompute_times               Y/home/wildlama/miniconda3/lib/python3.13/site-packages/torch/utils/_runtime_estimation.pyget_compute_timer   M   s     m#z?a 4ZLk]S   *51D8%6'4$dBfBcBQF
"9S@    tc                     Sn[        U R                  U R                  5       5       H'  u  p#[        US:H  5      (       a  M  U[	        USS9-  nM)     XR                  5       -  $ )z
Calculates the memory consumption of a tensor.

Args:
    t (torch.Tensor): The input tensor.

Returns:
    int: The memory consumption of the tensor in bytes.
r   r   )fallback)zipshapestrider   r   element_size)r    
real_numelsizer%   s       r   get_num_bytesr)   o   sY     JAGGQXXZ0$Vq[11+D1==J 1
 (((r   c                 r    [        5       n[        S U  5       5      n[        S U 5       5      nX4-   nXR-  nU$ )a+  
Estimates the memory transfer time of input and output tensors.

Args:
    flat_args_kwargs (List[torch.Tensor]): The flat list of arguments and keyword arguments.
    flat_outs (List[torch.Tensor]): The flat list of outputs.

Returns:
    float: The estimated memory transfer time in nanoseconds.
c              3   z   #    U  H1  n[        U[        R                  5      (       d  M$  [        U5      v   M3     g 7fN
isinstancetorchTensorr)   .0r    s     r   	<genexpr>$get_transfer_time.<locals>.<genexpr>   s,      "2QjELL6Qa"2   #;;c              3   z   #    U  H1  n[        U[        R                  5      (       d  M$  [        U5      v   M3     g 7fr,   r-   r1   s     r   r3   r4      s+      "+Qz!U\\/Ja)r5   )r   sum)flat_args_kwargs	flat_outsgpu_memory_bandwidth
read_byteswrite_bytescounted_bytestransfer_times          r   get_transfer_timer?      sU     -. "2 J  "+ K ,M!8Mr   )Cr/   torch._inductor.utilsr   r   %torch.fx.experimental.symbolic_shapesr   r   torch.utils._ordered_setr   flop_counterr	   opsatenfloat16bfloat16float32float64_FLOAT_TYPES
lift_freshr    	transposeviewdetach_unsafe_viewsplitadjoint
as_strideddiagonalexpand	expand_asmovedimpermuteselectsqueezemTmHrealimagview_as	unflattenunfoldunbind	unsqueezevsplithsplitsplit_with_sizesswapaxesswapdimschunk	_VIEW_OPSrandintrandnrand
randn_like	rand_likerandint_likearange	ones_like
zeros_like_CREATE_OPS_IGNORE_OPSfloatr   r0   intr)   r?    r   r   <module>rx      s    F 0 ' yy~~	     	  				 
 	  	  	

  	  	  	  	  	  	  	  	   	! " 	# $ 	% & 			' ( 			) * 	+ , 	- . 	/ 0 	1 2 	3 4 	5 6 	7 8 	9 : 	; < 	= > 	

? "	H 

		
 +%E D)U\\ )c )&e r   