
    
3j<                   2   % S SK Jr  S SKrS SKrS SKrS SKrS SKJr  S SKJ	r	  S SK
JrJrJr  S SKrS SKJr  S SKJs  Jr  \R                   R-                  5       (       a	  S SKJs  Jr  SSKJrJrJrJrJrJ r J!r!J"r"J#r#J$r$J%r%J&r&J'r'J(r(J)r)J*r*  SSK+J,r,J-r-  SS	K.J/r/J0r0  S
SK1J2r2  \(       a  S
SK1J3r3  Sr4Sr5Sr6Sr7Sr8Sr9\" \:5      r;\" 5       =(       a	    \ " S\45      r<\" 5       r=\" 5       =(       a	    \" S\55      r>\#" 5       =(       a	    \$" S\65      r?\&" S\75      r@\%" 5       rA\'" 5       =(       a	    \(" S\85      rB\)" 5       =(       a	    \*" S\95      rC\<(       a   S SKDJErEJFrF  S SKGJHrHJIrI  OSrESrFSrHSrI\=(       a   S SKOJErP  S SKOJFrQ  OSrPSrQ\>(       a   S SKRJErS  OSrS\?(       a   S SKTJUrUJVrVJWrWJXrXJYrYJZrZ  OSrUSrXSrYSrVSrWSrZ\@(       a   S SK[Js  J\s  J]r]  OSr]\A(       a   S S!K^J_r_  OSr_\B(       a   S S#K`Jarb  OSrb\C(       a   S SKcJdre  OSre\R                  S&:  a-  \R                  R                  ri\R                  R                  rkOSSSS'.S( jjrlSSS
S).S* jjrm\lri\mrk " S+ S,\n\	5      ro " S- S.5      rp\ " S/ S05      5       rq\oR                  \q" S1S2S3S4S
S59\oR                  \q" S1S6S
S79\oR                  \q" S8S2S9S:S
S59\oR                  \q" S8S6S
S79\oR                  \q" S;S<S
S79\oR                  \q" S=S2S S790rxS>\yS?'   \R                  \oR                  4SS@ jj5       r|      SSSSB.                       SSC jjjr}SSD jr~SSE jrSSF jrSSG jrSSH jrSSI jr S         SSJ jjrSSK jr\/" SLSM9 S       SSN jj5       r S       SSO jjr  S           SSP jjrSSQ jrSR rSSS jrSST jr\i" SUSVSWSX9            S                               SSY jj5       r\k" SU5                  S                               SSZ jj5       r        S                       SS\ jjr    SS] jr        S                       SS^ jjr    SS_ jr        S                       SS` jjr    SSa jr        SSbSc.                         SSd jjjr    SSe jr        SSbSc.                         SSf jjjr    SSg jr        SSbSAS
SSS Sh.                                   SSi jjjr    SSj jr        S                       SSk jjr        S                       SSl jjr    SSm jrSSSn jjr        S                       SSo jjr    SSp jrSq rSSr jrSSSs jjr " St Su\GRH                  GRJ                  5      rSSv jrSSw jrSSx jrSSy jrSSz jr\0      SS{ j5       r\0SS| j5       r " S} S~\GRH                  GRJ                  5      r " S S\GRH                  GRJ                  5      r " S S\GRH                  GRJ                  5      r " S S\GRH                  GRJ                  5      r   S                       SS jjr      SSS.                   SS jjjr\pGRi                  \oGRj                  \\\/S[S9       S                     SS jj5       r\pGRi                  \oR                  \\\/S[S9       S                     SS jj5       r\pGRi                  \oR                  \\\/SS9       S                     SS jj5       r\pGRi                  \oGRr                  \\\/S9       S                     SS jj5       r\pGRi                  \oGRv                  \\\/S9     S                 SS jj5       r\pGRi                  \oR                  \\\/S[S9        S                       SS jj5       r\pGRi                  \oR                  \\\/SS9     S                 SS jj5       r\pGRi                  \oR                  \\\/SS9     S                 SS jj5       r\pGRi                  \oGR                  \\\/S9     S                 SS jj5       r\pGRi                  \oGR                  \\\/S9      S                   SS jj5       r\pGRi                  \oGR                  \~\\/S9      S                   SS jj5       r S       SS jjr\pGRi                  \oR                  \\/S[S9       S                     SS jj5       r\pGRi                  \oGR                  \\\/S[S9       S                     SS jj5       r\pGRi                  \oGR                  \\/S9       S                     SS jj5       r\pGRi                  \oGR                  \\\/S[S9       S                     SS jj5       r\pGRi                  \oGR                  \\/S9       S                     SS jj5       r\pGRi                  \oGR                  \\\/S[S9     S                 SS jj5       r\pGRi                  \oGR                  \\/S9    S               SS jj5       r\pGRi                  \oGR                  \\\/S[S9     S                 SS jj5       r\pGRi                  \oR                  \\\/S[S9     S                 SS jj5       r\pGRi                  \oGR                  \\\/S9     S                 SS jj5       r\pGRi                  \oGR                  \" SS 5      \/S9     S                 SS jj5       r\pGRi                  \oGR                  \" SS 5      \/S9     S                 SS jj5       r\pGRi                  \oGR                  \" SS 5      \/S9     S                 SS jj5       r\pGRi                  \oGR                  \" SS 5      \/S9     S                 SS jj5       r\pGRi                  \oGR                  \~\\/S9       S                     SS jj5       rg! \J\K\L4 a*  rM\;R                  S\M S35        Sr<SrESrFSrHSrI SrMCMGNSrMCMff = f! \J\K\L4 a&  rM\;R                  S\M S35        Sr=SrPSrQ SrMCMGNSrMCMff = f! \J\K\L4 a$  rM\;R                  S\M S35        Sr>SrS SrMCMGNSrMCMff = f! \J\K\L4 a.  rM\;R                  S\M S35        Sr?SrUSrVSrWSrXSrYSrZ SrMCMGNSrMCMff = f! \J\K\L4 a$  rM\;R                  S \M S35        Sr@Sr] SrMCMGN,SrMCMff = f! \J\K\L4 a$  rM\;R                  S"\M S35        SrASr_ SrMCMGNKSrMCMff = f! \J\K\L4 a$  rM\;R                  S$\M S35        SrBSrb SrMCMGNjSrMCMff = f! \J\K\L4 a$  rM\;R                  S%\M S35        SrCSre SrMCMGNSrMCMff = f)    )annotationsN)	dataclass)Enum)TYPE_CHECKINGAnyCallable   )
get_loggeris_aiter_availableis_aiter_versionis_flash_attn_3_availableis_flash_attn_availableis_flash_attn_versionis_kernels_availableis_kernels_versionis_sageattention_availableis_sageattention_versionis_torch_npu_availableis_torch_versionis_torch_xla_availableis_torch_xla_versionis_xformers_availableis_xformers_version)DIFFUSERS_ATTN_BACKENDDIFFUSERS_ATTN_CHECKS)lru_cache_unless_exportmaybe_allow_in_graph   )gather_size_by_comm)ParallelConfigz2.6.3z0.1.5z2.1.1z2.5.0z2.2z0.0.29>=)flash_attn_funcflash_attn_varlen_func)_wrapped_flash_attn_backward_wrapped_flash_attn_forwardz.flash_attn is installed but failed to import: z+. Falling back to native PyTorch attention.F)r"   )r#   zflash_attn_3 failed to import: z#. Falling back to native attention.zaiter failed to import: )sageattnsageattn_qk_int8_pv_fp8_cuda!sageattn_qk_int8_pv_fp8_cuda_sm90sageattn_qk_int8_pv_fp16_cudasageattn_qk_int8_pv_fp16_tritonsageattn_varlenz sageattention failed to import: z!flex_attention failed to import: )npu_fusion_attentionztorch_npu failed to import: )flash_attentionztorch_xla failed to import: zxformers failed to import: z2.4.0)device_typesschemac                  S nUc  U$ U$ )Nc                    U $ N funcs    ]/home/wildlama/miniconda3/lib/python3.13/site-packages/diffusers/models/attention_dispatch.pywrapcustom_op_no_op.<locals>.wrap       K    r3   )namefnmutates_argsr.   r/   r7   s         r6   custom_op_no_opr>          	 zt)r)r:   )lib_stacklevelc                  S nUc  U$ U$ )Nc                    U $ r2   r3   r4   s    r6   r7   !register_fake_no_op.<locals>.wrap   r9   r:   r3   )opr<   r@   rA   r7   s        r6   register_fake_no_oprF      r?   r:   c                  |    \ rS rSrSrSrSrSrSrSr	Sr
S	rS
rSrSrSrSrSrSrSrSrSrSrSrSrSrSrSrSrSrSrg)AttentionBackendName   flash	flash_hubflash_varlenflash_varlen_hubflash_4_hub_flash_3_flash_varlen_3_flash_3_hub_flash_3_varlen_hubaiterflexnative_native_cudnn_native_efficient_native_flash_native_math_native_npu_native_xlasagesage_hubsage_varlen_sage_qk_int8_pv_fp8_cuda_sage_qk_int8_pv_fp8_cuda_sm90_sage_qk_int8_pv_fp16_cuda_sage_qk_int8_pv_fp16_tritonxformersr3   N)__name__
__module____qualname____firstlineno__FLASH	FLASH_HUBFLASH_VARLENFLASH_VARLEN_HUBFLASH_4_HUB_FLASH_3_FLASH_VARLEN_3_FLASH_3_HUB_FLASH_3_VARLEN_HUBAITERFLEXNATIVE_NATIVE_CUDNN_NATIVE_EFFICIENT_NATIVE_FLASH_NATIVE_MATH_NATIVE_NPU_NATIVE_XLASAGESAGE_HUBSAGE_VARLEN_SAGE_QK_INT8_PV_FP8_CUDA_SAGE_QK_INT8_PV_FP8_CUDA_SM90_SAGE_QK_INT8_PV_FP16_CUDA_SAGE_QK_INT8_PV_FP16_TRITONXFORMERS__static_attributes__r3   r:   r6   rH   rH      s     EI!L)KH'O!L/ E DF#M+#M!LKK DHK ;%E"!=#A  Hr:   rH   c                      \ rS rSr0 r0 r0 r\" 5       r\	" \
5      r\r\  S	     S
S jj5       r\S 5       r\SS j5       r\S 5       r\    SS j5       rSrg)_AttentionBackendRegistryi  Nc                V   ^ ^^^ [         R                  ST ST 35        UU UU4S jnU$ )NzRegistering attention backend: z with constraints: c                6  > U TR                   T'   T=(       d    / TR                  T'   [        [        R                  " U 5      R
                  R                  5       5      TR                  T'   T(       a%  TR                  R                  TR                  5        U $ r2   )	_backends_constraintssetinspect	signature
parameterskeys_supported_arg_names_supports_context_paralleladdvalue)r5   backendclsconstraintssupports_context_parallels    r6   	decorator5_AttentionBackendRegistry.register.<locals>.decorator  su    %)CMM'"(3(9rCW%03G4E4Ed4K4V4V4[4[4]0^C$$W-(..227==AKr:   )loggerdebug)r   r   r   r   r   s   ```` r6   register"_AttentionBackendRegistry.register  s4     	6wi?RS^R_`a	 	 r:   c                L    U R                   U R                  U R                      4$ r2   )_active_backendr   r   s    r6   get_active_backend,_AttentionBackendRegistry.get_active_backend'  s"    ""CMM#2E2E$FFFr:   c                    Xl         g r2   )r   )r   r   s     r6   set_active_backend,_AttentionBackendRegistry.set_active_backend+  s    %r:   c                H    [        U R                  R                  5       5      $ r2   )listr   r   r   s    r6   list_backends'_AttentionBackendRegistry.list_backends/  s    CMM&&())r:   c                8    UR                   U R                  ;   nU$ r2   )r   r   )r   r   r   s      r6   _is_context_parallel_available8_AttentionBackendRegistry._is_context_parallel_available3  s    
 %,MMS5S5S$S!((r:   r3   )NF)r   rH   r   zlist[Callable] | Noner   bool)r   str)r   rH   returnr   )rd   re   rf   rg   r   r   r   r   r   rH   r   r   r   _checks_enabledclassmethodr   r   r   r   r   r   r3   r:   r6   r   r     s    IL!$*+ABO+O .2*/	% + $(	 & G G & & * * )%) 
) )r:   r   c                      \ rS rSr% SrS\S'   S\S'   SrS\S'   SrS	\S
'   SrS\S'   Sr	S\S'   Sr
S\S'   SrS\S'   SrS\S'   Srg)_HubKernelConfigi<  zEConfiguration for downloading and using a hub-based attention kernel.r   repo_idfunction_attrNz
str | Nonerevisionz
int | NoneversionzCallable | None	kernel_fnwrapped_forward_attrwrapped_backward_attrwrapped_forward_fnwrapped_backward_fnr3   )rd   re   rf   rg   __doc____annotations__r   r   r   r   r   r   r   r   r3   r:   r6   r   r   <  sZ    OLHjGZ!%I%'+*+(,:,*..+//r:   r   zkernels-community/flash-attn3r"   z(flash_attn_interface._flash_attn_forwardz)flash_attn_interface._flash_attn_backward)r   r   r   r   r   r#   )r   r   r   zkernels-community/flash-attn2z0flash_attn_interface._wrapped_flash_attn_forwardz1flash_attn_interface._wrapped_flash_attn_backwardz kernels-community/sage-attentionr&   zkernels-staging/flash-attn4z.dict['AttentionBackendName', _HubKernelConfig]_HUB_KERNELS_REGISTRYc              #  F  #    U [         R                  ;  a  [        SU  S35      e[        U 5      n [	        U 5        [        U 5        [         R                  n[         R                  U 5         Sv   [         R                  U5        g! [         R                  U5        f = f7f)z6
Context manager to set the active attention backend.
zBackend z is not registered.N)r   r   
ValueErrorrH   %_check_attention_backend_requirements"_maybe_download_kernel_for_backendr   r   )r   old_backends     r6   attention_backendr   r  s     
 /9998G9,?@AA"7+G)'2&w/+;;K009B!44[A!44[As   A*B!-B 1B!BB!        )r   parallel_configc	                  U=(       d    0 nU	c  [         R                  5       u  pO*[        U	5      n[         R                  R	                  U5      nU UUUUUUS.UESU
0En[
        (       a  X}S'   [         R                  (       ay  [        U5      [        [         R                  U   5      -
  nU(       a  [        R                  SU SU S35        [         R                  R	                  U5       H  nU" S0 UD6  M     UR                  5        VVs0 s H"  u  nnU[         R                  U   ;   d  M  UU_M$     nnnU" S0 UD6$ s  snnf )N)querykeyr   	attn_mask	dropout_p	is_causalscale_parallel_config
enable_gqaz5Removing unsupported arguments for attention backend z: .r3   )r   r   rH   r   get_CAN_USE_FLEX_ATTNr   r   r   r   warningr   items)r   r   r   r   r   r   r   r   attention_kwargsr   r   backend_name
backend_fnkwargsremoved_kwargscheckkvs                     r6   dispatch_attention_fnr     sN    (-2 $=#O#O#Q j+G4.88<<\J
 
 
 	O
F )| 00Vs+D+Y+YZf+g'hhNNRS_R``bcqbrrstu.;;??MEOFO N  &||~s~tq!6O6d6deq6r1rdad~Fs ts   E2Ec                0    U b  U(       a  [        S5      eg g )Nz8`is_causal` cannot be True when `attn_mask` is not None.)r   )r   r   r   s      r6   _check_attn_mask_or_causalr     s    STT "+r:   c                    U R                   UR                   :w  d  U R                   UR                   :w  a  [        S5      eU R                  UR                  :w  d  U R                  UR                  :w  a  [        S5      eg )Nz1Query, key, and value must be on the same device.z/Query, key, and value must have the same dtype.)devicer   dtyper   r   r   r   s       r6   _check_devicer     s]    ||szz!U\\U\\%ALMM{{cii5;;%++#=JKK $>r:   c                f    [        XU5        U R                  R                  S:w  a  [        S5      eg )Ncudaz/Query, key, and value must be on a CUDA device.)r   r   typer   r   s       r6   _check_device_cudar     s/    %e$||F"JKK #r:   c                   ^ ^ SU U4S jjnU$ )Nc                   > [        XU5        [        R                  R                  U R                  5      TT4:  a  [        ST ST S35      eg )NzJQuery, key, and value must be on a CUDA device with compute capability >= r   )r   torchr   get_device_capabilityr   r   )r   r   r   r   majorminors       r6   check_device_cuda:_check_device_cuda_atleast_smXY.<locals>.check_device_cuda  sW    5u-::++ELL9UENJ\]b\ccdejdkklm  Kr:   r   torch.Tensorr   r   r   r   r   Noner3   )r   r   r   s   `` r6   _check_device_cuda_atleast_smXYr     s      r:   c                    U R                   UR                   :w  a  [        S5      eU R                   UR                   :w  a  [        S5      eg )Nz'Query and key must have the same dtype.z)Query and value must have the same dtype.)r   r   r   s       r6   _check_qkv_dtype_matchr     s?    {{ciiBCC{{ekk!DEE "r:   c                    [        XU5        U R                  [        R                  [        R                  4;  a  [        S5      eg )Nz9Query, key, and value must be either bfloat16 or float16.)r   r   r   bfloat16float16r   r   s       r6   _check_qkv_dtype_bf16_or_fp16r     s6    5u-{{5>>5==99TUU :r:   c                   U R                   S   UR                   S   :w  a  [        S5      eUR                   S   UR                   S   :w  a  [        S5      eUb,  UR                   S   UR                   S   :w  a  [        S5      eg g )Nz0Query and key must have the same head dimension.z1Key and value must have the same sequence length.z4Attention mask must match the key's sequence length.)shaper   )r   r   r   r   r   s        r6   _check_shaper     s     {{2#))B-'KLL
yy}B'LMM!4		"!EOPP "Fr:   c                   U [         R                  [         R                  4;   a,  [        (       d   [	        SU R
                   S[         S35      eg U [         R                  [         R                  4;   a%  [        (       d  [	        SU R
                   S35      eg U [         R                  [         R                  [         R                  [         R                  [         R                  [         R                  4;   a  [!        5       (       d  [	        SU R
                   S35      e[#        SS	5      (       d  [	        SU R
                   S
35      eU [         R                  :X  a+  [#        SS5      (       d  [	        SU R
                   S35      eg g U [         R$                  :X  a,  [&        (       d   [	        SU R
                   S[(         S35      eg U [         R*                  [         R,                  [         R.                  [         R0                  [         R2                  [         R4                  4;   a,  [6        (       d   [	        SU R
                   S[8         S35      eg U [         R:                  :X  a%  [<        (       d  [	        SU R
                   S35      eg U [         R>                  :X  a%  [@        (       d  [	        SU R
                   S35      eg U [         RB                  :X  a,  [D        (       d   [	        SU R
                   S[F         S35      eg U [         RH                  :X  a,  [J        (       d   [	        SU R
                   S[L         S35      eg g )NzFlash Attention backend 'zb' is not usable because of missing package or the version is too old. Please install `flash-attn>=z`.zFlash Attention 3 backend 'zp' is not usable because of missing package or the version is too old. Please build FA3 beta release from source.z	Backend 'zl' is not usable because the `kernels` package isn't available. Please install it with `pip install kernels`.r!   z0.12zj' needs to be used with a `kernels` version of at least 0.12. Please update with `pip install -U kernels`.z0.12.3zl' needs to be used with a `kernels` version of at least 0.12.3. Please update with `pip install -U kernels`.zAiter Attention backend 'z]' is not usable because of missing package or the version is too old. Please install `aiter>=zSage Attention backend 'ze' is not usable because of missing package or the version is too old. Please install `sageattention>=zFlex Attention backend 'zd' is not usable because of missing package or the version is too old. Please install `torch>=2.5.0`.zNPU Attention backend 'za' is not usable because of missing package or the version is too old. Please install `torch_npu`.zXLA Attention backend 'za' is not usable because of missing package or the version is too old. Please install `torch_xla>=zXformers Attention backend 'z`' is not usable because of missing package or the version is too old. Please install `xformers>=)'rH   rh   rj   _CAN_USE_FLASH_ATTNRuntimeErrorr   _REQUIRED_FLASH_VERSIONrm   rn   _CAN_USE_FLASH_ATTN_3ri   rk   ro   rp   r{   rl   r   r   rq   _CAN_USE_AITER_ATTN_REQUIRED_AITER_VERSIONrz   r|   r}   r~   r   r   _CAN_USE_SAGE_ATTN_REQUIRED_SAGE_VERSIONrr   r   rx   _CAN_USE_NPU_ATTNry   _CAN_USE_XLA_ATTN_REQUIRED_XLA_VERSIONr   _CAN_USE_XFORMERS_ATTN_REQUIRED_XFORMERS_VERSION)r   s    r6   r   r     s   '--/C/P/PQQ""+GMM?  ;]  ^u  ]v  vx  y  #
 
)224H4X4XY	Y$$-gmm_  =m  n  %
 
&&--))00%%(( 
 $%%GMM?  +W  X  "$//GMM?  +U  V  *666?QRVX`?a?aGMM?  +W  X  @b6
 
(..	.""+GMM?  ;X  Yp  Xq  qs  t  #
 
!!((66;;7799 
 "!*7==/  :_  `v  _w  wy  z  "
 
(--	-!!*7==/  :^  _  "
 
(44	4  )'--  9Z  [  !
 
(44	4  )'--  9Z  [p  Zq  qs  t  !
 
(11	1%%.w}}o  >^  _y  ^z  z|  }  & 
2r:      )maxsizec                   [         R                  " U 4U[         R                  US9n[         R                  " U 4U[         R                  US9n[         R                  " U S-   [         R                  US9n[         R                  " U S-   [         R                  US9n[         R                  " USS9USS & [         R                  " USS9USS & UR                  5       R                  5       nUR                  5       R                  5       n	XE4Xg4X44$ )Nr   r   r   r   dim)r   fullint32zeroscumsummaxitem)

batch_size	seq_len_q
seq_len_kvr   	seqlens_q	seqlens_kcu_seqlens_qcu_seqlens_kmax_seqlen_qmax_seqlen_ks
             r6   3_prepare_for_flash_attn_or_sage_varlen_without_maskr"  P  s     

J=)5;;vVI

J=*EKKPVWI;;zA~U[[PL;;zA~U[[PL||I15L||I15L==?'')L==?'')L!L#?,A]]]r:   c                   [         R                  " U 4U[         R                  US9nUR                  S[         R                  S9n[         R                  " U S-   [         R                  US9n[         R                  " U S-   [         R                  US9n[         R
                  " USS9USS & [         R
                  " USS9USS & UR                  5       R                  5       nUR                  5       R                  5       n	XE4Xg4X44$ )Nr  r   )r  r   r   r  )r   r  r  sumr  r  r  r  )
r  r  r   r   r  r  r  r  r   r!  s
             r6   0_prepare_for_flash_attn_or_sage_varlen_with_maskr%  b  s     

J=)5;;vVI!5;;7I;;zA~U[[PL;;zA~U[[PL||I15L||I15L==?'')L==?'')L!L#?,A]]]r:   c                8    Uc  [        XX$5      $ [        XX45      $ r2   )r"  r%  )r  r  r  r   r   s        r6   &_prepare_for_flash_attn_or_sage_varlenr'  s  s&     B:Zdmm;JS\eer:   c           	        U R                   [        R                  :w  a  [        SU R                    S35      eU R                  S:X  a"  U R                  S5      R                  X5      n GOAU R                  S:X  aH  U R                  S5      SU4;  a  [        SU R                  S    SU S35      eU R                  X5      n OU R                  S	:X  aW  U R                  S5      SU4;  a  [        SU R                  S    SU S
35      eU R                  SS9n U R                  X5      n OU R                  S:X  aZ  U R                  S5      SU4;  a  [        SU R                  S    SU S35      eU R                  USSU5      n U R                  SS9n O[        SU R                   35      eU R                  X4:w  a  [        SU R                   SU SU S35      eU $ )z
Normalize an attention mask to shape [batch_size, seq_len_k] (bool) suitable for inferring seqlens_[q|k] in
FlashAttention/Sage varlen.

Supports 1D to 4D shapes and common broadcasting patterns.
z)Attention mask must be of type bool, got r   r   r   r	   zattn_mask.shape[0] (z) must be 1 or z for 2D attention mask.   z for 3D attention mask.r     z for 4D attention mask.r   )r   r	   z"Unsupported attention mask shape: z.Normalized attention mask shape mismatch: got z, expected (z, ))
r   r   r   r   ndim	unsqueezeexpandsizer   any)r   r  	seq_len_ks      r6   _normalize_attn_maskr2    s    %**$DY__DUUVWXX~~''*11*H		1	>>!Q
O3&yq'9&:/*Ulm  $$Z;		1	 >>!Q
O3&yq'9&:/*Ulm  MMaM(	$$Z;		1	>>!Q
O3&yq'9&:/*Ulm  $$ZRC	MMfM-	 =ioo=NOPP:11<Y__<M\ZdYeeghqgrrst
 	
 r:   c                
    X#:  $ r2   r3   )	batch_idxhead_idxq_idxkv_idxs       r6   _flex_attention_causal_mask_modr8    s
    ?r:   c                    U nUR                  S5       H:  n[        X#5      (       d  [        SU R                   SU S35      e[	        X#5      nM<     U$ )Nr   zKernel module 'z"' does not define attribute path 'z'.)splithasattrAttributeErrorrd   getattr)module	attr_pathtargetattrs       r6   _resolve_kernel_attrrB    sY    F$v$$ ?6??2CCefoeppr!stt& % Mr:   c                   U [         ;  a  g [         U    nUR                  S L nUR                  S L=(       a    UR                  S L nUR                  S L=(       a    UR
                  S L nU(       d  U(       d  U(       d  g  SSKJn  U" UR                  UR                  UR                  S9nU(       a  [        XaR                  5      Ul        U(       a  [        XaR                  5      Ul        U(       a  [        XaR                  5      Ul        g g ! [         a+  n[        R                  SUR                   SU 35        e S nAff = f)Nr   )
get_kernel)r   r   z)An error occurred while fetching kernel 'z' from the Hub: )r   r   r   r   r   r   kernelsrD  r   r   r   rB  r   	Exceptionr   error)r   configneeds_kernelneeds_wrapped_forwardneeds_wrapped_backwardrD  kernel_modulees           r6   r   r     s   ++"7+F##t+L"77tCiHaHaeiHi#99El&JdJdhlJl15K&"6>>FOOU[UcUcd3MCWCWXF (<]LgLg(hF%!)=mMiMi)jF& "  @@PP`ab`cdes    BD 
E&EEz,_diffusers_flash_attn_3::_flash_attn_forwardr3   r   )r=   r.   c                    Sn[        S0 SU _SU_SU_SU_SU_SU_SU_S	U_S
U_SU_SU	_SU
_SU_SU_SU_SU_SS_6nUtnnnUR                  SSS5      nUU4$ )Nr   r   qr   r   softmax_scalecausalqv	q_descale	k_descale	v_descalewindow_sizeattention_chunksoftcap
num_splitspack_gqadeterministic	sm_marginreturn_attn_probsTr   r	   r   r3   )flash_attn_3_funcpermute)rP  r   r   rQ  rR  rS  rT  rU  rV  rX  rY  rZ  r[  r\  r]  rW  resultoutlse_s                       r6   _wrapped_flash_attn_3re    s    & K 

  $	
        (    $  !" #F& LCq
++aA
C8Or:   c                    SnU R                   u  nnnnUUU4n[        R                  " U 5      U R                  U5      4$ )NrO  )r   r   
empty_like	new_empty)rP  r   r   rQ  rR  rS  rT  rU  rV  rX  rY  rZ  r[  r\  r]  rW  r  seq_len	num_headshead_dim	lse_shapes                        r6   rd  rd    sH    $ K 01ww,JHWi0IAI 666r:   Tc                8   U	(       a  [        S5      eU
(       a0  U R                  XU5        X@l        XPl        X`l        Xpl        Xl        S XU4 5       u  pn[        R                  R                  R                  UUUUUUUUS9nUR                  SSSS5      nU$ )Nz1Native attention does not support return_lse=Truec              3  H   #    U  H  oR                  S SSS5      v   M     g7fr   r	   r   r)  Nr`  .0xs     r6   	<genexpr>/_native_attention_forward_op.<locals>.<genexpr>G  "     L8K11aA..8K    "r   r   r   r   r   r   r   r   r   r	   r   r)  )r   save_for_backwardr   r   r   r   r   r   nn
functionalscaled_dot_product_attentionr`  ctxr   r   r   r   r   r   r   r   
return_lse	_save_ctxr   rb  s                r6   _native_attention_forward_opr  ,  s     LMM e%0!!!	#LU8KLE
((


:
: ; 	C ++aAq
!CJr:   c                   U R                   u  pEnUR                  S5        UR                  S5        UR                  S5        [        R                  " 5          S XEU4 5       u  pxn	[        R                  R
                  R                  UUU	U R                  U R                  U R                  U R                  U R                  S9n
U
R                  SSSS5      n
[        R                  R                  XX/USS	9u  pnS S S 5        WR                  SSSS5      nWR                  SSSS5      nWR                  SSSS5      nXU4$ ! , (       d  f       NN= f)
NTc              3  H   #    U  H  oR                  S SSS5      v   M     g7fro  rp  rq  s     r6   rt  0_native_attention_backward_op.<locals>.<genexpr>d  s"     "VBUQ99Q1a#8#8BUrw  rx  r   r	   r   r)  F)outputsinputsgrad_outputsretain_graph)saved_tensorsrequires_grad_r   enable_gradrz  r{  r|  r   r   r   r   r   r`  autogradgrad)r~  grad_outargsr   r   r   r   query_tkey_tvalue_trb  grad_query_t
grad_key_tgrad_value_t
grad_querygrad_key
grad_values                    r6   _native_attention_backward_opr  W  sH    ))E	t					"V5uBU"Vhh!!>>mmmmmm))~~ ? 	
 kk!Q1%161D1D% 9_d 2E 2
., 
$ %%aAq1J!!!Q1-H%%aAq1J++- 
	s   B'E
Ec                   U(       a  [        S5      eSnUR                  SS5      R                  5       nUR                  SS5      R                  5       nUR                  SS5      R                  5       nXX#4-  n[        R                  R
                  R                  UUUUU	UUSUS9	u	  pnnnnnnnXXUUU4-  nU
(       a5  U R                  " U6   XPl        X`l	        Xpl
        X@l        UU l        UU l        UR                  SS5      R                  5       nUb   UR                  SS5      R                  5       nU	(       a  X4$ U$ )Nz6`enable_gqa` is not yet supported for cuDNN attention.r3   r   r	   F)	r   r   r   	attn_biascompute_log_sumexpr   r   return_debug_maskr   )r   	transpose
contiguousr   opsaten#_scaled_dot_product_cudnn_attentionry  r   r   r   r   max_qmax_kr~  r   r   r   r   r   r   r   r   r  r  r   tensors_to_saverb  rc  	cum_seq_q	cum_seq_kr  r  philox_seedphilox_offsetdebug_attn_masks                         r6   _cudnn_attention_forward_opr    sN    QRRO OOAq!,,.E
--1

(
(
*COOAq!,,.Es**O 			::)# 	; 
	
 ^CiE5+}o SY]SSO/!!	!		
--1

(
(
*C
mmAq!,,.#C:,,r:   c                   U R                   u	  pEpgppnUR                  SS5      R                  5       nUR                  SS5      R                  5       nUR                  SS5      R                  5       n[        R                  R
                  R                  UUUUUUUUU R                  U	U
U R                  U R                  U R                  U R                  U R                  S9u  pnS XU4 5       u  pnXU4$ )Nr   r	   )	logsumexpr  r  r  r  r  r  r  r   r   r   c              3  `   #    U  H$  oR                  S S5      R                  5       v   M&     g7fr   r	   Nr  r  rq  s     r6   rt  /_cudnn_attention_backward_op.<locals>.<genexpr>  '     'sPr1Aq(9(D(D(F(FPr   ,.)r  r  r  r   r  r  ,_scaled_dot_product_cudnn_attention_backwardr   r  r  r   r   r   r~  r  r  r   r   r   r   rb  rc  r  r  r  r  r  r  r  s                   r6   _cudnn_attention_backward_opr    s     UXTeTeQECIM!!!Q'224H
--1

(
(
*COOAq!,,.E (-yy~~'b'b#--iiii----ii! (c ($J*$ (tQ[gqPr's$J*++r:   c           
     ~   U(       a  [        S5      eSnUR                  SS5      R                  5       nUR                  SS5      R                  5       nUR                  SS5      R                  5       nXX#4-  n[        R                  R
                  R                  UUUUUSUS9u	  pnnnnnnnXXUUU4-  nU
(       a/  U R                  " U6   XPl        X`l	        Xpl
        UU l        UU l        UR                  SS5      R                  5       nUb   UR                  SS5      R                  5       nU	(       a  X4$ U$ )Nz=`enable_gqa` is not yet supported for native flash attention.r3   r   r	   F)r   r   r   r   r   r  r   )r   r  r  r   r  r  #_scaled_dot_product_flash_attentionry  r   r   r   r  r  r  s                         r6   "_native_flash_attention_forward_opr    sA    XYYOOOAq!,,.E
--1

(
(
*COOAq!,,.Es**O 			::# 	; 	
 ^CiE5+}o SY]SSO/!!			
--1

(
(
*C
mmAq!,,.#C:,,r:   c                   U R                   u	  pEpgppnUR                  SS5      R                  5       nUR                  SS5      R                  5       nUR                  SS5      R                  5       n[        R                  R
                  R                  UUUUUUUUU	U
U R                  U R                  U R                  U R                  U R                  S9u  pnS XU4 5       u  pnXU4$ )Nr   r	   )
r  r  r  r  r  r  r  r   r   r   c              3  `   #    U  H$  oR                  S S5      R                  5       v   M&     g7fr  r  rq  s     r6   rt  6_native_flash_attention_backward_op.<locals>.<genexpr>3  r  r  )r  r  r  r   r  r  ,_scaled_dot_product_flash_attention_backwardr  r  r   r   r   r  s                   r6   #_native_flash_attention_backward_opr    s     UXTeTeQECIM!!!Q'224H
--1

(
(
*COOAq!,,.E',yy~~'b'b#iiii----ii (c ($J*" (tQ[gqPr's$J*++r:   rO  rW  c               h   Ub  [        S5      eU(       a  [        S5      eSnS nSn[        S XU4 5       5      nUc  UR                  S   S-  nU(       d  Ub$  UR                  R                  S:  a
  US	:  a  UOS
n[
        R                  " U5         [        UUUUUUUS	   US   UUU	5      u  nnnnUR                  S	SS5      nS S S 5        U
(       a?  U R                  XUWWW5        XPl
        Xpl        X`l        Xl        Xl        Xl        Xl        U	(       a  WW4$ W$ ! , (       d  f       Na= f)Nz2`attn_mask` is not yet supported for flash-attn 2.z3`enable_gqa` is not yet supported for flash-attn 2.r   Fc              3  8   #    U  H  oR                   v   M     g 7fr2   requires_gradrq  s     r6   rt  ._flash_attention_forward_op.<locals>.<genexpr>Q       D0C10C   r         r   r   KH9r	   )r   r0  r   context_parallel_config_world_sizer   set_grad_enabledr%   r`  ry  r   r   r   rW  rY  alibi_slopesr\  )r~  r   r   r   r   r   r   r   r   r  r  r   rW  rY  r  r\  grad_enabledrb  rc  S_dmask	rng_states                        r6   _flash_attention_forward_opr  9  sJ     MNNNOOGLMDU0CDDL}BD) (49I9a9a9m9mpq9q!*QIE					-'BNN(
$S'9 kk!Q" 
.  e%c9E!	!%')#C:,,5 
.	-s   4D##
D1c                   U R                   u  pEpgp[        R                  " U5      [        R                  " U5      [        R                  " U5      pn
[        UUUUUUU
UUU R                  U R
                  U R                  U R                  S   U R                  S   U R                  U R                  U R                  U	5      nU
SS UR                  S   24   n
USS UR                  S   24   nUSS UR                  S   24   nXU4$ )Nr   r   .r   )r  r   rg  r$   r   r   r   rW  rY  r  r\  r   )r~  r  r  r   r   r   r   rb  rc  r  r  r  r  lse_ds                 r6   _flash_attention_backward_opr  w  s    .1->->*EC','7'7'>@P@PQT@UW\WgWghmWn*J(		%E, C!58>>"#5!556J1x~~b1112HC!58>>"#5!556J++r:   c                  Ub  [        S5      eU(       a  [        S5      e[        [        R                     nUR                  nUR
                  nUb  Uc  [        S5      eUc  UR                  S   S-  nSnS nSn[        S XU4 5       5      nU(       d  Ub$  UR                  R                  S	:  a
  US
:  a  UOSn[        R                  " U5         U" UUUUUUUS
   US	   UUU	5      u  nnnnUR                  S
SS	5      R                  5       nS S S 5        U
(       aB  U R                  XUWWW5        XPl        Xpl        X`l        Xl        UU l        UU l        UU l        U	(       a  WW4$ W$ ! , (       d  f       Nd= f)Nz<`attn_mask` is not yet supported for flash-attn hub kernels.z=`enable_gqa` is not yet supported for flash-attn hub kernels.zFlash attention hub kernels must expose `_wrapped_flash_attn_forward` and `_wrapped_flash_attn_backward` for context parallel execution.r   r  r   Fc              3  8   #    U  H  oR                   v   M     g 7fr2   r  rq  s     r6   rt  2_flash_attention_hub_forward_op.<locals>.<genexpr>  r  r  r   r   r  r	   )r   r   rH   ri   r   r   r  r   r0  r  r  r   r  r`  r  ry  r   r   r   rW  rY  r  r\  )r~  r   r   r   r   r   r   r   r   r  r  r   rW  rH  r   r   rY  r  r\  r  rb  rc  r  r  s                           r6   _flash_attention_hub_forward_opr    s     WXXXYY"#7#A#ABF22 44!%8%@.
 	

 }BD)GLMDU0CDDL(49I9a9a9m9mpq9q!*QIE					-'9NN(
$S'9 kk!Q"--/ 
.  e%c9E!	!%')#C:,,5 
.	-s   ?E11
E?c                p   [         [        R                     nUR                  nUc  [	        S5      eU R
                  u  pgpp[        R                  " U5      [        R                  " U5      [        R                  " U5      pnU" UUUUU	U
UUUU R                  U R                  U R                  U R                  S   U R                  S   U R                  U R                  U R                  U5      nUSS UR                  S   24   nUSS UR                  S   24   nUSS UR                  S   24   nXU4$ )NzfFlash attention hub kernels must expose `_wrapped_flash_attn_backward` for context parallel execution.r   r   .r   )r   rH   ri   r   r  r  r   rg  r   r   r   rW  rY  r  r\  r   )r~  r  r  r   rH  r   r   r   r   rb  rc  r  r  r  r  rd  s                   r6    _flash_attention_hub_backward_opr    sO    ##7#A#ABF 44"t
 	
 .1->->*EC','7'7'>@P@PQT@UW\WgWghmWn*J		%	A* C!58>>"#5!556J1x~~b1112HC!58>>"#5!556J++r:   rW  rY  rZ  r[  r\  r]  c               @   Ub  [        S5      eUS:w  a  [        S5      eU(       a  [        S5      e[        [        R                     nUR                  nUc  [        S5      eUc  UR                  S   S-  nU" UUUS S S S S S S S S S S S S S S S S S S S U4UUS   US	   SUUUUS
.6tnnnU	(       a!  UR                  SSS	5      R                  5       OS nU
(       a:  U R                  XUUU5        Xpl
        X`l        Xl        Xl        UU l        UU l        U	(       a  UU4$ U$ )Nz>`attn_mask` is not yet supported for flash-attn 3 hub kernels.r   z>`dropout_p` is not yet supported for flash-attn 3 hub kernels.z?`enable_gqa` is not yet supported for flash-attn 3 hub kernels.ztFlash attention 3 hub kernels must expose `flash_attn_interface._flash_attn_forward` for context parallel execution.r   r  r   r   )rR  window_size_leftwindow_size_rightrX  rY  rZ  r[  r]  r	   )r   r   rH   ro   r   r  r   r`  r  ry  r   r   rW  rY  r\  r]  )r~  r   r   r   r   r   r   r   r   r  r  r   rW  rY  rZ  r[  r\  r]  rH  r   rb  softmax_lserd  rc  s                           r6   !_flash_attention_3_hub_forward_opr    sx   * YZZCYZZZ[["#7#D#DEF22!.
 	

 }BD)-1!2 $Q%a.A!CqF 8B+

aA
&
1
1
3tCe%kB	!%)!#C:,,r:   c                f   [         [        R                     nUR                  nUc  [	        S5      eU R
                  u  pgpn
[        R                  " U5      n[        R                  " U5      n[        R                  " U5      nU" UUUUU	U
S S S S S S UUUU R                  U R                  U R                  S   U R                  S   U R                  U R                  U R                  5        USS UR                  S   24   nUSS UR                  S   24   nUSS UR                  S   24   nXU4$ )NzuFlash attention 3 hub kernels must expose `flash_attn_interface._flash_attn_backward` for context parallel execution.r   r   .r   )r   rH   ro   r   r  r  r   rg  r   r   rW  rY  r\  r]  r   )r~  r  r  r   rH  r   r   r   r   rb  r  r  r  r  s                 r6   "_flash_attention_3_hub_backward_opr  g  sV    ##7#D#DEF 44".
 	

 +.*;*;'EK!!%(J$H!!%(J		-2 C!58>>"#5!556J1x~~b1112HC!58>>"#5!556J++r:   c           
         Ub  [        S5      eUS:  a  [        S5      eU(       a  [        S5      e[        UUUSUUU	S9nS nU	(       a  UtpnUR                  SSS	5      nU	(       a  X4$ U$ 
Nz4`attn_mask` is not yet supported for Sage attention.r   z4`dropout_p` is not yet supported for Sage attention.z5`enable_gqa` is not yet supported for Sage attention.NHDrP  r   r   tensor_layoutr   sm_scaler  r   r	   r   )r   r&   r`  )r~  r   r   r   r   r   r   r   r   r  r  r   rb  rc  rd  s                  r6   _sage_attention_forward_opr    s     OPP3OPPPQQ



C C1kk!Q"#C:,,r:   c           
     2   Ub  [        S5      eUS:  a  [        S5      eU(       a  [        S5      e[        [        R                     R                  nU" UUUSUUU	S9nS nU	(       a%  UtpnUR                  SSS	5      R                  5       nU	(       a  X4$ U$ r  )r   r   rH   r{   r   r`  r  )r~  r   r   r   r   r   r   r   r   r  r  r   r5   rb  rc  rd  s                   r6   _sage_attention_hub_forward_opr    s     OPP3OPPPQQ !5!>!>?IID



C C1kk!Q"--/#C:,,r:   c                    [        S5      e)Nz4Backward pass is not implemented for Sage attention.NotImplementedErrorr~  r  r  s      r6   _sage_attention_backward_opr    s    
 T
UUr:   c                   Ub   [         R                  " US:g  5      (       a  S nUGb-  UR                  S:X  a  UR                  S   U R                  S   :X  a  UR                  S   UR                  S   :X  ak  UR                  S   U R                  S   UR                  S   pTnUR	                  S5      R                  X4U5      R	                  S5      R                  5       nORUR                  S:X  aB  UR                  SS S:X  a/  UR                  SSU R                  S   S5      R                  5       nUR                  [         R                  5      ) nU$ )Nr   r	   r   r*  r)  )r   r   r   )	r   allr,  r   r-  r.  r  tor   )r   r   r   r  r  r  s         r6   _maybe_modify_attn_mask_npur    s%   9>!:!:	 >>Q9??1#5Q#GIOO\]L^bebkbklmbnLn090BEKKPQNTWT]T]^_T`:J!++A.55jZXbbcdepprI^^q Y__Qq%9V%C!((RQDOOQI\\%**--	r:   c                    U	(       a  [        S5      e[        XU5      n[        UUUUR                  S5      USS Uc&  S[        R
                  " UR                  S   5      -  OUSSSU-
  SSS	9S   nU$ )
NANPU attention backend does not support setting `return_lse=True`.r	   BSND      ?r      Fr   	
atten_maskinput_layoutpser   pre_tockensnext_tockens	keep_probsyncinner_precise)r   r  r,   r/  mathsqrtr   r}  s                r6   _npu_attention_forward_opr    s     \]]+E	BI


127-cDIIekk"o..U	/ 	C  Jr:   c                    [        S5      e)Nz:Backward pass is not implemented for Npu Fusion Attention.r  )r~  r  r  r   s       r6   _npu_attention_backward_opr  *  s     Z
[[r:   c                d    [        U [        R                  5      (       a  U R                  5       n U $ r2   )
isinstancefuncolAsyncCollectiveTensorwait)tensors    r6   _wait_tensorr  :  s%    &&6677Mr:   c                    U R                   nU R                  5       n [        R                  " U S S U5      n U R	                  U5      n [        U 5      n U $ r2   )r   flattenr
  all_to_all_singlereshaper  )rs  groupr   s      r6   _all_to_all_singler  @  sJ    GGE
 	
		A  D$6A			%AQAHr:   c                   [         R                  R                  U5      nUS:X  a  US:X  a  U R                  u  pVpxXd-  n	Xt-  n
U R	                  XVXJU5      R                  SS5      R                  5       nUS:  a
  [        XS9nOUnUR	                  XX5      R                  SSSS5      R                  5       nUR	                  XYX5      nU$ US:X  a  US:X  a  U R                  u  pYpX-  nX-  nU R	                  XTXjU5      R                  SSSSS5      R	                  XJXeU5      nUS:  a  [        X5      nOUnUR	                  XvXX5      R                  SS5      R                  5       nUR	                  XVXx5      nU$ [        S5      e)u  
Perform dimension sharding / reassembly across processes using _all_to_all_single.

This utility reshapes and redistributes tensor `x` across the given process group, across sequence dimension or
head dimension flexibly by accepting scatter_idx and gather_idx.

Args:
    x (torch.Tensor):
        Input tensor. Expected shapes:
        - When scatter_idx=2, gather_idx=1: (batch_size, seq_len_local, num_heads, head_dim)
        - When scatter_idx=1, gather_idx=2: (batch_size, seq_len, num_heads_local, head_dim)
    scatter_idx (int) :
        Dimension along which the tensor is partitioned before all-to-all.
    gather_idx (int):
        Dimension along which the output is reassembled after all-to-all.
    group :
        Distributed process group for the Ulysses group.

Returns:
    torch.Tensor: Tensor with globally exchanged dimensions.
        - For (scatter_idx=2 → gather_idx=1): (batch_size, seq_len, num_heads_local, head_dim)
        - For (scatter_idx=1 → gather_idx=2): (batch_size, seq_len_local, num_heads, head_dim)
r	   r   r   r  r)  r*  z<Invalid scatter/gather indices for _all_to_all_dim_exchange.)
r   distributedget_world_sizer   r  r  r  r  r`  r   )rs  scatter_idx
gather_idxr  group_world_sizer  seq_len_localrj  rk  ri  num_heads_localx_temprb  outputs                 r6   _all_to_all_dim_exchanger   M  s   0 ((77>aJ!O :;6
92#7 IIj1AT\]Yq!_Z\ 	 a$V9CCkk'IQQRSUVXY[\]hhjkk*I
		jAo :;6
_#6	3 IIjMT\]WQ1a#W%S[\ 	 a'6FF	*OYYZ[]^_jjl
9OWXXr:   c                  <    \ rS rSrSr\SS j5       r\S 5       rSrg)SeqAllToAllDimi  z
all_to_all operation for unified sequence parallelism. uses _all_to_all_dim_exchange, see _all_to_all_dim_exchange
for more info.
c                >    Xl         X0l        X@l        [        X#XA5      $ r2   )r  
scatter_id	gather_idr   )r~  r  inputr$  r%  s        r6   forwardSeqAllToAllDim.forward  s    	#!'9LLr:   c                z    [         R                  U R                  UU R                  U R                  5      nS US S 4$ r2   )r"  applyr  r%  r$  )r~  r  
grad_inputs      r6   backwardSeqAllToAllDim.backward  s;    #))IIMMNN	

 j$--r:   r3   N)r	   r   )	rd   re   rf   rg   r   staticmethodr'  r,  r   r3   r:   r6   r"  r"    s1    
 M M . .r:   r"  c                    [         R                  " US9nSnX-  S:w  aI  X1U-  -
  nX-   U-  nXE:  d   SU SU 35       e[        R                  " U SSSU45      R	                  5       n X4$ )zMaybe pad the head dimension to be divisible by world_size.
x: torch.Tensor, shape (B, S_LOCAL, H, D) H: int, original global head num return: tuple[torch.Tensor, int], padded
tensor (B, S_LOCAL, H + H_PAD, D) and H_PAD
r  r   Padding head num ( should be less than new local head num )distr  Fpadr  )rs  Hr  
world_sizeH_PADNEW_H_LOCALs         r6   _maybe_pad_qkv_headr9    s    
 $$51JE~*n-yZ/ "t&7w>fgrfs$tt"EE!aAu%&1138Or:   c                    [         R                  " US9n[         R                  " US9nUS:  a  X4S-
  :X  a  U SS2SS2SU* 2SS24   n U R                  5       $ )zMaybe unpad the head dimension.
x: torch.Tensor, shape (B, S_GLOBAL, H_LOCAL + H_PAD, D) H_PAD: int, head padding num return: torch.Tensor,
unpadded tensor (B, S_GLOBAL, H_LOCAL, D)
r  r   r   N)r2  get_rankr  r  )rs  r7  r  rankr6  s        r6   _maybe_unpad_qkv_headr=    sW    
 ==u%D$$51JqyT!^+aGeVGQ<<>r:   c                   Uc  U S4$ [         R                  " US9n[         R                  " US9nSnX-  S:w  aQ  XAU-  -
  nX-   U-  nXV:  d   SU SU 35       eX4S-
  :X  a)  [        R                  " U SSSU45      R                  5       n X4$ )zMaybe pad the head dimension to be divisible by world_size.
x: torch.Tensor, shape (B, S_GLOBAL, H_LOCAL, D) H: int, original global head num return: tuple[torch.Tensor, int],
padded tensor (B, S_GLOBAL, H_LOCAL + H_PAD, D) and H_PAD
r   r  r0  r1  r   )r2  r;  r  r3  r4  r  )rs  r5  r  r<  r6  r7  r8  s          r6   _maybe_pad_o_headr?    s    
 	y!t==u%D$$51JE~ *n-yZ/"t&7w>fgrfs$tt">!a!Q5)*557A8Or:   c                R    US:  a  U SS2SS2SU* 2SS24   n U R                  5       $ )zMaybe unpad the head dimension.
x: torch.Tensor, shape (B, S_LOCAL, H_GLOBAL + H_PAD, D) H_PAD: int, head padding num return: torch.Tensor,
unpadded tensor (B, S_LOCAL, H_GLOBAL, D)
r   N)r  )rs  r7  r  s      r6   _maybe_unpad_o_headrA    s0    
 qyaGeVGQ<<>r:   c                    [        U R                  5      S:X  d   S5       e0 nU R                  S   US'   U R                  S   US'   U$ )Nr*  zEQuery tensor must be 4-dimensional of shape (B, S_LOCAL, H_GLOBAL, D)r	   NUM_QO_HEADr   	Q_S_LOCAL)lenr   )r   r   extra_kwargss      r6   ulysses_anything_metadatarG    sN    u{{q i"ii L"'++a.L %ALr:   c                v  ^ ^^ [         R                  " TS9nT R                  u  pEpg[        T UT5      u  m mUT-   U-  nT R	                  XEX8U5      R                  SSSSS5      R                  5       m U/U-  n	[        UT5      n
T R                  SS5      m [        R                  " T XT5      m SUUU 4S jjnU$ )	zc
x: torch.Tensor, shape (B, S_LOCAL, H, D) return: Callable that returns (B, S_GLOBAL, H_LOCAL, D)
r  r	   r   r   r)  r*  c                 |   > [        T5      mTR                  SSSS5      R                  5       m[        TT T5      mT$ )Nr   r   r	   r)  )r  r`  r  r=  )r7  r  rs  s   r6   r  -all_to_all_single_any_qkv_async.<locals>.wait  s>    O IIaAq!,,.!!UE2r:   r   r   )r2  r  r   r9  r  r`  r  r   r  r
  r  )rs  r  r   r6  BS_LOCALr5  DH_LOCALinput_split_sizesoutput_split_sizesr  r7  s   ``          @r6   all_to_all_single_any_qkv_asyncrR    s     $$51JwwA"1a/HAu5yZ'G			!j15==aAq!LWWYA 	J. -We<			!QA  $65QA  Kr:   c           	       ^ ^^	^
^^^^ UR                  SS5      n[        R                  " TS9m[        T UT5      u  m mT R                  nUu  m	nmm
UR                  S5      m[        TT5      nT R                  SSSS5      R                  5       m T/T-  n[        R                  " T XvT5      m S
U	U
UUUUUU 4S	 jjnU$ )zj
x: torch.Tensor, shape (B, S_GLOBAL, H_LOCAL, D) return: Callable that returns (B, S_LOCAL, H_GLOBAL, D)
rC  Nr  rD  r   r   r	   r)  c                    > [        T5      mTR                  TTT TT5      mTR                  SSSSS5      R                  5       mTR                  T TTT-  T5      m[	        TTT5      mT$ )Nr	   r   r   r)  r*  )r  r  r`  r  rA  )rL  rN  rO  r7  rM  r  r6  rs  s   r6   r  +all_to_all_single_any_o_async.<locals>.wait6  sm    OIIj'1gq9IIaAq!$//1IIa*w"6:5%0r:   rK  )
r   r2  r  r?  r   r   r`  r  r
  r  )rs  r  r   r5  r   S_GLOBALrP  rQ  r  rL  rN  rO  r7  rM  r6  s   ``       @@@@@@r6   all_to_all_single_any_o_asyncrW    s     	

=$'A$$51J Au-HAuGGE %Q'1 jj%G+GU;			!Q1((*A!Z/  $65QA  Kr:   c                  t    \ rS rSr\ S                     SS jj5       r\    SS j5       rSrg)	TemplatedRingAttentioniB  Nc                   UR                   R                  nUR                   R                  nUR                   R                  nUS-   U-  nS =nnXl        Xl        UR                  U l        UR                  U l        Xl	        [        R                  " UR                  5       UR                  5       /5      R                  5       n[        R                  " USUR!                  5       S9nUR#                  U5      n[%        U5       GHK  nUS:  aE  UU   nUR'                  5       nUS U R)                  U5      nUUS  R)                  U5      nUS-   U-  nU
" U UUUUUUUUSUS:H  US9u  nnUR                   R*                  (       a>  UR-                  [        R.                  5      nUR-                  [        R.                  5      nUR0                  S:X  a  UR3                  S5      nUbd  U[        R4                  R6                  R9                  UU-
  5      UU-
  -  -
  nU[        R4                  R6                  R;                  UU-
  5      -
  nUnUnGMN     WR-                  UR<                  5      nWR?                  S5      nU	(       a  UU4$ U$ )Nr   r   
gather_dimr  Tr  r   r)  r   ) r  
_ring_mesh_ring_local_rankring_degree
forward_opbackward_opr   q_shapekv_shaper   r   catr  r  r
  all_gather_tensor	get_groupchunkrangenumel
reshape_asconvert_to_fp32r  float32r,  r-  rz  r{  sigmoid
logsigmoidr   squeeze)r~  r   r   r   r   r   r   r   r   r  ra  rb  r   	ring_meshr<  r6  	next_rankprev_outprev_lse	kv_bufferikv	key_numelrb  rc  s                            r6   r'  TemplatedRingAttention.forwardC  sI     %<<GG	77HH%==II
AX+	""8#%kkyy/IIs{{}emmo>?JJL	,,Y1IL_L_Lab	OOJ/	z"A1uy)IIK	)n//49:11%8&]j8	!q&!1HC  77GGffU]]+ffU]]+ xx1}mmB'#!4!4!<!<S8^!LPX[^P^!__!4!4!?!?3!OOHHM #P ffU[[!kk"o'Sz0S0r:   c                J  ^ U R                   R                  R                  nU R                   R                  R                  nU R                   R                  R                  nUS-   U-  n[        [        SU5      5      S/-   nU R                   R                  R                  (       a  [        R                  OTR                  n[        R                  " U R                  UTR                  S9n	[        R                  " U R                  UTR                  S9n
[        R                  " U R                  UTR                  S9nS nU R                  tpnn[        R                   " UR#                  5       UR#                  5       /5      R%                  5       n[&        R(                  " USUR+                  5       S9nUR-                  U5      n[        U5       GH-  nUS:  aE  UU   nUR/                  5       nUS U R1                  U5      nUUS  R1                  U5      nUS-   U-  nU R3                  U T5      tnnnnUS:  aC  [5        U5      nU
R/                  5       nUS U R1                  U
5      n
UUS  R1                  U5      nU	U-  n	U
U-  n
UU-  nUUS-
  :  d  M  [        R                   " U
R#                  5       UR#                  5       /5      R%                  5       n[&        R6                  " UXsR+                  5       S9nGM0     U4S jXU4 5       u  pnXUS S S S S S S S S 4$ )Nr   r   r  r[  r  c              3  X   >#    U  H  oR                  TR                  5      v   M!     g 7fr2   )r  r   )rr  rs  r  s     r6   rt  2TemplatedRingAttention.backward.<locals>.<genexpr>  s!     +mJlQDD,@,@Jls   '*)r   r  r^  r_  r`  r   ri  rl  r   rm  r   r  rc  r   rd  r  re  r  r  r
  rf  rg  rh  rj  rk  rb  r  permute_tensor)r~  r  r  rq  r<  r6  rr  
next_ranksaccum_dtyper  r  r  next_grad_kvr   r   r   rd  ru  rv  rw  rx  grad_query_opgrad_key_opgrad_value_opgrad_kv_buffergrad_key_numels    `                        r6   r,  TemplatedRingAttention.backward  s    ((@@KK	##;;LL))AAMM
AX+	%:./1#5
'*';';'S'S'c'cemmiqiwiw[[KX
;;s||;xW[[[Y
 # 1 1EAIIs{{}emmo>?JJL	,,Y1IL_L_Lab	OOJ/	z"A1uy)IIK	)n//49:11%8&]j8	<?OOCQY<Z9M;1u!-l!;!)!1)/>:EEhO+NO<GG
S
-'J#H-'J:>!!&H,<,<,>
@R@R@T+U!V!a!a!c%44^ZWjWjWlm- #0 ,n:akJl+m(
jZtT4tUY[_aeeer:   r3   r2   r~  #torch.autograd.function.FunctionCtxr   r   r   r   r   r   r   torch.Tensor | Noner   floatr   r   r   float | Noner   r   r  r   r   'ParallelConfig' | Noner~  r  r  r   rd   re   rf   rg   r.  r'  r,  r   r3   r:   r6   rY  rY  B  s     59J10J1J1 J1 	J1
 'J1 J1 J1 J1 J1 J1 2J1 J1X 0f00f0f 0fr:   rY  c                  t    \ rS rSr\ S                     SS jj5       r\    SS j5       rSrg)	TemplatedUlyssesAttentioni  Nc                  ^ UR                   R                  nUR                   R                  nUR                  5       mXl        Xl        Xl        UR                  u  nnnnUR                  u  nn  nUU-  nUR                  UUUUU5      R                  SSSSS5      R                  5       nUR                  UUUUU5      R                  SSSSS5      R                  5       nUR                  UUUUU5      R                  SSSSS5      R                  5       nU4S jXU4 5       u  pnS XU4 5       u  pnU
" U UUUUUUUUU	SUS	9nU	(       a  UtnnnUR                  XUUU5      R                  SSSSS5      R                  5       n[        UT5      nUR                  SS5      R                  SSSS5      R                  5       nU	(       aq  WR                  XUU5      R                  SSSS5      R                  5       n[        UT5      nUR                  SS5      R                  SSS5      R                  5       nOS nU	(       a  UU4$ U$ )
Nr	   r   r   r)  r*  c              3  <   >#    U  H  n[        UT5      v   M     g 7fr2   r  rr  rs  r  s     r6   rt  4TemplatedUlyssesAttention.forward.<locals>.<genexpr>  s     WCVa/599CV   c              3     #    U  H6  oR                  S S5      R                  SS SS5      R                  5       v   M8     g7fr   r   r	   r)  Nr  r`  r  rq  s     r6   rt  r    s8     kWjRSYYq!_44Q1a@KKMMWj   >A Tr]  )r  _ulysses_meshulysses_degreerg  ra  rb  r   r   r  r`  r  r  r  )r~  r   r   r   r   r   r   r   r   r  ra  rb  r   ulysses_meshr6  rL  	S_Q_LOCALr5  rN  rd  
S_KV_LOCALrO  rb  rc  r  s                           @r6   r'  !TemplatedUlyssesAttention.forward  s\     (??MM%==LL
&&(#%/"[[9a!ii:q!z/aJCKKAqRSUVXYZeegkk!ZWa@HHAqRSUVWbbdaZ!DLLQPQSTVWYZ[ffhWEPUCVWEkX]diWjkE-
 LCqkk!GQ?GG1aQRTUVaac e,kk!Q''1a3>>@++aY@HHAqRST__aC$S%0C++a#++Aq!4??ACC'Sz0S0r:   c                j  ^^^^^^ U R                   R                  R                  nU R                   R                  R                  mUR	                  5       mUR
                  u  mmnmUT-  mUR                  TTTTT5      R                  SSSSS5      R                  5       n[        UT5      nUR                  SS5      R                  SSSS5      R                  5       nU R                  X5      tpVpxUUUUU4S jXVU4 5       u  pnU4S jXU4 5       u  pnS XU4 5       u  pnXUS S S S S S S S S 4$ )	Nr	   r   r   r)  r*  c              3     >#    U  H;  nUR                  TTTTT5      R                  S SSSS5      R                  5       v   M=     g7f)r   r)  r   r	   r*  N)r  r`  r  )rr  rs  rL  rN  rO  rM  r6  s     r6   rt  5TemplatedUlyssesAttention.backward.<locals>.<genexpr>  sH      ,
@ IIaWgq9AA!Q1aP[[]]@s   AAc              3  <   >#    U  H  n[        UT5      v   M     g 7fr2   r  r  s     r6   rt  r    s     +uRtQ,>q%,H,HRtr  c              3     #    U  H6  oR                  S S5      R                  SSS S5      R                  5       v   M8     g7fr  r  rq  s     r6   rt  r    s9      ,
FhIIaO##Aq!Q/::<<Fhr  )r   r  r  r  rg  r   r  r`  r  r  r  rb  )r~  r  r  r  r5  r  r  r  rd  r  r  r  rL  rN  rO  rM  r  r6  s               @@@@@@r6   r,  "TemplatedUlyssesAttention.backward  sS    ++CCQQ))AAPP
&&(#>>7Aqz/##Aw
GQGOOPQSTVWYZ\]^iik%h6##Aq)11!Q1=HHJ8;8V5M,
 ,
#-@,
(
j ,vS]isRt+u(
j,
GQ]gFh,
(
j ZtT4tUY[_aeeer:   r3   r2   r  r  r  r3   r:   r6   r  r    s     59<10<1<1 <1 	<1
 '<1 <1 <1 <1 <1 <1 2<1 <1| f0ff fr:   r  c                  t    \ rS rSr\ S                     SS jj5       r\    SS j5       rSrg)	TemplatedRingAnythingAttentioni#  Nc                  ^  Ub  [        S5      eUR                  R                  nUR                  5       nUR                  R                  nUR                  R
                  nUS-   U-  nS =nnXl        Xl        UR                  U l	        UR                  U l
        Xl        UR                  S   n[        UU5      n[        U5      m SU 4S jjnU" U5      nU" U5      n[        R                  " UR!                  5       UR!                  5       /5      R#                  5       n[$        R&                  " USUS9nUR)                  U5      nUR+                  5       n[-        U5       GHj  nUS:  aM  UU   nUU   nUS U R/                  U5      S S 2S U24   nUUS  R/                  U5      S S 2S U24   nUS-   U-  nOUS S 2S U24   nUS S 2S U24   nU
" U UUUUUUUUSUS:H  US9u  nnUR                  R0                  (       a>  UR3                  [        R4                  5      nUR3                  [        R4                  5      n[7        SS	5      (       a  UR9                  S
5      nUbd  U[        R:                  R<                  R?                  UU-
  5      UU-
  -  -
  nU[        R:                  R<                  RA                  UU-
  5      -
  nUnUnGMm     WR3                  URB                  5      nWRE                  S
5      nU	(       a  UU4$ U$ )NzTemplatedRingAnythingAttention does not support non-None attn_mask: non-uniform sequence lengths across ranks make cross-rank mask slicing ambiguous.r   c                   > TU R                   S   -
  nUS:X  a  U $ U R                   S   U/U R                   SS  Q7n[        R                  " X R                  U5      /SS9$ )Nr   r   r	   r  )r   r   re  	new_zeros)tpad_len	pad_shapes_maxs      r6   pad_to_s_max<TemplatedRingAnythingAttention.forward.<locals>.pad_to_s_maxL  s_    aggaj(G!|W;qwwqr{;I99aY!78a@@r:   r   r[  Tr]  <z2.9.0r   )r  r   r   r   )#r   r  r^  rg  r_  r`  ra  rb  r   rc  rd  r   r   r  r   re  r  r  r
  rf  rh  rj  ri  rk  rl  r  rm  r   r-  rz  r{  rn  ro  r   rp  )!r~  r   r   r   r   r   r   r   r   r  ra  rb  r   rq  r  r<  r6  rr  rs  rt  
kv_seq_lenall_kv_seq_lensr  
key_paddedvalue_paddedru  kv_padded_numelrv  true_seq_lenrw  rb  rc  r  s!                                   @r6   r'  &TemplatedRingAnythingAttention.forward$  s   "  d  %<<GG	##%77HH%==II
AX+	""8#%kkyy/YYq\
-j%@O$	A "#&
#E*IIz113\5I5I5KLMXXZ	,,Y1ER	OOJ/	 %**,z"A1u.y9y))/*55jA!]l]BRS?+,77Ea,FVW&]j8	 !KZK0$Q^4!q&!1HC  77GGffU]]+ffU]]+W--mmB'#!4!4!<!<S8^!LPX[^P^!__!4!4!?!?3!OOHHM #P ffU[[!kk"o'Sz0S0r:   c                    [        S5      e)NzNBackward pass for Ring Anything Attention in diffusers is not implemented yet.r  r  s      r6   r,  'TemplatedRingAnythingAttention.backward  s     ""rssr:   r3   r2   r  r  r  r3   r:   r6   r  r  #  s     59d10d1d1 d1 	d1
 'd1 d1 d1 d1 d1 d1 2d1 d1L t0tt tr:   r  c                  t    \ rS rSr\ S                     SS jj5       r\    SS j5       rSrg)	!TemplatedUlyssesAnythingAttentioni  Nc                    UR                   R                  nUR                  5       nXl        Xl        Xl        [        U5      n[        X40 UD6n[        X/40 UD6n[        X?40 UD6nU" 5       nU" 5       nU" 5       nU
" U UUUUUUUUU	SUS9nU	(       a  Utnnn[        UU40 UD6nU	(       aL  WR                  S5      n[        UU40 UD6nU" 5       nU" 5       nUR                  S5      R                  5       nO	U" 5       nS nU	(       a  UU4$ U$ )NFr]  r   )r  r  rg  ra  rb  r   rG  rR  rW  r-  rp  r  )r~  r   r   r   r   r   r   r   r   r  ra  rb  r   r   r  r  metadata
query_waitkey_wait
value_waitrb  rc  rd  out_waitlse_waits                            r6   r'  )TemplatedUlyssesAnythingAttention.forward  s3   " (??MM&&(#%/,U34UNXN
23JJ4UNXN
j-
 LCq 1eHxH--#C4S%L8LH*C*C++b/,,.C*CC'Sz0S0r:   c                    [        S5      e)NzQBackward pass for Ulysses Anything Attention in diffusers is not implemented yet.r  r  s      r6   r,  *TemplatedUlyssesAnythingAttention.backward  s     ""uvvr:   r3   r2   )r~  r  r   r   r   r   r   r   r   r   r   r  r   r   r   r  r   r   r  r   r   r  r  r  r3   r:   r6   r  r    s     59?10?1?1 ?1 	?1
  ?1 ?1 ?1 ?1 ?1 ?1 2?1 ?1B w0ww wr:   r  c                   UR                   R                  nUR                  5       n[        R	                  XX5      n [        R	                  XX5      n[        R	                  XX5      n[
        R	                  U UUUUUUUUU	U
U5      nU(       a  UtnnnOUn[        R	                  UUUU5      nU(       aM  WR                  S:X  a  UR                  S5      n[        R	                  UUX5      nUR                  S5      nUU4$ U$ )zt
Unified Sequence Parallelism attention combining Ulysses and ring attention. See: https://arxiv.org/abs/2405.07719
r)  r   )	r  r  rg  r"  r*  rY  r,  r-  rp  )r   r   r   r   r   r   r   r   r  ra  rb  r   r  r  r  ulysses_grouprb  context_layerrc  rd  r  s                        r6   _templated_unified_attentionr    s   & $;;IIL **,M  {OE


};
KC  {OE
 
&
&C !$sQ!!	F 
 88q=--#C""=#zOkk"o}Mr:   )r   c	                  U(       a  [        S5      eU(       a  [        S5      eUR                  R                  S:  a0  UR                  R                  S:  a  [	        U UUUUUUUUU	U
U5      $ UR                  R                  S:  a[  UR                  R
                  (       a   [        R                  U UUUUUUUUU	U
U5      $ [        R                  U UUUUUUUUU	U
U5      $ UR                  R                  S:  a[  UR                  R                  (       a   [        R                  U UUUUUUUUU	U
U5      $ [        R                  U UUUUUUUUU	U
U5      $ [        S5      e)Nz>Causal attention is not yet supported for templated attention.z1GQA is not yet supported for templated attention.r   z@Reaching this branch of code is unexpected. Please report a bug.)r   r  r`  r  r  ring_anythingr  r*  rY  ulysses_anythingr  r  )r   r   r   r   r   r   r   r   r  ra  rb  r   s               r6   %_templated_context_parallel_attentionr  	  s    YZZLMM 	00<<q@44CCaG+
 	
 
	1	1	=	=	A33AA177   *//   
	1	1	@	@1	D33DD4::   -22   [\\r:   )r   r   c
                    S n
Ub  [        S5      eU	c  [        U UUUUUUUS9nU(       a  UtpnO<[        R                  " [        US9n[        U UUS UUUSUU[        U	S9nU(       a  Uu  pU(       a  X4$ U$ Nz.`attn_mask` is not supported for flash-attn 2.)rP  r   r   r   rQ  rR  rW  r^  r  Fra  rb  r   )r   r"   	functoolspartialr  r  r  )r   r   r   r   r   r   r   rW  r  r   rc  rb  rd  ra  s                 r6   _flash_attentionr  	  s    " CIJJ#(	
 LCq&&'BP[\
3!4-
 HC#C:,,r:   c
                ,   S n
Ub  [        S5      e[        [        R                     R                  nU	c  U" U UUUUUUUS9nU(       a  UtpnO<[
        R                  " [        US9n[        U UUS UUUSUU[        U	S9nU(       a  Uu  pU(       a  X4$ U$ r  )
r   r   rH   ri   r   r  r  r  r  r  )r   r   r   r   r   r   r   rW  r  r   rc  r5   rb  rd  ra  s                  r6   _flash_attention_hubr  	  s    " CIJJ !5!?!?@JJD#(	
 LCq&&'FT_`
3!8-
 HC#C:,,r:   c
                @   U R                   u  p  nUR                   u  p  nUb  [        X:U5      n[        XXU R                  S9u  u  pu  nnu  nn/ / nn[	        U
5       H8  nUU   nUR                  UUS U24   5        UR                  UUS U24   5        M:     U R                  SS5      n[        R                  " USS9n[        R                  " USS9n[        [        R                     R                  nU" UUUUUUUUUUUUS9nUR                  SU
S45      nU$ Nr   r   r   r   r  )rP  r   r   r  r  r   r!  r   rQ  rR  rW  r^  r   )r   r2  r'  r   ri  appendr  r   re  r   rH   rk   r   	unflatten)r   r   r   r   r   r   r   rW  r  r   r  r  rd  r  r  r  r  r   r!  	key_validvalue_validb	valid_lenquery_packed
key_packedvalue_packedr5   rb  s                               r6   _flash_varlen_attention_hubr  	  sO   " #(++J1a))A1a(
K	 	/:5<<	
 ONQ0\<2N<  {I:aL	Q

]+,5JYJ/0 
 ==A&L9!,J99[a0L !5!F!FGQQD



!!!!$C --J+
,CJr:   )r   c
                   U R                   u  p  nUR                   u  p  nUb  [        X:U5      n[        XXU R                  S9u  u  pu  nnu  nn/ / nn[	        U
5       H8  nUU   nUR                  UUS U24   5        UR                  UUS U24   5        M:     U R                  SS5      n[        R                  " USS9n[        R                  " USS9n[        UUUUUUUUUUUUS9nUR                  SU
S45      nU$ r  )r   r2  r'  r   ri  r  r  r   re  r#   r  )r   r   r   r   r   r   r   rW  r  r   r  r  rd  r  r  r  r  r   r!  r  r  r  r  r  r  r  rb  s                              r6   _flash_varlen_attentionr  8
  s8     #(++J1a))A1a(
K	 	/:5<<	
 ONQ0\<2N<  {I:aL	Q

]+,5JYJ/0 
 ==A&L9!,J99[a0L
 


!!!!$C --J+
,CJr:   c                T    Ub  [        S5      e[        U UUUUS9u  pU(       a  X4$ U$ )N.`attn_mask` is not supported for flash-attn 3.rP  r   r   rQ  rR  )r   re  )
r   r   r   r   r   r   r  r   rb  rc  s
             r6   _flash_attention_3r  q
  sB     IJJ$


HC $C:,,r:   c                   Ub  [        S5      e[        [        R                     R                  nU
cI  U" S0 SU _SU_SU_SU_SU_SS _SS _S	S _S
S _SU_SU_SS_SS _SU_SS_SU	_6nU	(       a
  US   US   4$ U$ [
        R                  " [        UUSS USS9n[
        R                  " [        UUSS USS9n[        U UUS SUUSU	UUU
S9nU	(       a  Uu  pX4$ U$ )Nr  rP  r   r   rQ  rR  rS  rT  rU  rV  rW  rY  rZ  r   r[  r\  r]  r   r^  r  r   Fr  r3   )
r   r   rH   ro   r   r  r  r  r  r  )r   r   r   r   r   r   rW  rY  r\  r^  r   r5   rb  ra  rb  rc  s                   r6   _flash_attention_3_hubr  
  s   $ IJJ !5!B!BCMMD 


 
  	

 
 
 
 
 
 $
 
 
 
 (
 
  0!
$ $5AA=#="")#J ##*#K 0)C xJr:   c                T   U R                   u  p  n
UR                   u  p  n
Ub  [        X8U5      n[        XXU R                  S9u  u  pu  pu  nn/ / nn[	        U5       H8  nUU   nUR                  UUS U24   5        UR                  UUS U24   5        M:     U R                  SS5      n[        R                  " USS9n[        R                  " USS9n[        [        R                     R                  nU" UUUUUUUUUS9	tnnn
UR                  SUS45      nU(       a  UU4$ U$ )Nr  r   r   r  )	rP  r   r   r  r  r   r!  rQ  rR  r   )r   r2  r'  r   ri  r  r  r   re  r   rH   rp   r   r  )r   r   r   r   r   r   r  r   r  r  rd  r  r  r  r  r   r!  r  r  r  r  r  r  r  r5   rb  rc  s                              r6   _flash_attention_3_varlen_hubr  
  sV    #(++J1a))A1a(
K	 	/:5<<	
 ONQ0\2N<  {I:aL	Q

]+,5JYJ/0 
 ==A&L9!,J99[a0L !5!I!IJTTD


!!!!
LCq --J+
,C#C:,,r:   c                    Ub  [        S5      e[        [        R                     R                  nU" U UUUUS9n	[        U	[        5      (       a  U(       a
  U	S   U	S   4$ U	S   $ U	$ )Nz.`attn_mask` is not supported for flash-attn 4.r  r   r   )r   r   rH   rl   r   r	  tuple)
r   r   r   r   r   r   r  r   r5   rb  s
             r6   _flash_attention_4_hubr    sx     IJJ !5!A!ABLLD



C #u#-AA93q69Jr:   c                R   U R                   u  p  n
UR                   u  p  n
Ub  [        X8U5      n[        XXU R                  S9u  u  pu  pu  nn/ / nn[	        U5       H8  nUU   nUR                  UUS U24   5        UR                  UUS U24   5        M:     U R                  SS5      n[        R                  " USS9n[        R                  " USS9n[        UUUUUUUUUUS9
n[        U[        5      (       a  Utnnn
OUnS nUR                  SUS45      nU(       a  UU4$ U$ )Nr  r   r   r  )
rP  r   r   r  r  r   r!  rQ  rR  r^  r   )r   r2  r'  r   ri  r  r  r   re  flash_attn_3_varlen_funcr	  r  r  )r   r   r   r   r   r   r  r   r  r  rd  r  r  r  r  r   r!  r  r  r  r  r  r  r  ra  rb  rc  s                              r6   _flash_varlen_attention_3r  3  s^    #(++J1a))A1a(
K	 	/:5<<	
 ONQ0\2N<  {I:aL	Q

]+,5JYJ/0 
 ==A&L9!,J99[a0L%


!!!!$F &%  S1
--J+
,C#C:,,r:   c	           
         Ub  [        S5      eU(       d,  [        R                  " 5       (       a  [        U UUUUUSS9tpnO[        U UUUUUUS9n	U(       a  U	tpnU(       a  U	W
4$ U	$ )N0`attn_mask` is not supported for aiter attentionT)rP  r   r   r   rQ  rR  r  )r   r   is_grad_enabledaiter_flash_attn_func)r   r   r   r   r   r   r   r  r   rb  rc  rd  s               r6   _aiter_flash_attentionr  m  s     KLL%//11,
1 $!
 LCq#C:,,r:   c	                  ^ S n	S n
U R                   u  ppUR                   u  p  nTb  [        T[        R                  5      (       a  Tn
OU(       a(  [        R                  " [
        XXU R                  5      n
O[        R                  " T5      (       a  TR                  S:X  a2  TR                  TR                  S5      STR                  S5      S5      mTR                  XX5      mTR                  [        R                  :X  a+  U4S jn[        R                  " UUS XU R                  5      n
OU4S jn	O[        S5      eS XU4 5       u  pn[        R                  " U UUU	U
UUUS9nUR!                  SSSS	5      nU$ )
Nr	   r   r   c                   > TXX#4   $ r2   r3   )r4  r5  r6  r7  r   s       r6   mask_mod(_native_flex_attention.<locals>.mask_mod  s     e!CDDr:   c                   > U TXX44   -   $ r2   r3   )scorer4  r5  r6  r7  r   s        r6   	score_mod)_native_flex_attention.<locals>.score_mod  s    ye)KLLLr:   zCAttention mask must be either None, a BlockMask, or a 2D/4D tensor.c              3  H   #    U  H  oR                  S SSS5      v   M     g7fro  rp  rq  s     r6   rt  )_native_flex_attention.<locals>.<genexpr>  rv  rw  )r   r   r   r   
block_maskr   r   r  r)  )r   r	  flex_attention	BlockMaskcreate_block_maskr8  r   r   	is_tensorr,  viewr/  r.  r   r   r   r`  )r   r   r   r   r   r   r   r  r   r   r  r  r  rj  rd  r  r  rb  s      `              r6   _native_flex_attentionr
    se     IJ*/++'J9))A1aJy.2J2JKK
	#55+ZI[`[g[g

 
	#	#>>Q!y~~a'8!Y^^A=NPQRI$$ZIR	??ejj(E (99*dI5<<J
M ^__LU8KLE

'
'	C ++aAq
!CJr:   c                   U R                   [        R                  :X  a1  [        R                  " U S[	        S5      5      n U R                  US9n OU R                  US9n U(       a"  U R                  u  p4U R                  USSU5      n U $ )a  
Convert a 2D attention mask to an additive mask, optionally reshaping to 4D for SDPA.

This helper is used by both native SDPA and xformers backends to handle both boolean and additive masks.

Args:
    attn_mask: 2D tensor [batch_size, seq_len_k]
               - Boolean: True means attend, False means mask out
               - Additive: 0.0 means attend, -inf means mask out
    target_dtype: The dtype to convert the mask to (usually query.dtype)
    reshape_4d: If True, reshape from [batch_size, seq_len_k] to [batch_size, 1, 1, seq_len_k] for broadcasting

Returns:
    Additive mask tensor where 0.0 means attend and -inf means mask out. Shape is [batch_size, seq_len_k] if
    reshape_4d=False, or [batch_size, 1, 1, seq_len_k] if reshape_4d=True.
r   -inf)r   r   )r   r   r   wherer  r  r   r	  )r   target_dtype
reshape_4dr  r1  s        r6   _prepare_additive_attn_maskr    sw    ( %**$KK	3f>	LL|L4	 LL|L4	  )
NN:q!Y?	r:   c
                   U(       a  [        S5      eUbp  UR                  S:X  a`  UR                  S   U R                  S   :X  a@  UR                  S   UR                  S   :X  a   UR                  S5      R                  S5      nU	cR  S XU4 5       u  pn[        R
                  R                  R                  U UUUUUUUS9n
U
R                  SSSS5      n
U
$ [        U UUUUUUUU[        [        U	S9n
U
$ )	NzDNative attention backend does not support setting `return_lse=True`.r	   r   r   c              3  H   #    U  H  oR                  S SSS5      v   M     g7fro  rp  rq  s     r6   rt  $_native_attention.<locals>.<genexpr>  "     P<OqYYq!Q22<Orw  rx  r)  r  )r   r,  r   r-  r   rz  r{  r|  r`  r  r  r  r   r   r   r   r   r   r   r   r  r   rb  s              r6   _native_attentionr    s   " _``
 	NNaOOA%++a.0OOA#))A,. ''*44Q7	PU<OPEhh!!>>! ? 	
 kk!Q1%" J 435-
 Jr:   c
                    S n
U	c  U(       d  S XU4 5       u  pn[         R                  R                  R                  [         R                  R                  R                  R
                  5         [         R                  R                  R                  U UUUUUUUS9nS S S 5        WR                  SSSS5      nO'[        U UUUUUUUU[        [        U	S9nU(       a  Uu  pU(       a  X4$ U$ ! , (       d  f       NV= f)Nc              3  d   #    U  H&  oR                  S SSS5      R                  5       v   M(     g7fro  )r`  r  rq  s     r6   rt  *_native_cudnn_attention.<locals>.<genexpr>Q  s+     ]I\AYYq!Q2==??I\s   .0rx  r   r	   r   r)  r  )r   rz  	attentionsdpa_kernel
SDPBackendCUDNN_ATTENTIONr{  r|  r`  r  r  r  r   r   r   r   r   r   r   r   r  r   rc  rb  s               r6   _native_cudnn_attentionr  >  s    " C
]%V[I\]EXX++EHH,>,>,I,I,Y,YZ((%%BB###% C 	C [ kk!Q1%324-
 HC#C:,,= [Zs   0/C//
C=c
                   U(       a  [        S5      eS XU4 5       u  pn[        R                  R                  R	                  [        R                  R                  R
                  R                  5         [        R                  R                  R                  U UUUUUUUS9n
S S S 5        W
R                  SSSS5      n
U
$ ! , (       d  f       N$= f)NzNNative efficient attention backend does not support setting `return_lse=True`.c              3  H   #    U  H  oR                  S SSS5      v   M     g7fro  rp  rq  s     r6   rt  ._native_efficient_attention.<locals>.<genexpr>  rv  rw  rx  r   r	   r   r)  )
r   r   rz  r  r  r  EFFICIENT_ATTENTIONr{  r|  r`  r  s              r6   _native_efficient_attentionr$  s  s      ijjLU8KLE				'	'(:(:(E(E(Y(Y	Zhh!!>>! ? 	
 
[ ++aAq
!CJ 
[	Z   6/C
Cc
                   Ub  [        S5      eS n
U	c  U(       d  S XU4 5       u  pn[        R                  R                  R	                  [        R                  R                  R
                  R                  5         [        R                  R                  R                  U UUS UUUUS9nS S S 5        WR                  SSSS5      nO'[        U UUS UUUUU[        [        U	S9nU(       a  Uu  pU(       a  X4$ U$ ! , (       d  f       NV= f)	Nr  c              3  H   #    U  H  oR                  S SSS5      v   M     g7fro  rp  rq  s     r6   rt  *_native_flash_attention.<locals>.<genexpr>  r  rw  rx  r   r	   r   r)  r  )r   r   rz  r  r  r  FLASH_ATTENTIONr{  r|  r`  r  r  r  r  s               r6   _native_flash_attentionr*    s   " KLL
C
PU<OPEXX++EHH,>,>,I,I,Y,YZ((%%BB##% C 	C [ kk!Q1%39;-
 HC#C:,,= [Zs   >/C==
Dc
                   U(       a  [        S5      eS XU4 5       u  pn[        R                  R                  R	                  [        R                  R                  R
                  R                  5         [        R                  R                  R                  U UUUUUUUS9n
S S S 5        W
R                  SSSS5      n
U
$ ! , (       d  f       N$= f)NzINative math attention backend does not support setting `return_lse=True`.c              3  H   #    U  H  oR                  S SSS5      v   M     g7fro  rp  rq  s     r6   rt  )_native_math_attention.<locals>.<genexpr>  rv  rw  rx  r   r	   r   r)  )
r   r   rz  r  r  r  MATHr{  r|  r`  r  s              r6   _native_math_attentionr/    s      deeLU8KLE				'	'(:(:(E(E(J(J	Khh!!>>! ? 	
 
L ++aAq
!CJ 
L	Kr%  c                *   U(       a  [        S5      eUca  [        XU5      n[        U UUU R                  S5      USS Uc&  S[        R
                  " U R                  S   5      -  OUSSSU-
  SSS	9S   nU$ [        U UUUUS US U[        [        US
9nU$ )Nr  r	   r  r  r   r  Fr   r  r  )
r   r  r,   r/  r  r  r   r  r  r  )	r   r   r   r   r   r   r  r   rb  s	            r6   _native_npu_attentionr1    s     \]]/IF	"JJqM 6;m#		%++b/22Io
 < J 402-
 Jr:   c                    Ub  [        S5      eU(       a  [        S5      eS XU4 5       u  pnU [        R                  " U R                  S   5      -  n [	        U UUUS9nUR                  SSSS	5      nU$ )
Nz.`attn_mask` is not supported for XLA attentionzAXLA attention backend does not support setting `return_lse=True`.c              3  H   #    U  H  oR                  S SSS5      v   M     g7fro  rp  rq  s     r6   rt  (_native_xla_attention.<locals>.<genexpr>6  rv  rw  r   )rP  r   r   rR  r   r	   r   r)  )r   r  r  r   xla_flash_attentionr`  )r   r   r   r   r   r  r   rb  s           r6   _native_xla_attentionr6  %  s     IJJ\]]LU8KLEDIIekk"o..E



	C ++aAq
!CJr:   c                    Ub  [        S5      eS nUc  [        U UUSUUUS9n	U(       a  U	tpn
O'[        U UUS SUUSU[        [        US9n	U(       a  U	u  pU(       a  X4$ U	$ N/`attn_mask` is not supported for sage attentionr  r  r   Fr  )r   r&   r  r  r  )r   r   r   r   r   r   r  r   rc  rb  rd  s              r6   _sage_attentionr:  B  s     JKK
C!
 LCq313-
 HC#C:,,r:   c                    Ub  [        S5      eS n[        [        R                     R                  n	Uc  U	" U UUSUUUS9n
U(       a  U
tpnO'[        U UUS SUUSU[        [        US9n
U(       a  U
u  pU(       a  X4$ U
$ r8  )r   r   rH   r{   r   r  r  r  )r   r   r   r   r   r   r  r   rc  r5   rb  rd  s               r6   _sage_attention_hubr<  u  s     JKK
C !5!>!>?IID!
 LCq353-
 HC#C:,,r:   c                    U(       a  [        S5      eU R                  u  p  n
UR                  u  p  n
Ub  [        X8U5      n[        XXU R                  S9u  u  pu  pu  nn/ / nn[        U5       H8  nUU   nUR                  UUS U24   5        UR                  UUS U24   5        M:     U R                  SS5      n[        R                  " USS9n[        R                  " USS9n[        UUUUUUUUUS9	nUR                  SUS45      nU$ )Nz?Sage varlen backend does not support setting `return_lse=True`.r  r   r   r  )	rP  r   r   r  r  r   r!  r   r  r   )r   r   r2  r'  r   ri  r  r  r   re  r+   r  )r   r   r   r   r   r   r  r   r  r  rd  r  r  r  r  r   r!  r  r  r  r  r  r  r  rb  s                            r6   _sage_varlen_attentionr>    s<    Z[["'++J1a))A1a(
K	 	/:5<<	
 ONQ0\2N<  {I:aL	Q

]+,5JYJ/0 
 ==A&L9!,J99[a0L



!!!!
C --J+
,CJr:   	   c           
     <    Ub  [        S5      e[        U UUSUUUS9$ Nr9  r  r  )r   r'   r   r   r   r   r   r   r  r   s           r6   #_sage_qk_int8_pv_fp8_cuda_attentionrC    s8     JKK'


 r:   c           
     <    Ub  [        S5      e[        U UUSUUUS9$ rA  )r   r(   rB  s           r6   (_sage_qk_int8_pv_fp8_cuda_sm90_attentionrE    s8     JKK,


 r:      c           
     <    Ub  [        S5      e[        U UUSUUUS9$ rA  )r   r)   rB  s           r6   $_sage_qk_int8_pv_fp16_cuda_attentionrH    s8     JKK(


 r:   c           
     <    Ub  [        S5      e[        U UUSUUUS9$ rA  )r   r*   rB  s           r6   &_sage_qk_int8_pv_fp16_triton_attentionrJ  1  s8     JKK*


 r:   c
                   U(       a  [        S5      eU R                  u  ppUR                  u  ppU(       a  [        R                  " 5       nOUb  UR                  S:X  a  UR                  S5      nUS-   S-  S-  n[        R                  " XUU4U R                  U R                  S9n[        X0R                  S9nUUS S 2S S 2S S 2S U24'   [        S5      US S 2S S 2S S 2US 24'   US S 2S S 2S S 2S U24   nOLUR                  S	:w  a  [        S
5      eUR                  S	:X  a!  UR                  XX5      R                  U 5      nU(       ay  X-  S:w  a  [        S5      eX-  nU R                  SUS45      n UR                  SUS45      R                  SSSUS5      nUR                  SUS45      R                  SSSUS5      n[        R                  " XX#XF5      nU(       a  UR!                  SS5      nU$ )NzFxformers attention backend does not support setting `return_lse=True`.r	   r      rF  r  )r  r  r*  zDOnly 2D and 4D attention masks are supported for xformers attention.r   zKNumber of heads in query must be divisible by number of heads in key/value.r   r)  )r   r   xopsLowerTriangularMaskr,  r/  r   r  r   r   r  r  r.  type_asr  memory_efficient_attentionr  )r   r   r   r   r   r   r   r   r  r   r  r  num_heads_qrd  r  num_heads_kvoriginal_seq_lenaligned_seq_lenaligned_maskmask_additivenum_heads_per_grouprb  s                         r6   _xformers_attentionrX  L  s     abb,1KK)J;%(YY"A<,,.			>>Q
  )~~a0 01 4:a?O !;;)_Ekk||L 8M 8ELAq"3#3"3347<V}LAq"2"334 %Q1kzk%9:I^^q cdd^^q !(()X``afgI%*jkk)9L"#56mmAb1299"b"FY[]^L"#56==b"bJ]_ab

)
)%e	
YCkk!QJr:   r2   )r   zstr | AttentionBackendName)Nr   FNFN)r   r   r   r   r   r   r   r  r   r  r   r   r   r  r   r   r   zdict[str, Any] | Noner   zAttentionBackendName | Noner   r  r   r   )r   r  r   r   r   r   r   )r   intr   rY  r   r   )
r   r   r   r   r   r   r   r  r   r   )r   rH   r   r   )r  rY  r  rY  r  rY  r   torch.device | None)r  rY  r  rY  r   r   r   rZ  )NN)r  rY  r  rY  r  rY  r   r  r   rZ  r   r   )r   r   r  rY  r1  rY  r   r   )r?  r   )NFNNNNr   r   r   NFr   ) rP  r   r   r   r   r   rQ  r  rR  r   rS  r  rT  r  rU  r  rV  r  rX  rY  rY  r  rZ  rY  r[  bool | Noner\  r   r]  rY  r   z!tuple[torch.Tensor, torch.Tensor])Nr   FNFFTN)r~  r  r   r   r   r   r   r   r   r  r   r  r   r   r   r  r   r   r  r   r  r   r   r  r  )r~  r  r   r   r   r   r   r   r   r  r   r  r   r   r   r  r   r   r  r   r  r   r   r  rW  tuple[int, int])$r~  r  r   r   r   r   r   r   r   r  r   r  r   r   r   r  r   r   r  r   r  r   r   r  rW  r\  rY  r  rZ  rY  r[  r[  r\  r   r]  rY  )r   r   r   r   r   r  )rs  r   r   r   )r	   r   N)rs  r   r  rY  r  rY  r   r   )rs  r   r5  rY  r  dist.ProcessGroupr   ztuple[torch.Tensor, int])rs  r   r7  rY  r  r]  r   r   )r   r   r   dict)rs  r   r  r]  r   zCallable[..., torch.Tensor])Nr	   r   )r   r   r   r   r   r   r   r   r   r  r   r   r   r  r   r   r  r   r   r  r  rY  r  rY  )Nr   FNFF)r   r   r   r   r   r   r   r  r   r  r   r   r   r  r   r   r  r   r   r  )Nr   FNrO  FN)r   r   r   r   r   r   r   r  r   r  r   r   r   r  rW  r\  r  r   r   r  r   r   )Nr   NFrO  FN)r   r   r   r   r   r   r   r  r   r  r   r  r   r   rW  r\  r  r   r   r  r   r   )NNFFN)r   r   r   r   r   r   r   r  r   r  r   r   r  r   r   r  r   r   )NNFrO  r   FFN)r   r   r   r   r   r   r   r  r   r  r   r   rW  r\  rY  r  r\  r   r^  r   r   r  r   r   )r   r   r   r   r   r   r   r  r   r  r   r   r   r  r  r   r   r  r   r   )NFNFFN)r   r   r   r   r   r   r   z0torch.Tensor | 'flex_attention.BlockMask' | Noner   r   r   r  r   r   r  r   r   r  r   r   )T)r   r   r  ztorch.dtyper  r   r   r   )Nr   FNFFN)r   r   r   r   r   r   r   r  r   r  r   r   r   r  r   r   r  r   r   r  r   r   )Nr   NFN)r   r   r   r   r   r   r   r  r   r  r   r  r  r   r   r  r   r   )NFFN)r   r   r   r   r   r   r   r  r   r   r  r   r   r  r   r   )NFNFN)r   r   r   r   r   r   r   r  r   r   r   r  r  r   r   r  r   r   )
__future__r   
contextlibr  r   r  dataclassesr   enumr   typingr   r   r   r   torch.distributedr  r2  torch.nn.functionalrz  r{  r3  is_available)torch.distributed._functional_collectives_functional_collectivesr
  utilsr
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   utils.constantsr   r   utils.torch_utilsr   r   _modeling_parallelr   r    r  r  r  _REQUIRED_FLEX_VERSIONr
  r  rd   r   r   r  r  r  r   r  r	  r  
flash_attnr"   r#   flash_attn.flash_attn_interfacer$   r%   ImportErrorOSErrorr  rM  r   flash_attn_interfacer_  r  rS   r  sageattentionr&   r'   r(   r)   r*   r+   !torch.nn.attention.flex_attentionr  r  	torch_npur,   $torch_xla.experimental.custom_kernelr-   r5  xformers.opsr  rM  __version__library	custom_op
_custom_opregister_fake_register_faker>   rF   r   rH   r   r   ro   rp   ri   rk   r{   rl   r   r   contextmanagerrs   r   r   r   r   r   r   r   r   r   r   r"  r%  r'  r2  r8  rB  r   re  rd  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  Functionr"  r9  r=  r?  rA  rG  rR  rW  rY  r  r  r  r  r  r   rh   r  r  r  rj   r  rm   r  r  r  r  rn   r  rq   r  rr   r
  r  r  rt   r  ru   r$  rv   r*  rw   r/  rx   r1  ry   r6  rz   r:  r<  r|   r>  r}   rC  r~   rE  r   rH  r   rJ  r   rX  r3   r:   r6   <module>r     s#   #     !  / /      	!!##>>    $ L M 3 2! !      % 	H	-/h4I$Pg4h 13 (*^/?F]/^ /1l6NtUk6l %d,BC *, *,b1EdLa1b .0j5HOi5j  +Fmm O!#' "& (M[ #%B !	
 	
 	
$ H$(!&*##' (,%O  	CB N $2   #_  # D 	((J]]00N*UY **4Q * !J(N(3 (V.) .)b 0 0 0  %%'7/'GI( ,,.>/./
 ""$4/'OQ% ))+;/.,
 !!#32 $
 $$&6-''=#I E #L <P<W<W B B0 &*.21  ,0/31 1 	1  1  #	1 
 1  1  1  1  ,1  )1  -1  1 pU
LLFV &*	QQ	Q Q #	Q 
Q0L^ %
 #'	^^^ ^  	^ &^* #'	^^^ ^  	^* &*"&	f	f	f 	f #		f
  	f 
	f1h
D :Z`a
 #'"%)%)%) ((( (  	(
 ( 	( #( #( #( ( ( ( ( ( (  '!( b(V >?
 #'"%)%)%) 777 7  	7
 7 	7 #7 #7 #7 7 7 7 7 7 7  '!7 @7B &*04(	,(( 
( 	(
 #( ( ( ( ( ( ( .(V",	,",",Z &*045-	,5-5- 
5- 	5-
 #5- 5- 5- 5- 5- 5- 5- .5-t!,	,!,!,X &*040-	,0-0- 
0- 	0-
 #0- 0- 0- 0- 0- 0- 0- .0-l,	,,,P &*04;- $,;-	,;-;- 
;- 	;-
 #;- ;- ;- ;- ;- ;- ;- .;- !;-|#,	,#,#,V &*04C- $,C-	,C-C- 
C- 	C-
 #C- C- C- C- C- C- C- .C- !C-L),	,),),b &*04U- $, 'U-	,U-U- 
U- 	U-
 #U- U- U- U- U- U- U- .U- !U- U-  !U-" #U-$ %U-& 'U-p0,	,0,0,p &*04#-	,#-#- 
#- 	#-
 ##- #- #- #- #- #- #- .#-V &*04%-	,%-%- 
%- 	%-
 #%- %- %- %- %- %- %- .%-PV	,VV0 &*04#	,## 
# 	#
 ## # # # # # # .#N\	,\\ 
FYR.U^^,, .2"
. -  B $ $NfU^^44 fD\f 7 7 \f~ntU^^%<%< ntbIw(?(? Iwp 15<<	< < 	<
 < < < < < .< < <F &*g] 15g]g]	g] g] #	g]
 g] g] g] g] g] .g]Z ## =|L" $  &*#+040-0-	0- 0- #	0-
 0- 0- 0- !0- 0- .0- 0-
0-f ##"" =|L" $  &*#+041-1-	1- 1- #	1-
 1- 1- 1- !1- 1- .1- 1-
1-h ##)) =|L# $  &*#+0433	3 3 #	3
 3 3 3 !3 3 .3 3
3l ##%% =|L $  &*#+0422	2 2 #	2
 2 2 2 !2 2 .2 2	2j ##!! =|L $  &*04--	- - #	-
 - - - .- -	-. ##%% =|L" $  &*#+#04JJ	J J #	J
 J J !J J J J .J J
JZ ##,, =|L# $  &*04.-.-	.- .- #	.-
 .- .- .- ..- .-
.-b ##$$ =|L# $  &*04	  #	
    . 
4 ##(( =|L $  &*043-3-	3- 3- #	3-
 3- 3- 3- .3- 3-	3-l ###%BLQ $  &*04&-&-	&- &- #	&-
 &- &- &- &- .&- &-	&-R ##+]LI $  CG0488	8 8 @	8
 8 8 8 8 .8 8	8x LP""+6"DH""J ##-" $  &*0488	8 8 #	8
 8 8 8 8 8 .8 8
8v ##&& =|L" $  &*04----	-- -- #	--
 -- -- -- -- -- .-- --
--` ##**- $  &*04	  #	
      . 	< ##&& =|L" $  &*040-0-	0- 0- #	0-
 0- 0- 0- 0- 0- .0- 0-
0-f ##%%- $  &*04	  #	
      . 	< ##$$ =|L" $  &*04--	- - #	-
 - - - .- -
-b ##$$- $  &*04	  #	
   . 	2 ###%BLQ" $  &*04+-+-	+- +- #	+-
 +- +- +- .+- +-
+-\ ##!!#%BLQ" $  &*04,-,-	,- ,- #	,-
 ,- ,- ,- .,- ,-
,-^ ##$$#%BLQ $  &*0400	0 0 #	0
 0 0 0 .0 0	0f ##220A6E $  &*04	  #	
    . 	. ##770A6E $  &*04	  #	
    . 	. ##330A6E $  &*04	  #	
    . 	. ##550A6E $  &*04	  #	
    . 	. ##!!+]LI $  &*04@@	@ @ #	@
 @ @ @ @ @ .@ @	@}o ,/ + 	GsJuvw#!%'+$&*#+( ,/ (8;^_` % #' 	( ,/ %1!4WXY# $%" ,/ 9!<_`a"'+$,0)(,%*.'0 ,/ :1#=`ab" ,/ $5aS8[\]!#$ ,/ #5aS8[\]!"# ,/ 4QC7Z[\!&s   x 7x= y/ !z { |	 .|9 ?}) 	x:x55x:=	y,y''y,/	z8zz	{(#{{	|"||		|6|11|69	}&}!!}&)	~2~~