
    "
3jb0                     Z    S SK Jr  S SKJr   " S S5      r " S S\5      r " S S5      rg	)
    )ArgumentParser)Enumc                   $    \ rS rSrSrSrSrSrSrg)AttentionMaskFormat	   r             N)	__name__
__module____qualname____firstlineno__MaskIndexEndMaskIndexEndAndStartAttentionMaskNoMask__static_attributes__r       a/home/wildlama/miniconda3/lib/python3.13/site-packages/onnxruntime/transformers/fusion_options.pyr   r   	   s    L  M Fr   r   c                   6    \ rS rSrSrSrSrSrS rS r	S r
S	rg
)AttentionOpType   	AttentionMultiHeadAttentionGroupQueryAttentionPagedAttentionc                     U R                   $ Nvalueselfs    r   __str__AttentionOpType.__str__   s    zzr   c                 ,    [        U R                  5      $ r   )hashr!   r"   s    r   __hash__AttentionOpType.__hash__!   s    DJJr   c                 4    UR                   U R                   :H  $ r   r    )r#   others     r   __eq__AttentionOpType.__eq__$   s    {{djj((r   r   N)r   r   r   r   r   r   r   r   r$   r(   r,   r   r   r   r   r   r      s'    I-/%N )r   r   c                   d    \ rS rSrSrS rSS jrS rS\4S jr	\
S 5       r\
S	\4S
 j5       rSrg)FusionOptions(   z'Options of fusion in graph optimizationc                    SU l         SU l        SU l        SU l        SU l        SU l        SU l        SU l        SU l        SU l	        SU l
        SU l        SU l        SU l        SU l        US;   a  SU l        [        R                   U l        US:X  a  [        R$                  U l        OUS;   a  [        R&                  U l        S U l        US;   a2  SU l        SU l        SU l        SU l        SU l        SU l        SU l        g g )NTF)clipqwen3bert)vitr3   unetvaer2   )enable_geluenable_layer_normenable_attentionenable_rotary_embeddingsuse_multi_head_attention!disable_multi_head_attention_biasenable_skip_layer_normenable_embed_layer_normenable_bias_skip_layer_normenable_bias_geluenable_gelu_approximationenable_qordered_matmulenable_shape_inferenceenable_gemm_fast_gelugroup_norm_channels_lastr   r   attention_mask_formatr   r   attention_op_typeenable_nhwc_convenable_group_normenable_skip_group_normenable_bias_splitgeluenable_packed_qkvenable_packed_kvenable_bias_add)r#   
model_types     r   __init__FusionOptions.__init__+   s   !% $(,% ).%16.&*#'+$+/( $).&&*#&*#%*"(,%**+0D( &9%F%F")<)I)ID&++)<)C)CD&!% 00$(D!%)D"*.D')-D&%)D"$(D!#'D  1r   c                 h    U(       a  [         R                  U l        g [         R                  U l        g r   )r   r   rH   r   )r#   use_raw_masks     r   use_raw_attention_mask$FusionOptions.use_raw_attention_maskZ   s!    )<)J)JD&)<)I)ID&r   c                 .    [         R                  U l        g r   )r   r   rH   r"   s    r   disable_attention_mask$FusionOptions.disable_attention_mask`   s    %8%?%?"r   attn_op_typec                     Xl         g r   )rI   )r#   r[   s     r   set_attention_op_type#FusionOptions.set_attention_op_typec   s    !-r   c                    [        U R                  5      nU R                  (       a  SUl        U R                  (       a  SUl        U R                  (       a  SUl        U R                  (       a  SUl	        U R                  (       a  SUl
        U R                  (       a  SUl        U R                  (       a  SUl        U R                  (       a  SUl        U R"                  (       a  SUl        U R&                  (       a  SUl        U R(                  (       a  SUl        U R,                  (       a  SUl        U R.                  (       a  UR1                  S5        U R0                  (       a  UR1                  S5        U R2                  (       a  UR5                  5         U R                  S;   a  U R6                  (       a  SUl        U R:                  (       a  SUl        U R>                  (       a  SUl         U RB                  (       a  SUl"        U RF                  (       a  SUl$        U RJ                  (       a  SUl&        U RN                  (       a  SUl(        U RR                  (       a  SUl*        U$ )NFTr6   )+r/   rQ   disable_gelur9   disable_layer_normr:   disable_rotary_embeddingsr<   disable_attentionr;   r=   disable_skip_layer_normr?   disable_embed_layer_normr@   disable_bias_skip_layer_normrA   disable_bias_gelurB   rC   disable_shape_inferencerE   rF   use_mask_indexrV   no_attention_maskrY   use_group_norm_channels_firstrG   disable_nhwc_convrJ   disable_group_normrK   disable_skip_group_normrL   disable_bias_splitgelurM   disable_packed_qkvrN   disable_packed_kvrO   disable_bias_addrP   )argsoptionss     r   parseFusionOptions.parsef   s   0"'G""(-G%))/4G,!!',G$((/3G,''-2G*((.3G+,,27G/!!',G$))04G-''-2G*%%,0G)**51&&**40!!**,??5511380%%+0(&&,1)++16.**05-&&,1)%%+0($$*/'r   parserc                    U R                  SSSSS9  U R                  SS9  U R                  SSSSS9  U R                  SS	9  U R                  S
SSSS9  U R                  SS9  U R                  SSSSS9  U R                  SS9  U R                  SSSSS9  U R                  SS9  U R                  SSSSS9  U R                  SS9  U R                  SSSSS9  U R                  SS9  U R                  SSSSS9  U R                  SS9  U R                  SSSSS9  U R                  SS9  U R                  SSSS S9  U R                  SS!9  U R                  S"SSS#S9  U R                  SS$9  U R                  S%SSS&S9  U R                  SS'9  U R                  S(SSS)S9  U R                  SS*9  U R                  S+SSS,S9  U R                  SS-9  U R                  S.SSS/S9  U R                  SS09  U R                  S1SSS2S9  U R                  SS39  U R                  S4SSS5S9  U R                  SS69  U R                  S7SSS8S9  U R                  SS99  U R                  S:SSS;S9  U R                  SS<9  U R                  S=SSS>S9  U R                  SS?9  U R                  S@SSSAS9  U R                  SSB9  U R                  SCSSSDS9  U R                  SSE9  U R                  SFSSSGS9  g )HNz--disable_attentionF
store_truezdisable Attention fusion)requiredactionhelp)rc   z--disable_skip_layer_normz%disable SkipLayerNormalization fusion)rd   z--disable_embed_layer_normz&disable EmbedLayerNormalization fusion)re   z--disable_bias_skip_layer_normz2disable Add Bias and SkipLayerNormalization fusion)rf   z--disable_bias_geluz)disable Add Bias and Gelu/FastGelu fusion)rg   z--disable_layer_normz!disable LayerNormalization fusion)ra   z--disable_geluzdisable Gelu fusion)r`   z--enable_gelu_approximationz+enable Gelu/BiasGelu to FastGelu conversion)rC   z--disable_shape_inferencez disable symbolic shape inference)rh   z--enable_gemm_fast_geluzenable GemmfastGelu fusion)rF   z--use_mask_indexzWuse mask index to activate fused attention to speed up. It requires right-side padding!)ri   z--use_raw_attention_maskzuse raw attention mask. Use this option if your input is not right-side padding. This might deactivate fused attention and get worse performance.)rV   z--no_attention_maskz1no attention mask. Only works for model_type=bert)rj   z--use_multi_head_attentionzUse MultiHeadAttention instead of Attention operator for testing purpose. Note that MultiHeadAttention might be slower than Attention when qkv are not packed. )r=   z--disable_group_normz9not fuse GroupNorm. Only works for model_type=unet or vae)rm   z--disable_skip_group_normzPnot fuse Add + GroupNorm to SkipGroupNorm. Only works for model_type=unet or vae)rn   z--disable_packed_kvz[not use packed kv for cross attention in MultiHeadAttention. Only works for model_type=unet)rq   z--disable_packed_qkvz[not use packed qkv for self attention in MultiHeadAttention. Only works for model_type=unet)rp   z--disable_bias_addz0not fuse BiasAdd. Only works for model_type=unet)rr   z--disable_bias_splitgeluz6not fuse BiasSplitGelu. Only works for model_type=unet)ro   z--disable_nhwc_convz:Do not use NhwcConv. Only works for model_type=unet or vae)rl   z--use_group_norm_channels_firstznUse channels_first (NCHW) instead of channels_last (NHWC) for GroupNorm. Only works for model_type=unet or vae)rk   z--disable_rotary_embeddingsz5Do not fuse rotary embeddings into RotaryEmbedding op)add_argumentset_defaults)rw   s    r   add_argumentsFusionOptions.add_arguments   sv   !+	 	 	
 	e4'8	 	 	
 	E:(9	 	 	
 	U;,E	 	 	
 	?!<	 	 	
 	e4"4	 	 	
 	u5&	 	 	
 	/)>	 	 	
 	e<'3	 	 	
 	E:%-	 	 	
 	%8j	 	 	
 	51& e	 	 	
 	59!D	 	 	
 	e4(d	 	 	
 	U;"L	 	 	
 	u5'c	 	 	
 	E:!n	 	 	
 	e4"n	 	 	
 	u5 C	 	 	
 	U3&I	 	 	
 	59!M	 	 	
 	e4- B	 	 	
 	%@)H	 	 	
r   )rH   rI   r>   r;   rP   rB   rA   rM   r@   r9   rC   rF   rK   r:   rJ   rO   rN   rD   r<   rE   rL   r?   rG   r=   N)T)r   r   r   r   __doc__rR   rV   rY   r   r]   staticmethodru   r   r   r   r   r   r   r/   r/   (   sV    1-(^J@./ . 3 3j w
n w
 w
r   r/   N)argparser   enumr   r   r   r/   r   r   r   <module>r      s/   
 $  )d )"l
 l
r   