
    3j7                        S SK Jr  SSKJr  SSKJr  SSKJrJrJ	r	  SSK
JrJrJrJrJr  \" 5       (       a
  S SKrS SKJr  \" 5       (       a  S S	KJr  \" 5       r\" 5       (       a  \(       d  S SKr\R.                  " \5      r " S
 S\5      r " S S\R"                  R6                  5      r " S S\R:                  5      r\" SS9S 5       r SS\ \!   S-  4S jjr"g)    )	lru_cache   )ACT2FN)ConversionOps)get_module_from_name	on_deviceshould_convert_module)is_accelerate_availableis_fbgemm_gpu_availableis_torch_availableis_torch_xpu_availableloggingN)nn)init_empty_weightsc            	           \ rS rSrS r S	S\\\R                  \	\R                     -  4   S\R                  R                  S-  S\\\R                  4   4S jjrSrg)
FbgemmFp8Quantize,   c                     Xl         g Nhf_quantizer)selfr   s     ^/home/wildlama/miniconda3/lib/python3.13/site-packages/transformers/integrations/fbgemm_fp8.py__init__FbgemmFp8Quantize.__init__-   s    (    N
input_dictmodelreturnc                 Z   [        UR                  5       5      S   u  pEUS   nSSKJn  [	        X$5      u  px[        Xv5      (       Ga.  US:X  a  UR                  SS5      n	U	R                  n
U	R                  SU
S   5      n[        U5         [        U5      u  pS S S 5        WR                  U
5      nUR                  SS5      nWR                  U
S   SU
S   5      nOUS:X  a  UR                  SS5      n	U	R                  n
U	R                  SU
S   5      n[        U5         [        U5      u  pS S S 5        WR                  U
5      nUR                  SS5      nWR                  U
S   U
S   S5      nO][        U5         [        U5      u  pS S S 5        [        R                  R                  WR                  UR                  S   S5      5      nU[        R                  R                  W5      U S3W0$ ! , (       d  f       GNf= f! , (       d  f       N= f! , (       d  f       N= f)	Nr   r   )FbgemmFp8Llama4TextExpertsgate_up_proj   	down_proj_scale)tupleitemsintegrationsr!   r   
isinstance	transposeshapereshaper   quantize_fp8_per_rowtorchr   	Parameterview)r   r   r   kwargs
target_keyvaluer!   moduletensor_nametransposed_paramoriginal_shapeflattened_paramnew_value_flatweight_scale_flat	new_valueweight_scales                   r   convertFbgemmFp8Quantize.convert0   s
    "*"2"2"45a8
a=25Ef99n, $)??1a#8  "2!7!7"2":":2~b?Q"R /8L_8]5N 0 +22>B	%//15	0889JA~^_O`a+ $)??1a#8  "2!7!7"2":":2~b?Q"R /8L_8]5N 0 +22>B	%//15	0889JN[\L]_`a5!*>u*E'	 " 88--l.?.?@R@RST@UWX.YZLEHH..y9j\;PR^__; 0/" 0/ "!s$   G9#HH9
H
H
H*r   r   )__name__
__module____qualname____firstlineno__r   dictstrr/   Tensorlistr   Moduler>   __static_attributes__ r   r   r   r   ,   sn    ) )-5`ellT%,,-???@5` xx%5`
 
c5<<	 5` 5`r   r   c                   H   ^  \ rS rSr\R
                  4U 4S jjrS rSrU =r	$ )FbgemmFp8Linearh   c                 ^  > [         TU ]  XU5        Xl        X l        [        R
                  R                  [        R                  " X!4US95      U l        [        R
                  R                  [        R                  " US4[        R                  S95      U l
        U R                  S[        R                  " S/[        R                  S9SS9  U(       aP  [        R
                  R                  [        R                  " U R                  [        R                  S95      U l        g S U l        g )Ndtyper#   input_scale_ubF
persistent)superr   in_featuresout_featuresr/   r   r0   zerosweightfloat32r=   register_bufferfloatbias)r   rU   rV   r\   rP   	__class__s        r   r   FbgemmFp8Linear.__init__i   s    D9&(hh((l5PX])^_!HH..u{{L!;LTYTaTa/bc-u{{A3ekk/R_de**5;;8I8IRWR_R_+`aDIDIr   c           	         / UR                   S S QSP7n[        U5         [        UR                  SUR                   S   5      R	                  5       U R
                  S9u  p4S S S 5        U R                  R                  [        R                  5      n[        (       ac  [        R                  " WU R                  R                  5       WR                  S5      UR                  5       UR                  U R                   S9nOR[        R"                  R$                  R'                  WU R                  WUSS9nU R                   b  X`R                   -   OUnUR)                  U5      nAAU$ ! , (       d  f       GN= f)Nr$   )scale_ub)scale_ascale_b	out_dtyper\   Tuse_fast_accum)r,   r   r.   r1   
contiguousrQ   r=   tor/   rY   _is_torch_xpu_available
_scaled_mmrX   t	unsqueezerP   r\   opsfbgemmf8f8bf16_rowwiser-   )r   xoutput_shapex_quantizedx_scaleweight_scale_float32outputs          r   forwardFbgemmFp8Linear.forwardw   s3   *"*r* q\#7r1772;'224t?R?R$ K 
  $0033EMMB""%%))"-,..0''YYF YY%%66T[['3GX\ 7 F ,099+@Vii'fF-- \s   AE((
E7)r\   rU   rV   rX   r=   )
r@   rA   rB   rC   r/   float8_e4m3fnr   ru   rI   __classcell__r]   s   @r   rL   rL   h   s    >C>Q>Q  r   rL   c                   H   ^  \ rS rSr\R
                  4U 4S jjrS rSrU =r	$ )r!      c                 T  > [         TU ]  5         UR                  U l        UR                  U l        UR
                  U l        U R                  U l        [        UR                     U l	        [        R                  R                  [        R                  " U R                  U R
                  SU R                  -  4[        R                  S95      U l        [        R                  R                  [        R                  " U R                  SU R                  S-  4[        R                   S95      U l        [        R                  R                  [        R                  " U R                  U R                  U R
                  4[        R                  S95      U l        [        R                  R                  [        R                  " U R                  U R
                  S4[        R                   S95      U l        U R)                  S[        R                  " S/[        R*                  S9SS9  g )Nr   rO   r#   rQ   FrR   )rT   r   num_local_expertsnum_expertsintermediate_sizehidden_size
expert_dimr   
hidden_actact_fnr/   r   r0   rW   rw   r"   rY   gate_up_proj_scaler%   down_proj_scalerZ   r[   )r   configrP   r]   s      r   r   #FbgemmFp8Llama4TextExperts.__init__   s   !33!'!9!9!--00V../!HH..KK))4+;+;Q=PQY^YlYlm
 #((("4"4KK))1doo.AB%--X#
 ++KK))4??D<L<LMUZUhUhi
  %xx11KK))4+;+;Q?u}}U 
 	-u{{A3ekk/R_der   c           
      "
   UR                  U R                  SU R                  5      nSn[        R                  " U5      n[        U R                  5       GH\  nX   nUR                  SU R                  5      n[        U5         [        XbU R                  5      u  pxSSS5        U R                  R                  S   S-  n	U R                  R                  [        R                  5      n
[        (       GaC  [        R                   " WU R                  U   R#                  SS5      SU	 R%                  5       R'                  5       WR)                  S5      X   S   SU	 R                  SS5      R%                  5       R'                  5       UR*                  S9n[        R                   " UU R                  U   R#                  SS5      U	S R%                  5       R'                  5       UR)                  S5      X   S   U	S R                  SS5      R%                  5       R'                  5       UR*                  S9nO[        R,                  R.                  R1                  WU R                  U   R#                  SS5      SU	 R%                  5       WX   S   SU	 R                  SS5      R%                  5       SS9n[        R,                  R.                  R1                  UU R                  U   R#                  SS5      U	S R%                  5       UX   S   U	S R                  SS5      R%                  5       SS9nXR3                  U5      -  n[        U5         [        XU R                  5      u  pSSS5        U R4                  R                  [        R                  5      n[        (       a  [        R                   " WU R6                  U   R#                  SS5      R%                  5       WR)                  S5      UU   R                  SS5      R%                  5       R'                  5       UR*                  S9nOw[        R,                  R.                  R1                  WU R6                  U   R#                  SS5      R%                  5       WUU   R                  SS5      R%                  5       SS9nUX4'   GM_     UR                  UR8                  5      nUR                  SU R                  5      $ ! , (       d  f       GNW= f! , (       d  f       GN= f)	z
Args:
    hidden_states (torch.Tensor): (batch_size * token_num, hidden_size)
Returns:
    torch.Tensor: (batch_size * token_num, hidden_size)
r$   Nr   r   r#   )ra   rb   rc   Trd   )r1   r~   r   r/   
empty_likeranger-   r   r.   rQ   r"   r,   r   rg   rY   rh   ri   r+   rf   rj   rk   rP   rl   rm   rn   r   r   r%   device)r   hidden_states
num_tokensnext_statesiexpert_hiddenexpert_hidden_reshapedexpert_quantizedexpert_scalesharded_expert_dimgate_up_proj_scale_float32gateup	activatedactivated_quantizedactivated_scaledown_proj_scale_float32expert_outputs                     r   ru   "FbgemmFp8Llama4TextExperts.forward   s    &**4+;+;RAQAQR
 &&}5t''(A),M%2%:%:2t?O?O%P"121E*8K8K2.  3 "&!2!2!8!8!<!A)-)@)@)C)CEMM)R&&&''$%%a(221a89L:LMXXZ\\^(222669!<=P>PQVVWY[\]hhjlln+11 %%$%%a(221a89K9LMXXZ\\^(222669!<=O=PQVVWY[\]hhjlln+11 yy''88$%%a(221a89L:LMXXZ .1!45H6HINNrSTU``b#' 9  YY%%66$%%a(221a89K9LMXXZ .1!45G5HINNrSTU``b#' 7  [[..I9%7KIcgcvcv7w4# & '+&:&:&=&=emm&L#&& % 0 0'NN1%//15@@B+55b93A6;;BBMMOQQS+11! !&		 0 0 A A'NN1%//15@@B#+A.33B:EEG#' !B ! +KNE )F "nn]%9%9:D$4$455 32P &%s   S-S?-
S<	?
T	)	r   r%   r   r   r"   r   r   r   r~   )
r@   rA   rB   rC   r/   rY   r   ru   rI   rx   ry   s   @r   r!   r!      s    %*]] f0R6 R6r   r!   r#   )maxsizec                      [         (       a  SSKJn   U " S5      R                  $ [        R
                  R                  R                  $ )Nr#   
get_kernelzkernels-community/fp8-fbgemm)rh   hub_kernelsr   r.   r/   rl   rm   r   s    r   get_quantize_fp8_per_rowr     s3    +89NNN99000r   modules_to_not_convertc                    [        5       qSnU(       a  0 OSS0nU R                  5        H  u  px[        Xq5      (       d  M  Sn	[	        SS9   UR
                  R                  S:X  a@  [        U R                  SU R                  5      n
[        U
=(       d    U R                  5      n	O^[        U[        R                  5      (       a?  [        UR                  UR                  UR                   SL40 UD6n	U	R#                  S5        SSS5        U	c  M  U R%                  Xy5        SnM     U(       d  [&        R)                  S5        U $ ! , (       d  f       NH= f)	a  
A helper function to replace all `torch.nn.Linear` modules by `FbgemmFp8Linear` modules.
This will enable running your models using high performance fp8 kernel from FBGEMM library.

Parameters:
    model (`torch.nn.Module`):
        Input model or `torch.nn.Module` as the function is run recursively.
    modules_to_not_convert (`list[`str`]`, *optional*, defaults to `None`):
        Names of the modules to not convert. In practice we keep the `lm_head` in full precision for numerical stability reasons.
    quantization_config (`FbgemmFp8Config`):
        The quantization config object that contains the quantization parameters.
    pre_quantized (`book`, defaults to `False`):
        Whether the model is pre-quantized or not
FrP   NT)include_buffersLlama4TextExpertstext_configzYou are loading your model using FP8 quantization but no linear modules were found in your model. Please double check your model architecture, or submit an issue on github if you think this is a bug.)r   r.   named_modulesr	   r   r]   r@   getattrr   r!   r*   r   LinearrL   rU   rV   r\   requires_grad_set_submoduleloggerwarning)r   r   quantization_configpre_quantizedtp_planhas_been_replacedmodule_kwargsmodule_namer5   
new_moduler   s              r   replace_with_fbgemm_fp8_linearr     s.   $ 45'Bgt_M$224$[II
5((,??
 &ellM5<<P78Su||T
FBII..,&&''KKt+ $	
 ))%0 6" K4 5  58 	
 L= 65s   B9E
E	)NNFN)#	functoolsr   activationsr   core_model_loadingr   quantizers.quantizers_utilsr   r   r	   utilsr
   r   r   r   r   r/   r   
accelerater   rh   fbgemm_gpu.experimental.gen_ai
fbgemm_gpu
get_loggerr@   r   r   r   rL   rH   r!   r   rG   rE   r   rJ   r   r   <module>r      s        . ` `  -02 %<)			H	%9` 9`x+ehhoo +\k6 k6\ 11 1 tx:#'9t#3:r   