
    3j:*                         S SK r S SKrS SKrS SKJr  S SKJrJrJr  \" 5       (       a  SSK	J
r
  SSKJr  \" 5       (       a  S SKJr  S SKJr  \R"                  " \5      rS	 rS
 r " S S\
5      r " S S\
5      rg)    N)logging)is_torch_accelerator_availableis_torch_availableis_torchao_available   )ConversionOps)get_module_from_name)unflatten_tensor_state_dict)is_metadata_torchaoc                 (   SSK Jn  SSKJn  [	        X5      (       a*  U R
                  R                   SU R                  5        S3$ [	        X5      (       a<  U R
                  R                   SU R                   S[        U R                  5       S3$ g )Nr   )AffineQuantizedTensor)LinearActivationQuantizedTensor()z(activation=	, weight=)
torchao.dtypesr   7torchao.quantization.linear_activation_quantized_tensorr   
isinstance	__class____name___quantization_typeinput_quant_funcoriginal_weight_tensor)weightr   r   s      [/home/wildlama/miniconda3/lib/python3.13/site-packages/transformers/integrations/torchao.pyr   r   &   s    4g&00""++,Af.G.G.I-J!LL&::""++,L9P9P8QQZ[mnt  oL  oL  \M  [N  NO  P  	P ;    c                    [        U R                  5      nUc7  SU R                  R                  S    SU R                  R                  S    S3$ SU R                  R                  S    SU R                  R                  S    SU 3$ )Nzin_features=   z, out_features=r   z, weight=Noner   )r   r   shape)selfr   s     r   _linear_extra_reprr!   1   s    ,F~dkk//23?4;;CTCTUVCWBXXeffdkk//23?4;;CTCTUVCWBXXabhaijjr   c                       \ rS rSrS rS r   SS\\\R                  4   S\R                  R                  S-  S\S-  S\\\R                  4   4S	 jjrS
rg)TorchAoQuantize9   c                     Xl         g Nhf_quantizerr    r(   s     r   __init__TorchAoQuantize.__init__:       (r   c                    SSK Jn  [        UR                  5       5      R                  nU R
                  R                  (       an  UR                  S:X  a^  [        5       (       a  [        R                  R                  5       OSnUR                  U5        U" X/UQ70 UD6  UR                  S5        gU" X/UQ70 UD6  g)a  Run quantize_, moving to CUDA first if CPU offloading is active.

Some torchao quantization ops (e.g. int4 packing) only have CUDA kernels.
When a layer is destined for CPU (e.g. CPU offloading), we temporarily move
it to CUDA for quantization, then move the result back to CPU.
r   )	quantize_cpucudaN)torchao.quantizationr.   next
parametersdevicer(   offload_to_cputyper   torchacceleratorcurrent_acceleratorto)r    moduleconfigargskwargsr.   target_devicer4   s           r   	_quantizeTorchAoQuantize._quantize=   s     	3V..0188++0B0Be0K@^@`@`U&&::<flFIIff6t6v6IIef6t6v6r   N
input_dictmodelfull_layer_namereturnc                    [        UR                  5       5      S   u  pg[        U[        5      (       a  US   OUn[	        X#5      u  p[
        R                  R                  XwR                  S9UR                  U	'   UR                  5       n
[        U5      [        U
5      :H  nU R                  R                  R                  nU(       a+  U(       a$  [        UR                   R#                  SS9SS5        SSKJn  U R                  R                  R)                  5       n[        X5      (       Ga  UR+                  SS	5      u  nnS nX>R,                  ;   a-  UR/                  S
5      (       a   S5       eUR0                  U   nOXR,                  ;   a-  UR/                  S
5      (       a   S5       eUR0                  U   nOUR,                   H{  nUR/                  S
5      (       d  M  [2        R4                  " USS  U5      (       a  UR0                  U   n  OP[2        R4                  " USS  U5      (       d  Ml  UR0                  U   n  O   UR0                  R7                  SS 5      nUb  US:X  a  U(       a!  U(       a  UR8                  R;                  5       nU R=                  UUS 5        UR?                  U5        SUl         URC                  SS9 H
  nSUl         M     U(       a  U(       a  SW0$ 0 $ U" UU05      nU R=                  UUS S9  UR?                  U5        SUl         URC                  SS9 H
  nSUl         M     0 $ X70$ U(       a!  U(       a  UR8                  R;                  5       nU R=                  XR                  R                  R)                  5       5        UR?                  U5        SUl         URC                  SS9 H
  nSUl         M     U(       a  U(       a  SW0$ 0 $ )Nr   )requires_gradT)decodertie_word_embeddingsF)FqnToConfig.r   zre:zHparam fqn should not start with`re:`, which is used for specifying regexzImodule fqn should not start with`re:`, which is used for specifying regex   _defaultr   c                     g)NT )xfqns     r   <lambda>)TorchAoQuantize.convert.<locals>.<lambda>   s    dr   recursezlm_head.weight)	filter_fn)"tupleitemsr   listr	   r7   nn	ParameterrG   _parametersget_input_embeddingsidr(   quantization_configuntie_embedding_weightssetattrr<   get_text_configr1   rJ   get_apply_tensor_subclassrsplitfqn_to_config
startswithmodule_fqn_to_configre	fullmatchgetr   cloner@   discard_is_hf_initializedr3   )r    rB   rC   rD   missing_keysr>   _valuer;   tensor_nameinput_embedis_embedding_paramr`   rJ   r<   
module_fqntop_level_param_namecmaybe_module_fqn_patternlm_headparamcustom_param_fqn_configs                         r   convertTorchAoQuantize.convertO   s    ))+,Q/&ud33a25J*/((*<*<UReRe*<*f;' 002Z2k?:"&"3"3"G"G"_"_"'9ELL000>@UW\]4""66PPRf**/>/E/Ec1/M,J,A"6"66%0077 ^7 //@333%0077 _7 //
; 170D0D,3>>uEE &>qr&BOTT"778PQ&>qr&BJOO"778PQ 1E 3377
DIA}'83).E"(--"5"5"7NN61/BD ((904F-
 "(!2!25!2!A370 "B:LQh,g6pnpp /:;OQR:S.T+NN6+BdNS ((904F-!'!2!25!2!A370 "BI#++"9mm))+Gv00DD^^`a_-$(!&&u&5E'+E$ 6.@E\ '*dbddr   r'   )NNN)r   
__module____qualname____firstlineno__r*   r@   dictstrr7   TensorrZ   Moduler{   __static_attributes__rO   r   r   r#   r#   9   s    )7* )-&*\eell*+\e xx%\e t	\e 
c5<<	 \e \er   r#   c                       \ rS rSrS r    SS\\\R                  4   S\	\   S-  S\R                  R                  S-  S\S-  S\\\R                  4   4
S	 jjrS
rg)TorchAoDeserialize   c                     Xl         g r&   r'   r)   s     r   r*   TorchAoDeserialize.__init__   r,   r   NrB   source_patternsrC   rD   rE   c           
      X   [        UR                  5       5      S   U;  n0 nSR                  UR                  S5      SS 5      n	U(       a'  [	        US   [         5      (       a	  US   S   n
OYUS   n
OSUR                  5        H?  n[        X   5      S:w  a  [        SU S[        X   5       S	35      eX   S   X SU 3'   MA     U(       a  UW
0$ [        U R                  R                  5      (       d  [        S
5      e[        XR                  R                  5      u  pU(       a   eX   n[        X45      u  nn[	        U[        R                  R                  5      (       a   [        R                   " ["        U5      Ul        SUl        UR)                  SS9 H
  nSUl        M     XN0$ )a  
Consolidates tensor subclass components before reconstructing the object

For example:
    input_dict: {
        "_weight_qdata": torch.Tensor,
        "_weight_scale": torch.Tensor,
    }
    full_layer_name: "model.layers.0.self_attn.k_proj.weight"

    Given this, we reconstruct a Float8Tensor instance using the qdata and scale
    and return it as a dictionary with the full_layer_name as the key and the recovered
    Float8Tensor instance as the value.
r   rK   Nr   r   zExpected a single tensor for z	 but got z tensors insteadz$Invalid torchao safetensors metadataTFrT   )rY   keysjoinsplitr   len
ValueErrorr   r(   metadatar
   r	   r7   rZ   Lineartypes
MethodTyper!   
extra_reprrm   r3   )r    rB   r   rC   rD   rn   r>   is_unsafe_serialization
param_data
layer_namer   suffixunflattened_state_dictleftover_state_dict	new_paramr;   ro   ry   s                     r   r{   TorchAoDeserialize.convert   s   . #'z'8"9!"<O"S
XXo33C8"=>
"*X.55#H-a0#H-$//+z)*a/$7xyZM_I`Haaqr  8B7I!7L
\6(34 , ##V,,$T%6%6%?%?@@CDD6Q))227
3 '&&*;	(@	fehhoo.. % 0 01CV LF$(!&&u&5E'+E$ 6  ++r   r'   )NNNN)r   r}   r~   r   r*   r   r   r7   r   rY   rZ   r   r{   r   rO   r   r   r   r      s    ) -1(,&*<,ell*+<, cT)<, xx%	<,
 t<, 
c5<<	 <, <,r   r   )rh   r   r7   transformers.utilsr   transformers.utils.import_utilsr   r   r   core_model_loadingr   quantizers.quantizers_utilsr	   1torchao.prototype.safetensors.safetensors_supportr
   /torchao.prototype.safetensors.safetensors_utilsr   
get_loggerr   loggerr   r!   r#   r   rO   r   r   <module>r      s{    
   & t t 2 >  T			H	%Pkrem rej@, @,r   