
    3j9                         S SK JrJrJr  S SKJr  SSKJr  \" 5       (       a  SSKr\R                  " \
5      r " S S\5      rg)	   )is_compressed_tensors_availableis_torch_availablelogging)CompressedTensorsConfig   )HfQuantizer    Nc                      ^  \ rS rSr% SrSr\\S'   S\4U 4S jjrS r	SS jr
S	 rS
 rS r\S 5       rS\4S jrS\4S jrSrU =r$ )CompressedTensorsHfQuantizer   zu
Quantizer for the compressed_tensors package.  Loads and restores models to
quantized state with compressed_tensors
Tquantization_configc                    > [         TU ]  " U40 UD6  UR                  5         SSKJn  UR                  U5      U l        UR                  U l        Xl        g )Nr	   )ModelCompressor)	super__init__	post_initcompressed_tensors.compressorsr   from_compression_config
compressorrun_compressedr   )selfr   kwargsr   	__class__s       n/home/wildlama/miniconda3/lib/python3.13/site-packages/transformers/quantizers/quantizer_compressed_tensors.pyr   %CompressedTensorsHfQuantizer.__init__$   sN    ,77
 	%%'B)AABUV1@@#6     c                 8    [        5       (       d  [        S5      eg )NzyUsing `compressed_tensors` quantized models requires compressed-tensors>=0.15.0: `pip install compressed-tensors>=0.15.0`)r   ImportError)r   argsr   s      r   validate_environment1CompressedTensorsHfQuantizer.validate_environment1   s"    .00;  1r   returnc                 X    U[         R                  :w  a  [        R                  S5        U$ )NzZWe suggest you to set `dtype=torch.float16` for better efficiency with compressed_tensors.)torchfloat16loggerinfo)r   dtypes     r   update_dtype)CompressedTensorsHfQuantizer.update_dtype8   s     EMM!KKtur   c                     SSK Jn  U R                  R                  nU" XU R                  5        U R                  R
                  (       a  U R                  R                  US9  g g )Nr	   )apply_quantization_configmodel)compressed_tensors.quantizationr,   r   r   r   is_quantization_compressedcompress_model)r   r.   r   r,   ct_quantization_configs        r   $_process_model_before_weight_loadingACompressedTensorsHfQuantizer._process_model_before_weight_loading=   sP    M!%!D!D 	"%ATATU##>>OO***7 ?r   c                     U R                   R                  (       a,  U R                  (       d  U R                  R	                  US9  ggg)z3Decompress loaded model if necessary - need for qatr-   N)r   r0   r   r   decompress_model)r   r.   r   s      r   #_process_model_after_weight_loading@CompressedTensorsHfQuantizer._process_model_after_weight_loadingG   s:     ##>>tGZGZOO,,5,9 H[>r   c                     SSSSSS.nUR                  5       bD  UR                  5       R                  b)  UR                  5       R                  R                  U5        U$ )Ncolwiserowwise)z0layers.*.feed_forward.experts.*.gate_proj.weightz6layers.*.feed_forward.experts.*.gate_proj.weight_scalez.layers.*.feed_forward.experts.*.up_proj.weightz4layers.*.feed_forward.experts.*.up_proj.weight_scalez0layers.*.feed_forward.experts.*.down_proj.weight)get_text_configbase_model_tp_planupdate)r   configadditional_plans      r   update_tp_plan+CompressedTensorsHfQuantizer.update_tp_planO   s_    @IFO>GDM@I
 !!#/F4J4J4L4_4_4k""$77>>Or   c                     g)NT r   s    r   is_trainable)CompressedTensorsHfQuantizer.is_trainable\       r   c                 h    U R                   (       + =(       d    U R                  R                  (       + $ )z7Loaded Models can carry out quantization aware training)r   r   r0   rE   s    r   is_qat_trainable-CompressedTensorsHfQuantizer.is_qat_trainable`   s'     &&&ad.F.F.a.a*aar   c                     g)z>Models quantized using compressed tensors can be saved to diskTrD   rE   s    r   is_serializable,CompressedTensorsHfQuantizer.is_serializablee   rH   r   )r   r   r   )r(   torch.dtyper"   rO   )__name__
__module____qualname____firstlineno____doc__requires_calibrationr   __annotations__r   r    r)   r3   r7   rA   propertyrF   boolrJ   rM   __static_attributes____classcell__)r   s   @r   r   r      so    
  007,C 7
8:  b$ b
  r   r   )utilsr   r   r   utils.quantization_configr   baser   r$   
get_loggerrP   r&   r   rD   r   r   <module>r_      s@     Q P ?  			H	%L; Lr   