
    
3j                       S r SSKJr  SSKrSSKrSSKrSSKrSSKrSSK	J
r
  SSKJr  SSKJrJr  SSKJr  SS	KJrJrJrJr  \" 5       (       a  SSKr\R0                  " \5      r " S
 S\\5      r\
 " S S5      5       r\
 " S S\5      5       r\
 " S S\5      5       r\
 " S S\5      5       r \
 " S S\5      5       r!\
 " S S\5      5       r"g)z
Adapted from
https://github.com/huggingface/transformers/blob/52cb4034ada381fe1ffe8d428a1076e5411a8026/src/transformers/utils/quantization_config.py
    )annotationsN)	dataclass)Enum)AnyCallable)version   )	deprecateis_torch_availableis_torchao_versionloggingc                  (    \ rS rSrSrSrSrSrSrSr	g)	QuantizationMethod-   bitsandbytesgguftorchaoquantomodelopt N)
__name__
__module____qualname____firstlineno__BITS_AND_BYTESGGUFTORCHAOQUANTOMODELOPT__static_attributes__r       b/home/wildlama/miniconda3/lib/python3.13/site-packages/diffusers/quantizers/quantization_config.pyr   r   -   s    #NDGFHr!   r   c                  p    \ rS rSr% SrS\S'   / r\SS j5       rSS jr	SS jr
S rS	 rSSS
 jjrS rSrg)QuantizationConfigMixin5   z%
Mixin class for quantization config
r   quant_methodc                    U " S0 UD6n/ nUR                  5        H4  u  pg[        XF5      (       d  M  [        XFU5        UR                  U5        M6     U H  nUR	                  US5        M     U(       a  XC4$ U$ )an  
Instantiates a [`QuantizationConfigMixin`] from a Python dictionary of parameters.

Args:
    config_dict (`dict[str, Any]`):
        Dictionary that will be used to instantiate the configuration object.
    return_unused_kwargs (`bool`, *optional*, defaults to `False`):
        Whether or not to return a list of unused keyword arguments. Used for `from_pretrained` method in
        `PreTrainedModel`.
    kwargs (`dict[str, Any]`):
        Additional parameters from which to initialize the configuration object.

Returns:
    [`QuantizationConfigMixin`]: The configuration object instantiated from those parameters.
Nr   )itemshasattrsetattrappendpop)clsconfig_dictreturn_unused_kwargskwargsconfig	to_removekeyvalues           r"   	from_dict!QuantizationConfigMixin.from_dict>   sw    $ #{#	 ,,.JCv##U+  % ) CJJsD!   >!Mr!   c                    [        USSS9 nU R                  5       n[        R                  " USSS9S-   nUR	                  U5        SSS5        g! , (       d  f       g= f)	a  
Save this instance to a JSON file.

Args:
    json_file_path (`str` or `os.PathLike`):
        Path to the JSON file in which this configuration instance's parameters will be saved.
    use_diff (`bool`, *optional*, defaults to `True`):
        If set to `True`, only the difference between the config instance and the default
        `QuantizationConfig()` is serialized to JSON file.
wzutf-8)encodingr	   Tindent	sort_keys
N)opento_dictjsondumpswrite)selfjson_file_pathwriterr.   json_strings        r"   to_json_file$QuantizationConfigMixin.to_json_file_   sL     .#8F,,.K**[dKdRKLL%	 988s   ;A
Ac                B    [         R                  " U R                  5      $ )
Serializes this instance to a Python dictionary. Returns:
    `dict[str, Any]`: Dictionary of all the attributes that make up this configuration instance.
)copydeepcopy__dict__rC   s    r"   r?   QuantizationConfigMixin.to_dictp   s    
 }}T]]++r!   c              #     #    [         R                  " U R                  5      R                  5        H
  u  pX4v   M     g7f)zTallows `dict(obj)` for situations where obj may be a dict or QuantizationConfigMixinN)rK   rL   rM   r(   )rC   attrr4   s      r"   __iter__ QuantizationConfigMixin.__iter__w   s0     ==7==?KD+ @s   >A c                T    U R                   R                   SU R                  5        3$ )N )	__class__r   to_json_stringrN   s    r"   __repr__ QuantizationConfigMixin.__repr__|   s(    ..))*!D,?,?,A+BCCr!   c                    USL a  U R                  5       nOU R                  5       n[        R                  " USSS9S-   $ )ar  
Serializes this instance to a JSON string.

Args:
    use_diff (`bool`, *optional*, defaults to `True`):
        If set to `True`, only the difference between the config instance and the default `PretrainedConfig()`
        is serialized to JSON string.

Returns:
    `str`: String containing all the attributes that make up this configuration instance in JSON format.
Tr	   r:   r=   )to_diff_dictr?   r@   rA   )rC   use_diffr.   s      r"   rW   &QuantizationConfigMixin.to_json_string   s=     t++-K,,.Kzz+a4@4GGr!   c                    / nUR                  5        H4  u  p4[        X5      (       d  M  [        XU5        UR                  U5        M6     UR                  5        VVs0 s H  u  p4X2;  d  M  X4_M     nnnU$ s  snnf )ao  
Updates attributes of this class instance with attributes from `kwargs` if they match existing attributes,
returning all the unused kwargs.

Args:
    kwargs (`dict[str, Any]`):
        Dictionary of attributes to tentatively update this class.

Returns:
    `dict[str, Any]`: Dictionary containing all the key-value pairs that were not used to update the instance.
)r(   r)   r*   r+   )rC   r0   r2   r3   r4   unused_kwargss         r"   updateQuantizationConfigMixin.update   sq     	 ,,.JCt!!5)  % ) 7=lln]n
H\n] ^s   A8-A8r   NF)rD   zstr | os.PathLikereturnzdict[str, Any])T)r\   boolrd   str)r   r   r   r   __doc____annotations___exclude_attributes_at_initclassmethodr5   rG   r?   rR   rX   rW   r`   r    r   r!   r"   r$   r$   5   sH     %$"$ @&",
DH$r!   r$   c                      \ rS rSrSr/ SQr          SS jr\S 5       r\R                  SS j5       r\S 5       r
\
R                  SS	 j5       r
S
 rS rS rSS jrS rSS jrSrg)BitsAndBytesConfig   a  
This is a wrapper class about all possible attributes and features that you can play with a model that has been
loaded using `bitsandbytes`.

This replaces `load_in_8bit` or `load_in_4bit` therefore both options are mutually exclusive.

Currently only supports `LLM.int8()`, `FP4`, and `NF4` quantization. If more methods are added to `bitsandbytes`,
then more arguments will be added to this class.

Args:
    load_in_8bit (`bool`, *optional*, defaults to `False`):
        This flag is used to enable 8-bit quantization with LLM.int8().
    load_in_4bit (`bool`, *optional*, defaults to `False`):
        This flag is used to enable 4-bit quantization by replacing the Linear layers with FP4/NF4 layers from
        `bitsandbytes`.
    llm_int8_threshold (`float`, *optional*, defaults to 6.0):
        This corresponds to the outlier threshold for outlier detection as described in `LLM.int8() : 8-bit Matrix
        Multiplication for Transformers at Scale` paper: https://huggingface.co/papers/2208.07339 Any hidden states
        value that is above this threshold will be considered an outlier and the operation on those values will be
        done in fp16. Values are usually normally distributed, that is, most values are in the range [-3.5, 3.5],
        but there are some exceptional systematic outliers that are very differently distributed for large models.
        These outliers are often in the interval [-60, -6] or [6, 60]. Int8 quantization works well for values of
        magnitude ~5, but beyond that, there is a significant performance penalty. A good default threshold is 6,
        but a lower threshold might be needed for more unstable models (small models, fine-tuning).
    llm_int8_skip_modules (`list[str]`, *optional*):
        An explicit list of the modules that we do not want to convert in 8-bit. This is useful for models such as
        Jukebox that has several heads in different places and not necessarily at the last position. For example
        for `CausalLM` models, the last `lm_head` is typically kept in its original `dtype`.
    llm_int8_enable_fp32_cpu_offload (`bool`, *optional*, defaults to `False`):
        This flag is used for advanced use cases and users that are aware of this feature. If you want to split
        your model in different parts and run some parts in int8 on GPU and some parts in fp32 on CPU, you can use
        this flag. This is useful for offloading large models such as `google/flan-t5-xxl`. Note that the int8
        operations will not be run on CPU.
    llm_int8_has_fp16_weight (`bool`, *optional*, defaults to `False`):
        This flag runs LLM.int8() with 16-bit main weights. This is useful for fine-tuning as the weights do not
        have to be converted back and forth for the backward pass.
    bnb_4bit_compute_dtype (`torch.dtype` or str, *optional*, defaults to `torch.float32`):
        This sets the computational type which might be different than the input type. For example, inputs might be
        fp32, but computation can be set to bf16 for speedups.
    bnb_4bit_quant_type (`str`,  *optional*, defaults to `"fp4"`):
        This sets the quantization data type in the bnb.nn.Linear4Bit layers. Options are FP4 and NF4 data types
        which are specified by `fp4` or `nf4`.
    bnb_4bit_use_double_quant (`bool`, *optional*, defaults to `False`):
        This flag is used for nested quantization where the quantization constants from the first quantization are
        quantized again.
    bnb_4bit_quant_storage (`torch.dtype` or str, *optional*, defaults to `torch.uint8`):
        This sets the storage type to pack the quanitzed 4-bit prarams.
    kwargs (`dict[str, Any]`, *optional*):
        Additional parameters from which to initialize the configuration object.
)_load_in_4bit_load_in_8bitr&   Nc                  ^  [         R                  T l        U(       a  U(       a  [        S5      eUT l        UT l        UT l        UT l        UT l        UT l	        UT l
        U	T l        Uc  [        R                  T l        O][        U[         5      (       a  [#        [        U5      T l        O2[        U[        R$                  5      (       a  UT l        O[        S5      eU
c  [        R&                  T l        On[        U
[         5      (       a'  U
S;  a  [        S5      e[#        [        U
5      T l        O2[        U
[        R$                  5      (       a  U
T l        O[        S5      eU(       aW  [+        U 4S jU 5       5      (       d=  [,        R/                  S[1        UR3                  5       5       ST R4                   S	35        T R7                  5         g )
NVload_in_4bit and load_in_8bit are both True, but only one can be used at the same timez8bnb_4bit_compute_dtype must be a string or a torch.dtype)float16float32int8uint8float64bfloat16zv`bnb_4bit_quant_storage` must be a valid string (one of 'float16', 'float32', 'int8', 'uint8', 'float64', 'bfloat16') z8bnb_4bit_quant_storage must be a string or a torch.dtypec              3  @   >#    U  H  oTR                   ;   v   M     g 7fN)ri   ).0krC   s     r"   	<genexpr>.BitsAndBytesConfig.__init__.<locals>.<genexpr>  s     TV4#C#CCVs   zUnused kwargs: z. These kwargs are not used in .)r   r   r&   
ValueErrorro   rn   llm_int8_thresholdllm_int8_skip_modules llm_int8_enable_fp32_cpu_offloadllm_int8_has_fp16_weightbnb_4bit_quant_typebnb_4bit_use_double_quanttorchrs   bnb_4bit_compute_dtype
isinstancerf   getattrdtyperu   bnb_4bit_quant_storageallloggerwarninglistkeysrV   	post_init)rC   load_in_8bitload_in_4bitr   r   r   r   r   r   r   r   r0   s   `           r"   __init__BitsAndBytesConfig.__init__   s    /==Luvv))"4%:"0P-(@%#6 )B&!)*/--D'.44*1%9O*PD'.<<*@D'WXX!)*/++D'.44% .  ! M  +2%9O*PD'.<<*@D'WXX#TVTTTNN_T&++--@,AA`aeaoao`ppqrsr!   c                    U R                   $ ry   )rn   rN   s    r"   r   BitsAndBytesConfig.load_in_4bit      !!!r!   c                    [        U[        5      (       d  [        S5      eU R                  (       a  U(       a  [	        S5      eXl        g )Nload_in_4bit must be a booleanrq   )r   re   	TypeErrorr   r   rn   rC   r4   s     r"   r   r   !  8    %&&<==uvv"r!   c                    U R                   $ ry   )ro   rN   s    r"   r   BitsAndBytesConfig.load_in_8bit*  r   r!   c                    [        U[        5      (       d  [        S5      eU R                  (       a  U(       a  [	        S5      eXl        g )Nload_in_8bit must be a booleanrq   )r   re   r   r   r   ro   r   s     r"   r   r   .  r   r!   c                   [        U R                  [        5      (       d  [        S5      e[        U R                  [        5      (       d  [        S5      e[        U R
                  [        5      (       d  [        S5      eU R                  b*  [        U R                  [        5      (       d  [        S5      e[        U R                  [        5      (       d  [        S5      e[        U R                  [        5      (       d  [        S5      eU R                  b4  [        U R                  [        R                  5      (       d  [        S5      e[        U R                  [        5      (       d  [        S	5      e[        U R                   [        5      (       d  [        S
5      eU R                  (       aW  ["        R$                  " [&        R(                  R#                  S5      5      ["        R$                  " S5      :  d  [+        S5      egg)zn
Safety checker that arguments are correct - also replaces some NoneType arguments with their default values.
r   r   z"llm_int8_threshold must be a floatNz/llm_int8_skip_modules must be a list of stringsz2llm_int8_enable_fp32_cpu_offload must be a booleanz*llm_int8_has_fp16_weight must be a booleanz*bnb_4bit_compute_dtype must be torch.dtypez$bnb_4bit_quant_type must be a stringz+bnb_4bit_use_double_quant must be a booleanr   z0.39.0z[4 bit quantization requires bitsandbytes>=0.39.0 - please upgrade your bitsandbytes version)r   r   re   r   r   r   floatr   r   r   r   r   r   r   r   rf   r   r   parse	importlibmetadatar   rN   s    r"   r   BitsAndBytesConfig.post_init7  s    $++T22<==$++T22<==$11599@AA%%1*TE_E_ae:f:fMNN$??FFPQQ$77>>HII&&2:dFaFachcncn;o;oHII$22C88BCC$88$??IJJW]]93E3E3M3Mn3]%^biboboc
 &
 m &
r!   c                @    U R                   =(       d    U R                  $ )z@
Returns `True` if the model is quantizable, `False` otherwise.
)r   r   rN   s    r"   is_quantizable!BitsAndBytesConfig.is_quantizable\  s       5D$5$55r!   c                    U R                   (       a  gU R                  (       a  U R                  S:X  a  gU R                  (       a  U R                  S:X  a  gg)zu
This method returns the quantization method used for the model. If the model is not quantizable, it returns
`None`.
llm_int8fp4nf4N)r   r   r   rN   s    r"   quantization_method&BitsAndBytesConfig.quantization_methodb  sE    
 4#;#;u#D4#;#;u#Dr!   c                   [         R                  " U R                  5      n[        US   5      R	                  S5      S   US'   [        US   5      R	                  S5      S   US'   U R
                  US'   U R                  US'   U$ )rJ   r   r~      r   r   r   )rK   rL   rM   rf   splitr   r   )rC   outputs     r"   r?   BitsAndBytesConfig.to_dictp  s    
 t}}-+.v6N/O+P+V+VWZ+[\]+^'(+.v6N/O+P+V+VWZ+[\]+^'(!%!2!2~!%!2!2~r!   c                    U R                  5       nU R                  R                   S[        R                  " USSS9 S3$ NrU   r	   Tr:   r=   r?   rV   r   r@   rA   rC   r.   s     r"   rX   BitsAndBytesConfig.__repr__}  ;    lln..))*!DJJ{1X\,]+^^`aar!   c                    U R                  5       n[        5       R                  5       n0 nUR                  5        H  u  pEXRU   :w  d  M  XSU'   M     U$ )z
Removes all attributes from config which correspond to the default config attributes for better readability and
serializes to a Python dictionary.

Returns:
    `dict[str, Any]`: Dictionary of all the attributes that make up this configuration instance,
)r?   rl   r(   )rC   r.   default_config_dictserializable_config_dictr3   r4   s         r"   r[   BitsAndBytesConfig.to_diff_dict  s[     lln 12::<#%  &++-JCC0005- . ('r!   )rn   ro   r   r   r   r   r   r   r   r   r&   )
FFg      @NFFNr   FN)r4   re   rc   )r   r   r   r   rg   ri   r   propertyr   setterr   r   r   r   r?   rX   r[   r    r   r!   r"   rl   rl      s    1f #U ").!&#!"'#<| " " # # " " # ##J6b(r!   rl   c                  &    \ rS rSrSrSSS jjrSrg)GGUFQuantizationConfigi  a5  This is a config class for GGUF Quantization techniques.

Args:
    compute_dtype: (`torch.dtype`, defaults to `torch.float32`):
        This sets the computational type which might be different than the input type. For example, inputs might be
        fp32, but computation can be set to bf16 for speedups.

Nc                    [         R                  U l        Xl        SU l        S U l        U R                  c  [        R                  U l        g g )NT)r   r   r&   compute_dtypepre_quantizedmodules_to_not_convertr   rs   )rC   r   s     r"   r   GGUFQuantizationConfig.__init__  sE    .33*! '+#%!&D &r!   )r   r   r   r&   ry   )r   z'torch.dtype' | None)r   r   r   r   rg   r   r    r   r!   r"   r   r     s    	/ 	/r!   r   c                  l   ^  \ rS rSrSr S
     SS jjrS rU 4S jr\SS j5       r	S r
S rS	rU =r$ )TorchAoConfigi  a  This is a config class for torchao quantization/sparsity techniques.

Args:
    quant_type (`AOBaseConfig`):
        An `AOBaseConfig` subclass instance specifying the quantization type. See the [torchao
        documentation](https://docs.pytorch.org/ao/main/api_ref_quantization.html#inference-apis-for-quantize) for
        available config classes (e.g. `Int4WeightOnlyConfig`, `Int8WeightOnlyConfig`, `Float8WeightOnlyConfig`,
        `Float8DynamicActivationFloat8WeightConfig`, etc.).
    modules_to_not_convert (`list[str]`, *optional*, default to `None`):
        The list of modules to not quantize, useful for quantizing models that explicitly require to have some
        modules left in their original precision.

Example:
    ```python
    from diffusers import FluxTransformer2DModel, TorchAoConfig
    from torchao.quantization import Int8WeightOnlyConfig

    quantization_config = TorchAoConfig(Int8WeightOnlyConfig())
    transformer = FluxTransformer2DModel.from_pretrained(
        "black-forest-labs/Flux.1-Dev",
        subfolder="transformer",
        quantization_config=quantization_config,
        torch_dtype=torch.bfloat16,
    )
    ```
c                f    [         R                  U l        Xl        X l        U R                  5         g ry   )r   r   r&   
quant_typer   r   )rC   r   r   r0   s       r"   r   TorchAoConfig.__init__  s'     /66$&<#r!   c                    [        SS5      (       a  [        S5      eSSKJn  [	        U R
                  U5      (       d+  [        S[        U R
                  5      R                   35      eg )N<0.15.0zWTorchAoConfig requires torchao >= 0.15.0. Please upgrade with `pip install -U torchao`.r   )AOBaseConfigz1quant_type must be an AOBaseConfig instance, got )	r   r   torchao.quantization.quant_apir   r   r   r   typer   )rC   r   s     r"   r   TorchAoConfig.post_init  s[    c8,,vww?$//<88OPTUYUdUdPePnPnOopqq 9r!   c                ^   > [         TU ]  5       nSSKJn  SU" U R                  5      0US'   U$ )z&Convert configuration to a dictionary.r   )config_to_dictdefaultr   )superr?   torchao.core.configr   r   )rC   dr   rV   s      r"   r?   TorchAoConfig.to_dict  s2    GO 	7
 %nT__&EF,r!   c                    [        SS5      (       d  [        S5      eUR                  5       nUR                  S5      n[	        U5      S:X  a  SU;   d   S5       eUS   nSS	KJn  U" U5      nU " S
SU0UD6$ )z'Create configuration from a dictionary.z>=r   zCTorchAoConfig requires torchao >= 0.15.0 for construction from dictr   r   r   z8Expected only one key 'default' in quant_type dictionaryr   )config_from_dictr   )r   NotImplementedErrorrK   r,   lenr   r   )r-   r.   r/   r0   r   r   s         r"   r5   TorchAoConfig.from_dict  s     "$11%&kll!&&( __\2
 :!#	Z(? 	
F	
?  	*
 	9%j1
8j8K88r!   c                    U R                   $ )zBCreate the appropriate quantization method based on configuration.)r   rN   s    r"   get_apply_tensor_subclass'TorchAoConfig.get_apply_tensor_subclass  s    r!   c                    U R                  5       nU R                  R                   S[        R                  " USSS9 S3$ r   r   r   s     r"   rX   TorchAoConfig.__repr__	  r   r!   )r   r&   r   ry   )r   z'AOBaseConfig'r   list[str] | Nonerd   Nonerb   )r   r   r   r   rg   r   r   r?   rj   r5   r   rX   r    __classcell__)rV   s   @r"   r   r     s^    < 48
"
 !1

 

r 9 9*b br!   r   c                  6    \ rS rSrSr  S   SS jjrS rSrg)	QuantoConfigi  a]  
This is a wrapper class about all possible attributes and features that you can play with a model that has been
loaded using `quanto`.

Args:
    weights_dtype (`str`, *optional*, defaults to `"int8"`):
        The target dtype for the weights after quantization. Supported values are ("float8","int8","int4","int2")
   modules_to_not_convert (`list`, *optional*, default to `None`):
        The list of modules to not quantize, useful for quantizing models that explicitly require to have some
        modules left in their original precision (e.g. Whisper encoder, Llava encoder, Mixtral gate layers).
Nc                    Sn[        SSU5        [        R                  U l        Xl        X l        U R                  5         g )NzB`QuantoConfig` is deprecated and will be removed in version 1.0.0.r   z1.0.0)r
   r   r   r&   weights_dtyper   r   )rC   r   r   r0   deprecation_messages        r"   r   QuantoConfig.__init__  s<     c.'+>?.55*&<#r!   c                b    / SQnU R                   U;  a  [        SU SU R                    35      eg)z+
Safety checker that arguments are correct
)float8rt   int4int2zOnly support weights in z but found N)r   r   )rC   accepted_weightss     r"   r   QuantoConfig.post_init*  sA     >%5578H7IUYUgUgThijj 6r!   )r   r&   r   )rt   N)r   rf   r   r   )r   r   r   r   rg   r   r   r    r   r!   r"   r   r     s,    
 $37 !1kr!   r   c                      \ rS rSrSrSSSSSS.rSSS.r          S                       SS
 jjrSSS jjrSS jr	SS jr
Srg	)NVIDIAModelOptConfigi3  a9  This is a config class to use nvidia modelopt for quantization.

Args:
    quant_type (`str`):
        The type of quantization we want to use, following is how to use:
            **weightquant_activationquant ==> FP8_FP8** In the above example we have use FP8 for both weight and
            activation quantization. Following are the all the options:
                - FP8
                - INT8
                - INT4
                - NF4
                - NVFP4
    modules_to_not_convert (`list[str]`, *optional*, default to `None`):
        The list of modules to not quantize, useful for quantizing models that explicitly require to have some
    weight_only (`bool`, *optional*, default to `False`):
        If set to `True`, the quantization will be applied only to the weights of the model.
    channel_quantize (`int`, *optional*, default to `None`):
        The channel quantization axis, useful for quantizing models across different axes.
    block_quantize (`int`, *optional*, default to `None`):
        The block size, useful to further quantize each channel/axes into blocks.
    scale_channel_quantize (`int`, *optional*, default to `None`):
        The scale channel quantization axis, useful for quantizing calculated scale across different axes.
    scale_block_quantize (`int`, *optional*, default to `None`):
        The scale block size, useful for quantizing each scale channel/axes into blocks.
    algorithm (`str`, *optional*, default to `"max"`):
        The algorithm to use for quantization, currently only supports `"max"`.
    forward_loop (`Callable`, *optional*, default to `None`):
        The forward loop function to use for calibration during quantization.
    modelopt_config (`dict`, *optional*, default to `None`):
        The modelopt config, useful for passing custom configs to modelopt.
    disable_conv_quantization (`bool`, *optional*, default to `False`):
        If set to `True`, the quantization will be disabled for convolutional layers.
    kwargs (`dict[str, Any]`, *optional*):
        Additional parameters which are to be used for calibration.
)         r   )r	   r   )FP8INT8INT4NF4NVFP4)r   r   Nc                    [         R                  U l        U R                  U5        X l        X0l        X@l        XPl        SU0U l        Xl	        X`l
        Xpl        U
(       d  U R                  5       OU
U l        Xl        g )Nmethod)r   r   r&   _normalize_quant_typer   weight_onlychannel_quantizeblock_quantize	calib_cfgforward_loopscale_channel_quantizescale_block_quantizeget_config_from_quant_typemodelopt_configdisable_conv_quantization)rC   r   r   r  r  r  r  r  	algorithmr  r
  r  r0   s                r"   r   NVIDIAModelOptConfig.__init__e  sv     /77"":.&<#& 0,i
 )&<#$8!HWt>>@]l)B&r!   c                h    SSK Jn  [        U5      S:X  a  SU S3n[        R                  " U5        g g )Nr   )_PATCHED_CLASSESzNot z weights in modelopt format. This might cause unreliable behavior.Please make sure to run the following code before loading/saving model weights:

    from modelopt.torch.opt import enable_huggingface_checkpointing
    enable_huggingface_checkpointing()
)&modelopt.torch.opt.plugins.huggingfacer  r   warningswarn)rC   	operationr  warning_msgs       r"   check_model_patching)NVIDIAModelOptConfig.check_model_patching  s<    K A%yk "; ;  MM+& &r!   c                   UR                  S5      nUS   n[        U5      S:  a  US   OSn[        U5      S:  a  [        R                  SU S35        SnSnOaU[        R
                  ;  a  [        R                  S	U S
35        SnUb/  U[        R
                  ;  a  [        R                  SU S35        SnX4b  SU-   OS-   U l        g)av  
Validates and normalizes the quantization type string.

Splits the quant_type into weight and activation components, verifies them against supported types, and
replaces unsupported values with safe defaults.

Args:
    quant_type (str): The input quantization type string (e.g., 'FP8_INT8').

Returns:
    str: A valid quantization type string (e.g., 'FP8_INT8' or 'FP8').
_r   r   Nr	   zQuantization type z. is not supported. Picking FP8_INT8 as defaultr   zWeight Quantization type z) is not supported. Picking FP8 as defaultzActivation Quantization type z* is not supported. Picking INT8 as default )r   r   r   r   r   quanttype_to_numbitsr   )rC   r   partsw_typeact_types        r"   r  *NVIDIAModelOptConfig._normalize_quant_type  s       %q"5zA~584u:>NN/
|;ijkFH1FFF!:6(Bklm#8L8a8a(a!>xjHrst 6JC(NPRSr!   c                   SSK Js  Jn  SS00 SS00 0 0 0 S.UR                  R                  EU R
                  S.nUS   nU R                  (       a#  U H  nS	U;  d  M  X4   (       a  M  SX4   S'   M     U R                  R                  S
5      nUS   n[        U5      S:  a  US   R                  SS5      OSnU Hc  nXAR                  R                  ;  d  M  SX4   ;  d  M(  US:X  a  Ub  [        R                  U   X4   S'   MK  [        R                  U   X4   S'   Me     U R                  bL  U R                  b?  U R                  U R                  0US	   S'   U R                  U R                  SS0US   S'   O9U R                  b,  U R                  US	   S'   U R                  US   S'   SUS   S'   U R                  b  U R                   b  U[        R"                  ;   aA  US	   S   R%                  [        R"                  U   U R                  U R                   0S.5        U(       aU  U[        R"                  ;   aA  US   S   R%                  [        R"                  U   U R                  U R                   0S.5        U$ )z,
Get the config from the quantization type.
r   N
fake_quantFenable)*weight_quantizer*input_quantizerz*output_quantizerz*q_bmm_quantizerz*k_bmm_quantizerz*v_bmm_quantizerz*softmax_quantizer)	quant_cfgr  r$  r"  r  r   Ar  r#  num_bitsblock_sizesr   dynamicaxis)
scale_bitsscale_block_sizes)modelopt.torch.quantizationr   quantizationr1   _default_disabled_quantizer_cfgr  r  r   r   r   replacer   r  r  r  r  r  quanttype_to_scalingbitsr`   )rC   mtqBASE_CONFIGr$  r{   r  r  r  s           r"   r	  /NVIDIAModelOptConfig.get_config_from_quant_type  s    	21 '3E%:$&&.%6$&$&$&&(	 **<<	 
  ,	&a/	-2IL*  %%c*q03E
Q58##C,DA

BBBxW`WcGc**+3G3\3\]e3f	Z0+?+T+TU[+\	Z(  *t/D/D/P=A=R=RTXTgTg<hI)*=9%%t':':	<I()-8 "".595J5JI)*62484I4II()&14=I()&1 &&2t7P7P7\-FFF-.}=DD&:&S&STZ&[.2.I.I4KdKd-e H(<(U(UU,-m<CC&:&S&ST\&].2.I.I4KdKd-e r!   )r  r  r  r  r  r
  r   r&   r   r  r  r  )
NTNNNNmaxNNF)r   rf   r   r   r  re   r  
int | Noner  r5  r  r5  r  r5  r  rf   r  zCallable | Noner
  zdict | Noner  re   rd   r   )loading)r  rf   )r   rf   rd   rf   rc   )r   r   r   r   rg   r  r0  r   r  r  r	  r    r   r!   r"   r   r   3  s    "J    48 '+%)-1+/(,'+*/CC !1C 	C
 %C #C !+C )C C &C %C $(C 
C>'T:Ar!   r   )#rg   
__future__r   rK   importlib.metadatar   r@   osr  dataclassesr   enumr   typingr   r   	packagingr   utilsr
   r   r   r   r   
get_loggerr   r   rf   r   r$   rl   r   r   r   r   r   r!   r"   <module>r@     s  $
 #    	  !     N N 			H	%d  o o od l(0 l( l(^ /4 / /, [b+ [b [b| !k* !k !kH {2 { {r!   