
    
3j^                         S SK JrJr  SSKJrJrJrJrJr  SSK	J
r
  \(       a  SSKJr  \" 5       (       a
  S SKrS SKJr  \" 5       (       a  S SKJr  \R$                  " \5      r " S	 S
\
5      rg)    )TYPE_CHECKINGAny   )get_module_from_nameis_accelerate_availableis_nvidia_modelopt_availableis_torch_availablelogging   )DiffusersQuantizer)
ModelMixinN)set_module_tensor_to_devicec            
       $  ^  \ rS rSrSrSrSrS/rU 4S jrS r	SS	S
SS\
S\\
\4   4S jrSS	S
SS\
SS4S jrS\\
\\
-  4   S\\
\\
-  4   4S jrSS jrSS S jjrSS	S\\
   4S jr/ 4SS	S\\
   4S jjrS r\S 5       r\S 5       rSrU =r$ )!NVIDIAModelOptQuantizer   z0
Diffusers Quantizer for Nvidia-Model Optimizer
TFnvidia_modeloptc                 (   > [         TU ]  " U40 UD6  g N)super__init__)selfquantization_configkwargs	__class__s      j/home/wildlama/miniconda3/lib/python3.13/site-packages/diffusers/quantizers/modelopt/modelopt_quantizer.pyr    NVIDIAModelOptQuantizer.__init__%   s    ,77    c                 .   [        5       (       d  [        S5      eSU l        UR                  SS 5      n[	        U[
        5      (       aM  SUR                  5       ;   d  SUR                  5       ;   a$  U R                  (       a  [        S5      eSU l        g g g )NzkLoading an nvidia-modelopt quantized model requires nvidia-modelopt library (`pip install nvidia-modelopt`)F
device_mapcpudiskzYou are attempting to perform cpu/disk offload with a pre-quantized modelopt model This is not supported yet. Please remove the CPU or disk device from the `device_map` argument.T)	r   ImportErroroffloadget
isinstancedictvaluespre_quantized
ValueError)r   argsr   r   s       r   validate_environment,NVIDIAModelOptQuantizer.validate_environment(   s    +--}  ZZd3
j$''
))++v9J9J9L/L%%$z 
 $(DL 0M (r   modelr   param_valueztorch.Tensor
param_name
state_dictc                 v    SSK Jn  [        X5      u  pxU R                  (       a  gU" U5      (       a  SU;   a  gg)Nr   )is_quantizedTweightF)!modelopt.torch.quantization.utilsr2   r   r(   )	r   r-   r.   r/   r0   r   r2   moduletensor_names	            r   check_if_quantized_param0NVIDIAModelOptQuantizer.check_if_quantized_param;   s9     	C25E&!!h+&=r   target_deviceztorch.devicec                    SSK Js  Jn  UR                  S[        R                  5      n[        X5      u  pU R                  (       a:  [        R                  R                  UR                  US95      U	R                  U
'   g[        XXBU5        UR                  XR                  R                  S   U R                  R                  5        UR!                  U	5        SU	R"                  l        g)zX
Create the quantized parameter by calling .calibrate() after setting it to the module.
r   Ndtype)device	algorithmF)modelopt.torch.quantizationtorchquantizationr$   float32r   r(   nn	Parameterto_parametersr   	calibrater   modelopt_configforward_loopcompressr3   requires_grad)r   r-   r.   r/   r9   r*   r   mtqr;   r5   r6   s              r   create_quantized_param.NVIDIAModelOptQuantizer.create_quantized_paramM   s     	21

7EMM225E.3hh.@.@WdAe.fF{+'=W\]MM00@@MtOgOgOtOt LL */FMM'r   
max_memoryreturnc                 `    UR                  5        VVs0 s H
  u  p#X#S-  _M     nnnU$ s  snnf )Ng?)items)r   rN   keyvals       r   adjust_max_memory)NVIDIAModelOptQuantizer.adjust_max_memoryh   s5    6@6F6F6HI6H(#c:o6H
I Js   *c                 Z    U R                   R                  S:X  a  [        R                  nU$ )NFP8)r   
quant_typer?   float8_e4m3fn)r   target_dtypes     r   adjust_target_dtype+NVIDIAModelOptQuantizer.adjust_target_dtypel   s'    ##..%7 ..Lr   c                 V    Uc%  [         R                  S5        [        R                  nU$ )NzVYou did not specify `torch_dtype` in `from_pretrained`. Setting it to `torch.float32`.)loggerinfor?   rA   )r   torch_dtypes     r   update_torch_dtype*NVIDIAModelOptQuantizer.update_torch_dtypeq   s$    KKpq--Kr   c                 t   [         R                  [         R                  [         R                  [         R                  [         R
                  [         R                  4n/ nUR                  5        HE  u  pE[        XR5      (       d  M  UR                  SS9 H  u  pgUR                  U SU 35        M     MG     U$ )z
Get parameter names for all convolutional layers in a HuggingFace ModelMixin. Includes Conv1d/2d/3d and
ConvTranspose1d/2d/3d.
F)recurse.)rB   Conv1dConv2dConv3dConvTranspose1dConvTranspose2dConvTranspose3dnamed_modulesr%   named_parametersappend)r   r-   
conv_typesconv_param_namesnamer5   r/   _s           r   get_conv_param_names,NVIDIAModelOptQuantizer.get_conv_param_namesw   s     IIIIII

 !//1LD&--%+%<%<U%<%KMJ$++tfAj\,BC &L 2
  r   keep_in_fp32_modulesc                 6   SS K Js  Jn  U R                  (       a  g U R                  R
                  nUc  / n[        U[        5      (       a  U/nUR                  U5        U R                  R                  (       a   UR                  U R                  U5      5        U H'  nSS0U R                  R                  S   SU-   S-   '   M)     X`R                  l        UR                  USU R                  R                  4/S9  U R                  UR                  l        g )Nr   enableF	quant_cfg*quantize)mode)modelopt.torch.optr?   optr(   r   modules_to_not_convertr%   strextenddisable_conv_quantizationrs   rG   
apply_modeconfig)r   r-   r   ru   r   mtor~   r5   s           r   $_process_model_before_weight_loading<NVIDIAModelOptQuantizer._process_model_before_weight_loading   s     	)(!%!9!9!P!P!)%'",c22&<%="%%&:;##=="))$*C*CE*JK,FYachXiD$$44[A#,QTBTU -:P  7uZ1I1I1Y1Y$Z#[\+/+C+C(r   c                     SSK Jn  U R                  (       a  U$ UR                  5        H8  u  pE[	        XSR
                  5      (       d  M!  XQLd  M'  UR                  U5        M:     U$ )Nr   )ModeloptStateManager)r|   r   r(   rl   hasattr
_state_keyremove_state)r   r-   r   r   rr   ms         r   #_process_model_after_weight_loading;NVIDIAModelOptQuantizer._process_model_after_weight_loading   sR    ;L'')DAq99::q~$11!4 * r   c                     g)NT r   s    r   is_trainable$NVIDIAModelOptQuantizer.is_trainable   s    r   c                 6    U R                   R                  SS9  g)Nsaving)	operationT)r   check_model_patchingr   s    r   is_serializable'NVIDIAModelOptQuantizer.is_serializable   s      555Ir   )r#   )rZ   torch.dtyperO   r   r   )r`   r   rO   r   )__name__
__module____qualname____firstlineno____doc__use_keep_in_fp32_modulesrequires_calibrationrequired_packagesr   r+   r   r&   r   r7   rL   intrT   r[   ra   listrs   r   r   propertyr   r   __static_attributes____classcell__)r   s   @r   r   r      s&     $ *+8(& $ 	
 cN$00 $0 	0
 &06DcCi,@ T#sUXy.EY 
 ,  49  4 +-	DD #3i	D:    r   r   )typingr   r   utilsr   r   r   r	   r
   baser   models.modeling_utilsr   r?   torch.nnrB   accelerate.utilsr   
get_loggerr   r^   r   r   r   r   <module>r      s]    %  & 3 < 
		H	%b0 br   