
    
3jv                        S SK Jr  S SKJrJr  SSKJr  \(       a  SSKJr  SSK	J
r
JrJrJrJrJrJr  \" 5       (       a  \" 5       (       a  S SKrS	S
K	JrJrJrJrJr  \R.                  " \5      r " S S\5      rg)    )annotations)TYPE_CHECKINGAny   )DiffusersQuantizer   )
ModelMixin)get_module_from_nameis_accelerate_availableis_accelerate_versionis_gguf_availableis_gguf_versionis_torch_availableloggingN   )GGML_QUANT_SIZESGGUFParameter#_dequantize_gguf_and_restore_linear_quant_shape_from_byte_shape_replace_with_gguf_linearc                     ^  \ rS rSrSrU 4S jrS rSS jrSS jrSS jr	S r
          SS	 jr  S           SS
 jjr/ 4   SS jjrSS jr\S 5       r\SS j5       r\SS j5       rS rSrU =r$ )GGUFQuantizer&   Tc                  > [         TU ]  " U40 UD6  UR                  U l        UR                  U l        UR                  =(       d    / U l        [        U R                  [        5      (       d  U R                  /U l        g g N)super__init__compute_dtypepre_quantizedmodules_to_not_convert
isinstancelist)selfquantization_configkwargs	__class__s      b/home/wildlama/miniconda3/lib/python3.13/site-packages/diffusers/quantizers/gguf/gguf_quantizer.pyr   GGUFQuantizer.__init__)   so    ,770>>0>>&9&P&P&VTV#$55t<<+/+F+F*GD' =    c                    [        5       (       a  [        SS5      (       a  [        S5      e[        5       (       a  [	        SS5      (       a  [        S5      eg )N<z0.26.0zoLoading GGUF Parameters requires `accelerate` installed in your environment: `pip install 'accelerate>=0.26.0'`z0.10.0zhTo load GGUF format files you must have `gguf` installed in your environment: `pip install gguf>=0.10.0`)r   r   ImportErrorr   r   )r#   argsr%   s      r'   validate_environment"GGUFQuantizer.validate_environment3   sX    &((,A#x,P,P B  !""oc8&D&Dz  'Er)   c                `    UR                  5        VVs0 s H
  u  p#X#S-  _M     nnnU$ s  snnf )Ng?)items)r#   
max_memorykeyvals       r'   adjust_max_memoryGGUFQuantizer.adjust_max_memory>   s5    6@6F6F6HI6H(#c:o6H
I Js   *c                |    U[         R                  :w  a  [        R                  SU S35        [         R                  $ )Nztarget_dtype z3 is replaced by `torch.uint8` for GGUF quantization)torchuint8loggerinfo)r#   target_dtypes     r'   adjust_target_dtype!GGUFQuantizer.adjust_target_dtypeC   s.    5;;&KK-~5hij{{r)   c                $    Uc  U R                   nU$ r   )r   )r#   torch_dtypes     r'   update_torch_dtype GGUFQuantizer.update_torch_dtypeH   s    ,,Kr)   c                    UR                   nUR                   nUR                  n[        U   u  px[        XHU5      n	X:w  a  [	        U SU	 SU 35      eg)Nz% has an expected quantized shape of: z, but received shape: T)shape
quant_typer   r   
ValueError)
r#   
param_namecurrent_paramloaded_paramloaded_param_shapecurrent_param_shaperE   
block_size	type_sizeinferred_shapes
             r'   check_quantized_param_shape)GGUFQuantizer.check_quantized_param_shapeM   sq    )//+11!,,
 0 <
56HU_`0,CNCSSij|i}~  r)   c                0    [        U[        5      (       a  gg)NTF)r!   r   )r#   modelparam_valuerG   
state_dictr%   s         r'   check_if_quantized_param&GGUFQuantizer.check_if_quantized_param\   s     k=11r)   c                2   [        X5      u  pXR                  ;  a   XR                  ;  a  [        U SU	 S35      eXR                  ;   a  UR	                  U5      UR                  U	'   XR                  ;   a  UR	                  U5      UR                  U	'   g g )Nz- does not have a parameter or a buffer named .)r
   _parameters_buffersrF   to)
r#   rR   rS   rG   target_devicerT   unexpected_keysr%   moduletensor_names
             r'   create_quantized_param$GGUFQuantizer.create_quantized_parami   s     35E000[5Wx'TU`Taabcdd,,,.9nn].KF{+//)+6>>-+HFOOK( *r)   c                    UR                  SS 5      nU R                  R                  U5        U R                   Vs/ s H	  ofc  M  UPM     snU l        [        XR                  XPR                  S9  g s  snf )NrT   )r    )getr    extendr   r   )r#   rR   
device_mapkeep_in_fp32_modulesr%   rT   r^   s          r'   $_process_model_before_weight_loading2GGUFQuantizer._process_model_before_weight_loading|   sg     ZZd3
##**+?@<@<W<W&n<W&v<W&n#!%%zJeJe	
 'os   A3A3c                    U$ r    )r#   rR   r%   s      r'   #_process_model_after_weight_loading1GGUFQuantizer._process_model_after_weight_loading   s    r)   c                    gNFrj   r#   s    r'   is_serializableGGUFQuantizer.is_serializable       r)   c                    grn   rj   ro   s    r'   is_trainableGGUFQuantizer.is_trainable   rr   r)   c                    g)NTrj   ro   s    r'   is_compileableGGUFQuantizer.is_compileable   s    r)   c                   UR                   R                  S:H  nU(       aw  [        R                  S5        [	        [
        S5      (       a  [
        R                  R                  5       O[
        R                  R                  5       nUR                  U5        [        XR                  5      nU(       a  UR                  S5        U$ )NcpuzModel was found to be on CPU (could happen as a result of `enable_model_cpu_offload()`). So, moving it to accelerator. After dequantization, will move the model back to CPU again to preserve the previous device.accelerator)devicetyper:   r;   hasattrr8   r{   current_acceleratorcudacurrent_devicer[   r   r    )r#   rR   is_model_on_cpur|   s       r'   _dequantizeGGUFQuantizer._dequantize   s    ,,++u4KK f
 5-00 !!557ZZ..0 
 HHV3E;V;VWHHUOr)   )r   r    r   )r2   dict[str, int | str]returnr   )r<   'torch.dtype'r   r   )r@   r   r   r   )
rR   'ModelMixin'rS    'GGUFParameter' | 'torch.Tensor'rG   strrT   zdict[str, Any]r   bool)NN)rR   r   rS   r   rG   r   r\   z'torch.device'rT   zdict[str, Any] | Noner]   zlist[str] | None)rR   r   rf   z	list[str])rR   r   )r   r   )__name__
__module____qualname____firstlineno__use_keep_in_fp32_modulesr   r.   r5   r=   rA   rO   rU   r`   rg   rk   propertyrp   rt   rw   r   __static_attributes____classcell__)r&   s   @r'   r   r   &   s   #H


 6 	
 # 
& -1,0II 6I 	I
 &I *I *I. +-	

 (	
        r)   r   )
__future__r   typingr   r   baser   models.modeling_utilsr	   utilsr
   r   r   r   r   r   r   r8   r   r   r   r   r   
get_loggerr   r:   r   rj   r)   r'   <module>r      sg    " % % 3   -//  
		H	%E& Er)   