
    3jeI                     .   S r SSKrSSKrSSKJr  SSKJr  SSKJ	r	J
r
  SSKJrJr  SSKJrJrJrJrJr  SS	KJrJr  SS
KJrJrJrJr  \R8                  " \5      rS\\\      \\   -  \-  S\\\      4S jr  " S S\SS9r!\ " S S\5      5       r"S/r#g)zImage processor class for Fuyu.    N)
functional   )TorchvisionBackend)BatchFeatureget_size_dict)group_images_by_shapereorder_images)
ImageInputPILImageResamplingSizeDictis_valid_imagemake_list_of_images)ImagesKwargsUnpack)
TensorTypeauto_docstringloggingrequires_backendsimagesreturnc                    [        U 5      (       a  U //$ [        U [        5      (       a  [        S U  5       5      (       a  U $ [        U [        5      (       a  U  Vs/ s H  n[	        U5      PM     sn$ [        S5      es  snf )Nc              3   B   #    U  H  n[        U[        5      v   M     g 7fN)
isinstancelist).0images     h/home/wildlama/miniconda3/lib/python3.13/site-packages/transformers/models/fuyu/image_processing_fuyu.py	<genexpr>.make_list_of_list_of_images.<locals>.<genexpr>1   s     'TVE
5$(?(?Vs   zHimages must be a list of list of images or a list of images or an image.)r   r   r   allr   
ValueError)r   r   s     r   make_list_of_list_of_imagesr#   +   su     fz&$C'TV'T$T$T&$8>?u#E*??
_
`` @s   A?c                   >    \ rS rSr% Sr\S-  \S'   \\S'   \\S'   Sr	g)FuyuImagesKwargs:   a  
patch_size (`dict[str, int]`, *optional*, defaults to `{"height": 30, "width": 30}`):
    Dictionary in the format `{"height": int, "width": int}` specifying the size of the patches.
padding_value (`float`, *optional*, defaults to 1.0):
    The value to pad the image with.
padding_mode (`str`, *optional*, defaults to "constant"):
    The padding mode to use when padding the image.
N
patch_sizepadding_valuepadding_mode )
__name__
__module____qualname____firstlineno____doc__r   __annotations__floatstr__static_attributes__r*       r   r%   r%   :   s     4r4   r%   F)totalc                     ^  \ rS rSrSrSSS.rSSS.r\R                  r	Sr
SrSrSrS	rS	rSrS
r/ SQr\rS\\   4U 4S jjr S5S\S\S\4S jjr  S6S\R6                  S\SSS\S\R6                  4
U 4S jjjrS\S   S\S\SSS\S\ S\S\ \\    -  S-  S\ \\    -  S-  S \S-  S!\ S-  S"\!S-  S#\S-  S$\!\"-  S-  S\#4S% jr$S7S&\S'\S(\S-  S\4S) jjr%S7S\R6                  S(\S-  S\R6                  4S* jjr& S7S+\R6                  S,\R6                  S-\R6                  S.\R6                  S/\S0\S1\S(\'\!\4   S-  S\#4S2 jjr( S7S(\'\!\4   \-  S-  S\'4U 4S3 jjjr)S4r*U =r+$ )8FuyuImageProcessorI   Ti8  i  heightwidth   g      ?constantg      ?gp?r   image_input_idsimage_patchesimage_patch_indices_per_batch#image_patch_indices_per_subsequencekwargsc                 &   > [         TU ]  " S0 UD6  g )Nr*   )super__init__)selfrC   	__class__s     r   rF   FuyuImageProcessor.__init__`   s    "6"r4   r   expected_ndimsr   c                 :    U R                  U5      n[        U5      $ r   )fetch_imagesr#   )rG   r   rJ   s      r   _prepare_images_structure,FuyuImageProcessor._prepare_images_structurec   s    
 ""6**622r4   Nr   sizeresamplez7PILImageResampling | tvF.InterpolationMode | int | None	antialiasc                   > Uc  [         R                  nUR                  SS u  pgUR                  UR                  pXy::  a  Xh::  a  U$ X-  n
X-  n[        X5      n[        Xl-  5      n[        X|-  5      n[        TU ]!  U[        XS9X4S9$ )av  
Resize an image to fit within `(size.height, size.width)` while maintaining aspect ratio.
Only resizes if the image is larger than the target size.
Args:
    image (`torch.Tensor`):
        Image to resize.
    size (`SizeDict`):
        Dictionary in the format `{"height": int, "width": int}` specifying the max size of the output image.
    resample (`PILImageResampling | tvF.InterpolationMode | int`, *optional*, defaults to `PILImageResampling.BILINEAR`):
        Resampling filter to use when resizing the image.
    antialias (`bool`, *optional*, defaults to `True`):
        Whether to apply antialiasing when resizing.
Nr9   )rP   rQ   )
r   BILINEARshaper:   r;   minintrE   resizer   )rG   r   rO   rP   rQ   rC   image_heightimage_widthtarget_heighttarget_widthheight_scale_factorwidth_scale_factoroptimal_scale_factor
new_height	new_widthrH   s                  r   rX   FuyuImageProcessor.resizek   s    * )22H$)KK$4!&*kk4::|&<+HL+:)7"#6K<=
:;	w~8:?(  
 	
r4   ztorch.Tensor	do_resize
do_rescalerescale_factordo_normalize
image_mean	image_stddo_padr(   r)   disable_groupingreturn_tensorsc           	         U Vs/ s H  nU(       d  M  US   R                   SS  PM      nn[        XSS9u  nn0 nUR                  5        H"  u  nnU(       a  U R                  UX4S9nUUU'   M$     [	        UUSS9nU Vs/ s H  nU(       d  M  US   R                   SS  PM      nnU Vs/ s H	  nUS   /PM     nnU Vs/ s H	  nUS   /PM     nn[        UU5       VVs/ s H  u  nnUS   US   -  /PM     nnnU
(       a  U R                  UUUUUSS9n[        UUSS9u  nn0 nUR                  5        H  u  nnU R                  UXVXxU	5      nUUU'   M!     [	        UUSS9n [        R                  " U  V!s/ s H"  n!U!(       d  M  [        R                  " U!5      PM$     sn!5      n"[        U"UUUS	.US
/S9$ s  snf s  snf s  snf s  snf s  snnf s  sn!f )Nr   rS   T)rj   	is_nested)r   rO   rP   )rm      )pad_size
fill_valuer)   rj   rm   )r   image_unpadded_heightsimage_unpadded_widthsimage_scale_factorsoverflowing_values)datatensor_typeskip_tensor_conversion)rU   r   itemsrX   r	   zippadrescale_and_normalizetorchstackr   )#rG   r   rc   rO   rP   rd   re   rf   rg   rh   ri   r(   r)   rj   rk   rC   batch_imageoriginal_image_sizesgrouped_imagesgrouped_images_indexresized_images_groupedrU   stacked_imagesresized_imagesimage_sizes
image_sizerq   rr   original_sizeresized_sizers   processed_images_groupedprocessed_imagesbatchimages_tensors#                                      r   _preprocessFuyuImageProcessor._preprocess   sQ   & NTcVkWb 9A 4 4RS 9Vc/D0
,, "$%3%9%9%;!E>!%>!`,:"5) &< ((>@T`deDRbN[Va0{1~++BC0NbDO!PKj:a=/K!PCN O;Z*Q-; O 033G/U
/U+| !_}Q//0/U 	 
 !XX()!1 & N 0E-=0
,, $& %3%9%9%;!E>!77
LV_N /=$U+ &< **BDXdhiEU$_EUEY^%7U[[%7EU$_`'*@)>':	 '$8#9	
 		
U  d c!P O
6 %`s4   F<F< GG+GG"G;G
GrY   rZ   r'   c                 <   UcA  [        U R                  [        5      (       a  U R                  nO[        S0 U R                  D6nUR                  UR                  pTX-  S:w  a  [        SU< SU 35      eX%-  S:w  a  [        SU< SU 35      eX-  nX%-  nXg-  nU$ )a:  
Calculate number of patches required to encode an image.
Args:
    image_height (`int`):
        Height of the image.
    image_width (`int`):
        Width of the image.
    patch_size (`SizeDict`, *optional*):
        Dictionary in the format `{"height": int, "width": int}` specifying the size of the patches.
r   zimage_height=z must be divisible by zimage_width=r*   )r   r'   r   r:   r;   r"   )	rG   rY   rZ   r'   patch_heightpatch_widthnum_patches_per_dim_hnum_patches_per_dim_wnum_patchess	            r   get_num_patches"FuyuImageProcessor.get_num_patches   s     $//844!__
%88
$.$5$5z7G7Gk&!+.D\NSTT$)~-CK=QRR , < + :+Cr4   c                    [        U S/5        UcA  [        U R                  [        5      (       a  U R                  nO[        S0 U R                  D6nUR                  UR
                  pCUR                  u  pV  nUR                  SX35      nUR                  SXD5      n	U	R                  5       n	U	R                  XVSX45      n	U	R                  SSSSS5      n	U	R                  USXc-  U-  5      n	U	$ )	a?  
Convert an image into a tensor of patches using PyTorch's unfold operation.
Args:
    image (`torch.Tensor`):
        Image to convert. Shape: [batch, channels, height, width]
    patch_size (`SizeDict`, *optional*):
        Dictionary in the format `{"height": int, "width": int}` specifying the size of the patches.
r|      r   r      rn   r*   )r   r   r'   r   r:   r;   rU   unfold
contiguousviewpermutereshape)
rG   r   r'   r   r   
batch_sizechannels_unfolded_along_heightpatchess
             r   patchify_image!FuyuImageProcessor.patchify_image   s     	$	*$//844!__
%88
$.$5$5z7G7Gk%*[["
a %Q K'..q+K$$&,,zRS//!Q1a0//*b(2IK2WXr4   image_inputimage_presentimage_unpadded_himage_unpadded_wimage_placeholder_idimage_newline_idvariable_sizedc	           
      `   [        U S/5        UcB  [        U R                  [        5      (       a  U R                  nO6[        S0 U R                  D6nO [        U[        5      (       d  [        S0 UD6nUR                  UR
                  p/ n/ n/ n[        UR                  S   5       GHb  n/ n/ n[        UR                  S   5       GH  nX.U4   (       Ga  XU4   nUR                  S   UR                  S   nnU(       af  [        U[        R                  " X>U4   U	-  5      U	-  5      n[        U[        R                  " XNU4   U
-  5      U
-  5      nUSS2SU2SU24   nUUnnU R                  UUUS9n[        R                  " U/U[        R                  UR                  S9nU R!                  UR#                  S5      US9R%                  S5      nUUR                  S   :X  d   eU(       a{  UR'                  S	UU
-  5      n[        R                  " UR                  S   S/U[        R                  UR                  S9n[        R(                  " UU/SS
9nUR'                  S	5      nUR+                  U/5        UR+                  U5        UR+                  U5        GM  UR+                  [        R,                  " / [        R                  UR                  S95        GM     UR+                  U5        UR+                  U5        GMe     / n/ nU GH  nSn/ n/ n U H  n!U!U:H  n"[        R.                  " U"5      n[        R0                  " U[        R2                  U!R                  S9R5                  U!5      n#[        R6                  " U!S	5      n$[        R6                  " U!S	5      n%[        R8                  " U"SS9S   n&U#U-   U$U&'   U#U%U&'   UR+                  U$5        U R+                  U%5        UU-  nM     UR+                  U5        UR+                  U 5        GM
     [;        UUUUUS.S9$ )a  
Process images for model input. In particular, variable-sized images are handled here.

Args:
    image_input (`torch.Tensor` of shape [batch_size, subsequence_size, num_channels, height, width]):
        Tensor of images padded to model input size.
    image_present (`torch.Tensor` of shape [batch_size, subsequence_size, num_images]):
        Tensor of 1s and 0s indicating whether an image is present.
    image_unpadded_h (`torch.Tensor` of shape [batch_size, subsequence_size]):
        Tensor of unpadded image heights.
    image_unpadded_w (`torch.Tensor` of shape [batch_size, subsequence_size]):
        Tensor of unpadded image widths.
    image_placeholder_id (int):
        The id of the image placeholder token. Comes from an associated tokenizer.
    image_newline_id (int):
        The id of the image newline token. Comes from an associated tokenizer.
    variable_sized (bool):
        Whether to process images as variable-sized.
    patch_size (`dict[str, int]`, *optional*):
        Size of the patches.
r|   Nr   rn   r   )rY   rZ   r'   )dtypedevice)r   r'   r   )dimT)as_tupler>   )ru   r*   )r   r   r'   r   r:   r;   rangerU   rV   mathceilr   r|   fullint32r   r   	unsqueezesqueezer   catappendtensorcount_nonzeroarangeint64type_as	full_likenonzeror   )'rG   r   r   r   r   r   r   r   r'   r   r   r   batch_image_patchesbatch_image_input_idsbatch_indexr?   r@   subseq_indexr   rY   rZ   new_hnew_wr   tensor_of_image_idsr   newline_idsrA   rB   sample_image_input_idsindex_offsetper_batch_indicesper_subsequence_indicessubseq_image_input_idspatches_maskindicesindices_in_stream_per_batch!indices_in_stream_per_subsequencepatches_indss'                                          r   preprocess_with_tokenizer_info1FuyuImageProcessor.preprocess_with_tokenizer_info  s1   @ 	$	*$//844!__
%88
J11!/J/J$.$5$5z7G7Gk+-8::< !2!21!56K OM %k&7&7&: ; l!:;;'\(ABE05AA+L% !$( II&6L7P&QT`&`adpp! !$' II&6L7P&QT_&_`cnn! !&a%%&7 8495k"&"6"6%1{Wa #7 #K +0**$';5;;WbWiWi+' #118JWa1bjjklmG&'--*::::%.A.I.I"k]hNh.i+&+jj066q91=,"'++#.#5#5	' /4ii9Lk8Z`a.b+.A.I.I".M+MM5'*#**+>?!((1#**5<<%++VaVhVh+ijU !<V "((9&&}5_ 7b CE%HJ+&;"L "&(#*@&59MM#11,?,,{%++NdNkNkltt* /4oo>TVX.Y+49OODZ\^4_1$}}\DI!L<Cl<R+L9BI1,?!(()DE'../PQ+# +A& *001BC/667NO1 '<2  #8!41N7Z
 	
r4   c           	         > [         TU ]  " S0 UD6nUb(  [        U[        5      (       d  [        S0 [	        USS9D6nXS'   U$ )z1
Process Fuyu-specific kwargs before validation.
r'   )
param_namer*   )rE   _standardize_kwargsr   r   r   )rG   r'   rC   rH   s      r   r   &FuyuImageProcessor._standardize_kwargs  sJ     ,6v6!*Z*J*J!WM*$VWJ)|r4   r*   )r   )NTr   ),r+   r,   r-   r.   rc   rO   r'   r   rT   rP   ri   r(   r)   rf   rg   rh   rd   re   model_input_namesr%   valid_kwargsr   rF   r
   rW   rM   r|   Tensorr   boolrX   r   r1   r2   r   r   r   r   r   dictr   r   r3   __classcell__)rH   s   @r   r7   r7   I   s   IT*D,J!**HFMLLJIJN $L#(8!9 #  33 3 
	3 OS&
||&
 &
 L	&

 &
 
&
 &
PF
^$F
 F
 	F

 LF
 F
 F
 F
 DK'$.F
 4;&-F
 tF
 t|F
 DjF
 +F
 j(4/F
" 
#F
PC c xZ^ jm 4ELL ho Y^YeYe H -1D
\\D
 ||D
  ,,	D

  ,,D
 "D
 D
 D
 cNT)D
 
D
P 8<cNX-4 
	 r4   r7   )$r/   r   r|   torchvision.transforms.v2r   tvFimage_processing_backendsr   image_processing_utilsr   r   image_transformsr   r	   image_utilsr
   r   r   r   r   processing_utilsr   r   utilsr   r   r   r   
get_loggerr+   loggerr   r#   r%   r7   __all__r*   r4   r   <module>r      s    &   7 ; A E  5  
		H	%aj!"T*%55
Ba	$z
a|5  X+ X Xv
  
 r4   