
    3j_E                        S SK Jr  S SKJrJr  S SKrS SKJs  Jr	  S SK
Jr  S SKJr  S SKJrJrJr  S SKJr  S SKJr  S S	KJr  S S
KJrJr  S SKJrJr   " S S5      rg)    )annotations)AnyOptionalN)LongestMaxSize)AugmentationSequential)KORNIA_CHECKKORNIA_CHECK_IS_TENSORKORNIA_CHECK_SHAPE)	normalize)Boxes)	Keypoints)Sam	SamConfig)PromptsSegmentationResultsc                    ^  \ rS rSrSr   S       SU 4S jjjr S       SS jjr\R                  " 5        S       SS jj5       r	SS jr
SS jrSS	 jrSS
.     SS jjr    S         SS jjr\R                  " 5             S             S S jj5       rS!S jrSSSSSSS.             S"S jjrSrU =r$ )#VisualPrompter#   a  Allow the user to run multiple query with multiple prompts for a model.

At the moment, we just support the SAM model. The model is loaded based on the given config.

For default the images are transformed to have their long side with size of the `image_encoder.img_size`. This
Prompter class ensure to transform the images and the prompts before prediction. Also, the image is passed
automatically for the method `preprocess_image`, which is responsible for F.normalize the image and F.pad it to have
the right size for the SAM model :math:`(\text{image_encoder.img_size}, \text{image_encoder.img_size})`. For
default the image is normalized by the mean and standard deviation of the SAM dataset values.

Args:
    config: A model config to generate the model. Now just the SAM model is supported.
    device: The desired device to use the model.
    dtype: The desired dtype to use the model.

Example:
    >>> # prompter = VisualPrompter() # Will load the vit h for default
    >>> # You can load a custom SAM type for modifying the config
    >>> prompter = VisualPrompter(SamConfig('vit_b'))
    >>> image = torch.rand(3, 25, 30)
    >>> prompter.set_image(image)
    >>> boxes = Boxes(
    ...    torch.tensor(
    ...         [[[[0, 0], [0, 10], [10, 0], [10, 10]]]],
    ...         device=prompter.device,
    ...         dtype=torch.float32
    ...    ),
    ...    mode='xyxy'
    ... )
    >>> prediction = prompter.predict(boxes=boxes)
    >>> prediction.logits.shape
    torch.Size([1, 3, 256, 256])

Nc                6  > [         TU ]  5         Uc
  [        SSS9n[        U[        5      (       a  [        R
                  " U5      U l        [        U R                  R                  R                  SS94n[        R                  " / SQX#S9S-  U l        [        R                  " / S	QX#S9S-  U l        O[        eU R                  R                  X#S9U l        [!        US
S06U l        X l        X0l        S U l        S U l        S U l        U R/                  5         g )Nvit_hT)
model_type
pretrainedg      ?)p)g33333^@gR]@gRY@)devicedtype   )g(\2M@g(\L@g     L@same_on_batch)super__init__r   
isinstancer   from_configmodelr   image_encoderimg_sizetorchtensor
pixel_mean	pixel_stdNotImplementedErrortor   
transformsr   r   _original_image_size_input_image_size_input_encoder_sizereset_image)selfconfigr   r   r+   	__class__s        X/home/wildlama/miniconda3/lib/python3.13/site-packages/kornia/contrib/visual_prompter.pyr   VisualPrompter.__init__G   s     	>'dCFfi((0DJ()A)A)J)JcRTJ6vSVYY O 4VQTWW N &%ZZ]]&]>
0*QDQ
<@!9=;?     c                :   [        U[        R                  5      (       a,  [        U[        R                  5      (       a  [        XU5      nOr[        U R                  [        R                  5      (       aI  [        U R
                  [        R                  5      (       a   [        XR                  U R
                  5      nU R                  R                  R                  nXAR                  S   -
  nXAR                  S   -
  n[        R                  " USUSU45      nU$ )a  Normalize and F.pad a torch.Tensor.

For F.normalize the tensor: will prioritize the `mean` and `std` passed as argument,
if None will use the default
Sam Dataset values.

For F.pad the tensor: Will F.pad the torch.Tensor into the right and bottom to match with the size of
`self.model.image_encoder.img_size`

Args:
    x: The image to be preprocessed
    mean: Mean for each channel.
    std: Standard deviations for each channel.

Returns:
    The image preprocessed (normalized if has mean and str available and padded to encoder size)

r   )r    r%   Tensorkornia_normalizer'   r(   r"   r#   r$   shapeFpad)r0   xmeanstdencoder_im_sizepad_hpad_ws          r3   preprocess_imageVisualPrompter.preprocess_imageg   s    * dELL))jell.K.K #.A66:dnnV[VbVb;c;c OOT^^DA**22;;''"+-''"+-EE!a5)*r5   c                <   UR                   S:X  a   [        U/ SQ5        UR                  S5      nO[        U/ SQ5        U R                  5         UR                  S   UR                  S   4U l        U R                  US/S9nU R                  R                  U l        UR                  S   UR                  S   4U l	        U R                  XU5      nUR                  S   UR                  S   4U l        U R                  R                  U5      U l        S	U l        g
)a  Set the embeddings from the given image with `image_decoder` of the model.

Prepare the given image with the selected transforms and the preprocess method.

Args:
    image: RGB image. Normally images with range of [0-1], the model preprocess F.normalize the
           pixel values with the mean and std defined in its initialization. Expected to be into a float32
           dtype. Shape :math:`(3, H, W)`.
    mean: mean value of dataset for normalization.
    std: standard deviation of dataset for normalization.

   )3HWr   )BrH   rI   rJ   r7   r8   input	data_keysTN)ndimr
   	unsqueezer/   r;   r,   r+   _params_tfs_paramsr-   rD   r.   r"   r#   image_embeddingsis_image_set)r0   imager?   r@   s       r3   	set_imageVisualPrompter.set_image   s      ::?uo6OOA&Eu&:;%*[[_ekk"o$F!';??22"'++b/5;;r?!C%%e37$)KKOU[[_#E  $

 8 8 ? r5   c                "   [        UR                  / SQ5        [        UR                  SS/5        [        UR                  S   UR                  S   :H  S5        [	        U[
        R                  5      (       a  [        R                  " U5      nU$ )z:Validate the keypoints shape and ensure to be a Keypoints.)KN2rY   rZ   r   z8The keypoints and labels should have the same batch size)	r
   datar   r;   r    r%   r9   r   from_tensor)r0   	keypointslabelss      r3   _valid_keypointsVisualPrompter._valid_keypoints   sj    9>>?;6;;c
3Y__Q'6<<?:<vwi..!--i8Ir5   c                \   [        U[        R                  5      (       aV  UR                  S:X  a  [	        UR
                  SS/5        O[	        UR
                  / SQ5        [        R                  " USS9nUR                  S:X  a  UnU$ [        R                  " UR                  SS9SS9nU$ )zAValidate the boxes shape and ensure to be a Boxes into xyxy mode.   rY   4)rK   rY   rd   xyxymode)
r    r%   r9   rO   r
   r\   r   r]   rg   	to_tensor)r0   boxes
boxes_xyxys      r3   _valid_boxesVisualPrompter._valid_boxes   s    eU\\**zzQ"5::Sz:"5::?%%e&9E::J  **5???+GfUJr5   c                "    [        U/ SQ5        U$ )zValidate the input masks shape.)rY   1256ro   )r
   )r0   maskss     r3   _valid_masksVisualPrompter._valid_masks   s    5":;r5   rM   c                   U R                   " X!U R                  S.6nUc  / n[        U[        [        45      (       d  U/n[        U5       VVs0 s H
  u  pEXSU   _M     snn$ s  snnf )N)rN   params)r+   rR   r    listtuple	enumerate)r0   rN   promptstransformed_promptsidxkeys         r3   _transform_prompts!VisualPrompter._transform_prompts   sm     #oowTXTdTdeI -e}==#6"7>G	>RS>R(#-->RSSSs   A#c                z   / n/ n[        U[        [        R                  45      (       aR  [        U[        R                  5      (       a3  U R	                  X5      nUR                  S5        UR                  U5        [        U[        [        R                  45      (       a3  U R                  U5        UR                  S5        UR                  U5        [        U[        R                  5      (       a  U R                  U5        U R                  " USU06nSU;   aO  [        US   [        5      (       a7  US   R                  5       n[        U5      (       a  [        U5      (       a  X4n	OSn	SU;   a=  [        US   [        5      (       a%  US   R                  SS9n
[        U
5      (       a  U
nOSn[        W	WUS9$ )zMValidate and preprocess the given prompts to be aligned with the input image.r^   	bbox_xyxyrN   Nre   rf   pointsri   rp   )r    r   r%   r9   r`   appendr   rk   rq   r|   rh   r	   r   )r0   r^   keypoints_labelsri   rp   rN   to_transformr\   kpts_tensorr   _bboxbboxs               r3   preprocess_prompts!VisualPrompter.preprocess_prompts   s    	?Ai)U\\!:;;
K[]b]i]i@j@j--iJI[)	*eeU\\233e$[)&eU\\**e$&&J	J$:d;.?#K#K{+557K%k227MN^7_7_%8F$:d;.?#G#G%//V/<E%e,,DfD>>r5   c                f   [        U R                  S5        U R                  XX45      nU R                  R	                  UR
                  UR                  UR                  S9u  pAU R                  R                  U R                  U R                  R                  R                  5       UU	US9u  p[        X5      nU(       ao  [        U R                  [        5      (       aP  [        U R                  [        5      (       a1  UR!                  U R                  U R                  U R"                  5        U$ )a  Predict masks for the given image based on the input prompts.

Args:
    keypoints: Point prompts to the model. Each point is in (X,Y) in pixels. Shape :math:`(K, N, 2)`. Where
               `N` is the number of points and `K` the number of prompts.
    keypoints_labels: Labels for the point prompts. 1 indicates a foreground point and 0 indicates a background
                     point. Shape :math:`(K, N)`. Where `N` is the number of points, and `K` the number of
                     prompts.
    boxes: A box prompt to the model. If a torch.Tensor, should be in a xyxy mode. Shape :math:`(K, 4)`
    masks: A low resolution mask input to the model, typically coming from a previous prediction
           iteration. Has shape :math:`(K, 1, H, W)`, where for SAM, H=W=256.
    multimask_output: If true, the model will return three masks. For ambiguous input prompts (such as a
                      single click), this will often produce better masks than a single prediction. If only
                      a single mask is needed, the model's predicted quality score can be used to select the
                      best mask. For non-ambiguous prompts, such as multiple input prompts,
                      multimask_output=False can give better results.
    output_original_size: If true, the logits of `SegmentationResults` will be post-process to match the
                          original input image size.

Returns:
    A prediction with the logits and scores (IoU of each predicted mask)

zKAn image must be set with `self.set_image(...)` before `predict` be called!r   )rS   image_pesparse_prompt_embeddingsdense_prompt_embeddingsmultimask_output)r   rT   r   r"   prompt_encoderr   ri   rp   mask_decoderrS   get_dense_per   r    r-   rv   r,   original_res_logitsr.   )r0   r^   r   ri   rp   r   output_original_sizerx   sparse_embeddingsdense_embeddingslogitsscoresresultss                r3   predictVisualPrompter.predict  s   B 	T&&(uv)))uT /3jj.G.G>>gmm /H /
+  00!22ZZ..;;=%6$4- 1 
 &f5 411599444e<<''(>(>@Y@Y[_[s[st r5   c                ~    SU l         SU l        SU l        SU l        [	        U S5      (       a  U ?SU l        SU l        g)a|  Clear cached image state and prompt-transform metadata.

This method invalidates previously computed image embeddings and resets all
size/transform bookkeeping so a new call to :meth:`set_image` starts from a
clean state.

In practice, this resets:
- transformed-image parameters,
- original/input/encoder spatial sizes,
- cached image embeddings,
- ``is_image_set`` status flag.
NrS   F)rR   r,   r-   r.   hasattrrS   rT   )r0   s    r3   r/   VisualPrompter.reset_imageC  sH      $(!!%#' 4+,,% $!r5   Finductor	fullgraphdynamicbackendrg   optionsdisablec          
     r   [         R                  " U R                  R                  UUUUUUS9U R                  l        [         R                  " U R                  R                  UUUUUUS9U R                  l        [         R                  " U R                  R
                  UUUUUUS9U R                  l        g)u  Apply `torch.compile(...)`/dynamo API into the VisualPrompter API.

.. note:: For more information about the dynamo API check the official docs
          https://pytorch.org/docs/stable/generated/torch.compile.html

Args:
    fullgraph: Whether it is ok to break model into several subgraphs
    dynamic: Use dynamic shape tracing
    backend: backend to be used
    mode: Can be either “default”, “reduce-overhead” or “max-autotune”
    options: A dictionary of options to pass to the backend.
    disable: Turn torch.compile() into a no-op for testing

Example:
    >>> # prompter = VisualPrompter()
    >>> # prompter.compile() # You should have torch >= 2.0.0 installed
    >>> # Use the prompter methods ...

r   N)r%   compiler"   r#   r   r   )r0   r   r   r   rg   r   r   s          r3   r   VisualPrompter.compile[  s    N $)==JJ$$$


 > #(--JJ###


 %*MMJJ%%%


!r5   )r.   r-   r,   rR   r   r   rS   rT   r"   r'   r(   r+   )NNN)r1   zOptional[SamConfig]r   zOptional[torch.device]r   zOptional[torch.dtype]returnNone)NN)r>   torch.Tensorr?   Optional[torch.Tensor]r@   r   r   r   )rU   r   r?   r   r@   r   r   r   )r^   zKeypoints | torch.Tensorr_   r   r   r   )ri   zBoxes | torch.Tensorr   r   )rp   r   r   r   )rx   z torch.Tensor | Boxes | KeypointsrN   zOptional[list[str]]r   z+dict[str, torch.Tensor | Boxes | Keypoints])NNNN)
r^   "Optional[Keypoints | torch.Tensor]r   r   ri   Optional[Boxes | torch.Tensor]rp   r   r   r   )NNNNTT)r^   r   r   r   ri   r   rp   r   r   boolr   r   r   r   )r   r   )r   r   r   r   r   strrg   zOptional[str]r   zOptional[dict[Any, Any]]r   r   r   r   )__name__
__module____qualname____firstlineno____doc__r   rD   r%   no_gradrV   r`   rk   rq   r|   r   r   r/   r   __static_attributes____classcell__)r2   s   @r3   r   r   #   s   !J '+)-'+	# ' %	
 
 B cg%;I_	B ]]_fj"!!"!)?"!Mc"!	"! "!H	" \`
T8
TEX
T	4
T 9=3704(,(?5(? 1(? .	(?
 &(? 
(?T ]]_ 9=3704(,!%%)<5< 1< .	<
 &< < #< 
< <|"6  !",0W
 W
 	W

 W
 W
 *W
 W
 
W
 W
r5   r   ) 
__future__r   typingr   r   r%   torch.nn.functionalnn
functionalr<   kornia.augmentationr   %kornia.augmentation.container.augmentr   kornia.core.checkr   r	   r
   kornia.enhancer   r:   kornia.geometry.boxesr   kornia.geometry.keypointsr   kornia.models.samr   r   kornia.models.structuresr   r   r    r5   r3   <module>r      s=   $ #      . H V V 8 ' / , AO
 O
r5   