
    
3j)                     f   S SK Jr  S SKJr  S SKJr  S SKrS SKrS SK	J
r
  S SKJr  S SKJrJr  SS	KJr  SS
KJrJr  SSKJrJr  SSKJrJrJrJr  SSKJr  SSKJ r   SSK!J"r"  SSK#J$r$  \" 5       (       a  S SK%J&s  J'r(  Sr)OSr)\RT                  " \+5      r,Sr-\ " S S\5      5       r. " S S\"5      r/g)    )	dataclass)partial)AnyN)Image)tqdm)CLIPTextModelCLIPTokenizer   )PipelineImageInput)AutoencoderKLUNet2DConditionModel)DDIMSchedulerLCMScheduler)
BaseOutputis_torch_xla_availableloggingreplace_example_docstring)is_scipy_available)randn_tensor   )DiffusionPipeline   )MarigoldImageProcessorTFaE  
Examples:
```py
>>> import diffusers
>>> import torch

>>> pipe = diffusers.MarigoldDepthPipeline.from_pretrained(
...     "prs-eth/marigold-depth-v1-1", variant="fp16", torch_dtype=torch.float16
... ).to("cuda")

>>> image = diffusers.utils.load_image("https://marigoldmonodepth.github.io/images/einstein.jpg")
>>> depth = pipe(image)

>>> vis = pipe.image_processor.visualize_depth(depth.prediction)
>>> vis[0].save("einstein_depth.png")

>>> depth_16bit = pipe.image_processor.export_depth_to_16bit_png(depth.prediction)
>>> depth_16bit[0].save("einstein_depth_16bit.png")
```
c                       \ rS rSr% Sr\R                  \R                  -  \	S'   S\R                  -  \R                  -  \	S'   S\R                  -  \	S'   Sr
g)MarigoldDepthOutputR   u7  
Output class for Marigold monocular depth prediction pipeline.

Args:
    prediction (`np.ndarray`, `torch.Tensor`):
        Predicted depth maps with values in the range [0, 1]. The shape is `numimages × 1 × height × width` for
        `torch.Tensor` or `numimages × height × width × 1` for `np.ndarray`.
    uncertainty (`None`, `np.ndarray`, `torch.Tensor`):
        Uncertainty maps computed from the ensemble, with values in the range [0, 1]. The shape is `numimages × 1 ×
        height × width` for `torch.Tensor` or `numimages × height × width × 1` for `np.ndarray`.
    latent (`None`, `torch.Tensor`):
        Latent features corresponding to the predictions, compatible with the `latents` argument of the pipeline.
        The shape is `numimages * numensemble × 4 × latentheight × latentwidth`.

predictionNuncertaintylatent )__name__
__module____qualname____firstlineno____doc__npndarraytorchTensor__annotations____static_attributes__r        n/home/wildlama/miniconda3/lib/python3.13/site-packages/diffusers/pipelines/marigold/pipeline_marigold_depth.pyr   r   R   sC     

U\\))

"U\\115<<r,   r   c            !         ^  \ rS rSrSrSrSr     S/S\S\S\	\
-  S	\S
\S\S-  S\S-  S\S-  S\S-  S\S-  4U 4S jjjrS\S\S\S\S\S\S\S\\\4   S-  S\R*                  S-  S\R,                  \\R,                     -  S-  S\S\S\4S jr\R2                  R4                  S0S j5       r\R8                  " 5       \" \5                    S1S\S\S-  S\S\S-  S \S\S\S\S\\\4   S-  S\R*                  \\R*                     -  S-  S\R,                  \\R,                     -  S-  S\S\S!\S"\4S# jj5       5       rS\R*                  S\R*                  S-  S\R,                  S-  S\S\S\ \R*                  \R*                  4   4S$ jr!S%\R*                  S\R*                  4S& jr"\#        S2S'\R*                  S\S\S\S(\S)\$S*\S+\$S,\S\ \R*                  \R*                  S-  4   4S- jj5       r%S.r&U =r'$ )3MarigoldDepthPipelineh   a
  
Pipeline for monocular depth estimation using the Marigold method: https://marigoldmonodepth.github.io.

This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)

Args:
    unet (`UNet2DConditionModel`):
        Conditional U-Net to denoise the depth latent, conditioned on image latent.
    vae (`AutoencoderKL`):
        Variational Auto-Encoder (VAE) Model to encode and decode images and predictions to and from latent
        representations.
    scheduler (`DDIMScheduler` or `LCMScheduler`):
        A scheduler to be used in combination with `unet` to denoise the encoded image latents.
    text_encoder (`CLIPTextModel`):
        Text-encoder, for empty text embedding.
    tokenizer (`CLIPTokenizer`):
        CLIP tokenizer.
    prediction_type (`str`, *optional*):
        Type of predictions made by the model.
    scale_invariant (`bool`, *optional*):
        A model property specifying whether the predicted depth maps are scale-invariant. This value must be set in
        the model config. When used together with the `shift_invariant=True` flag, the model is also called
        "affine-invariant". NB: overriding this value is not supported.
    shift_invariant (`bool`, *optional*):
        A model property specifying whether the predicted depth maps are shift-invariant. This value must be set in
        the model config. When used together with the `scale_invariant=True` flag, the model is also called
        "affine-invariant". NB: overriding this value is not supported.
    default_denoising_steps (`int`, *optional*):
        The minimum number of denoising diffusion steps that are required to produce a prediction of reasonable
        quality with the given model. This value must be set in the model config. When the pipeline is called
        without explicitly setting `num_inference_steps`, the default value is used. This is required to ensure
        reasonable results with various model flavors compatible with the pipeline, such as those relying on very
        short denoising schedules (`LCMScheduler`) and those with full diffusion schedules (`DDIMScheduler`).
    default_processing_resolution (`int`, *optional*):
        The recommended value of the `processing_resolution` parameter of the pipeline. This value must be set in
        the model config. When the pipeline is called without explicitly setting `processing_resolution`, the
        default value is used. This is required to ensure reasonable results with various model flavors trained
        with varying optimal processing resolution values.
ztext_encoder->unet->vae)depth	disparityNunetvae	schedulertext_encoder	tokenizerprediction_typescale_invariantshift_invariantdefault_denoising_stepsdefault_processing_resolutionc                   > [         TU ]  5         X`R                  ;  a&  [        R	                  SU SU R                   S35        U R                  UUUUUS9  U R                  UUUU	U
S9  [        U SS 5      (       a/  S[        U R                  R                  R                  5      S-
  -  OS	U l        Xpl        Xl        Xl        Xl        S U l        [%        U R                  S
9U l        g )Nz*Potentially unsupported `prediction_type='z&'`; values supported by the pipeline: .)r3   r4   r5   r6   r7   )r8   r9   r:   r;   r<   r4   r   r      )vae_scale_factor)super__init__supported_prediction_typesloggerwarningregister_modulesregister_to_configgetattrlenr4   configblock_out_channelsr@   r9   r:   r;   r<   empty_text_embeddingr   image_processor)selfr3   r4   r5   r6   r7   r8   r9   r:   r;   r<   	__class__s              r-   rB   MarigoldDepthPipeline.__init__   s     	"A"AANN<_<MMs22316
 	% 	 	
 	+++$;*G 	  	
 W^^bdikoVpVpc$((//*L*L&MPQ&Q Rvw..'>$-J*$(!5tG\G\]r,   imagenum_inference_stepsensemble_sizeprocessing_resolutionresample_method_inputresample_method_output
batch_sizeensembling_kwargslatents	generatoroutput_typeoutput_uncertaintyreturnc           
      	  ^
 S[        U R                  R                  R                  5      S-
  -  nXR                  :w  a  [        SU R                   SU S35      eUc  [        S5      eUS:  a  [        S5      eUS:  a  [        S5      eUS:X  a  [        R                  S	5        US:  a<  U R                  (       d  U R                  (       a  [        5       (       d  [        S
5      eUS:X  a  U(       a  [        S5      eUc  [        S5      eUS:  a  [        S5      eX@R                  -  S:w  a  [        SU R                   S35      eUS;  a  [        S5      eUS;  a  [        S5      eUS:  a  [        S5      eUS;  a  [        S5      eU	b  T
b  [        S5      eUb:  [        U[        5      (       d  [        S5      eSU;   a  US   S;  a  [        S5      eSnSu  nn[        U[        5      (       d  U/n[        U5       GH  u  nn[        U[         R"                  5      (       d  [$        R&                  " U5      (       a`  UR(                  S;  a  [        SU SUR*                   S35      eUR*                  S S  u  nnSnUR(                  S!:X  a  UR*                  S   nOL[        U[,        R,                  5      (       a  UR.                  u  nnSnO[        S"U S#[1        U5       S35      eUc  UUnnO"UU4UU4:w  a  [        S$U S%UU4 S&UU4 35      eUU-  nGM     U	Gb%  [$        R&                  " U	5      (       d  [        S'5      eU	R3                  5       S!:w  a  [        S(U	R*                   S35      eUS:  a=  [5        UU5      nUU-  U-  nX-  U-  nUS:X  d  US:X  a  [        S)U S*U S+35      eUUnnXR                  -   S-
  U R                  -  nUU R                  -   S-
  U R                  -  nX-  U R                  R                  R6                  UU4nU	R*                  U:w  a  [        S,U	R*                   S-U S35      eT
b  [        T
[        5      (       aD  [        T
5      X-  :w  a  [        S.5      e[9        U
4S/ jT
 5       5      (       d  [        S05      e U$ [        T
[$        R:                  5      (       d  [        S1[1        T
5       S35      eU$ )2Nr   r   z/`vae_scale_factor` computed at initialization (z) differs from the actual one (z).zW`num_inference_steps` is not specified and could not be resolved from the model config.z'`num_inference_steps` must be positive.z!`ensemble_size` must be positive.zk`ensemble_size` == 2 results are similar to no ensembling (1); consider increasing the value to at least 3.z9Make sure to install scipy if you want to use ensembling.zpComputing uncertainty by setting `output_uncertainty=True` also requires setting `ensemble_size` greater than 1.zY`processing_resolution` is not specified and could not be resolved from the model config.r   zx`processing_resolution` must be non-negative: 0 for native resolution, or any positive value for downsampled processing.z.`processing_resolution` must be a multiple of r>   )nearestnearest-exactbilinearbicubicareazy`resample_method_input` takes string values compatible with PIL library: nearest, nearest-exact, bilinear, bicubic, area.zz`resample_method_output` takes string values compatible with PIL library: nearest, nearest-exact, bilinear, bicubic, area.z`batch_size` must be positive.)ptr&   z*`output_type` must be one of `pt` or `np`.z2`latents` and `generator` cannot be used together.z)`ensembling_kwargs` must be a dictionary.	reductionmeanmedianzF`ensembling_kwargs['reduction']` can be either `'mean'` or `'median'`.)NN)r   r
      z`image[z(]` has unsupported dimensions or shape: ri   zUnsupported `image[z	]` type: zInput `image[z]` has incompatible dimensions z with the previous images z!`latents` must be a torch.Tensor.z/`latents` has unsupported dimensions or shape: z*Extreme aspect ratio of the input image: [z x ]z`latents` has unexpected shape=z
 expected=z^The number of generators must match the total number of ensemble members for all input images.c              3      >#    U  H4  oR                   R                  TS    R                   R                  :H  v   M6     g7f)r   N)devicetype).0grZ   s     r-   	<genexpr>5MarigoldDepthPipeline.check_inputs.<locals>.<genexpr>A  s,     Xi88==IaL,?,?,D,DDis   <?z;`generator` device placement is not consistent in the list.zUnsupported generator type: )rI   r4   rJ   rK   r@   
ValueErrorrD   rE   r9   r:   r   ImportError
isinstancedictlist	enumerater&   r'   r(   	is_tensorndimshaper   sizern   dimmaxlatent_channelsall	Generator)rN   rQ   rR   rS   rT   rU   rV   rW   rX   rY   rZ   r[   r\   actual_vae_scale_factor
num_imagesWHiimgH_iW_iN_imax_orignew_Hnew_Wwhshape_expecteds             `                 r-   check_inputs"MarigoldDepthPipeline.check_inputs   s_    #$DHHOO,N,N(ORS(S"T"&;&;;A$BWBWAXXw  yP  xQ  QS  T  &vww"FGG1@AAANN? 1$"6"6$:N:NXjXlXlYZZA"4"  !(k  !1$*  !#8#88A=MdNcNcMddefgg (ccC  ")ddC  >=>>l*IJJ9#8QRR(/66 !LMM//4Ek4RZl4l !ijj 
1%&&GE&FAs#rzz**eooc.B.B889,$wqc1YZ]ZcZcYdde%fgg99RS>S88q=))A,CC--88S #6qc49+Q!OPPyC11QC:% #A3&EsCj\Qkmnpqlrkst  #J' ', ??7++ !DEE{{}! #RSZS`S`Raab!cdd$q(q!911X=1X=A:!$'QRSQTTWXYWZZ[%\]]e1***Q.43H3HHAT***Q.43H3HHA(8$((//:Y:Y[\^_`N}}. #B7==/Q[\j[kkl!mnn  )T**y>Z%??$x  XiXXX$%bcc Y
   	5??;; #?Y?PPQ!RSSr,   c                    [        U S5      (       d  0 U l        OA[        U R                  [        5      (       d"  [	        S[        U R                  5       S35      e[        S0 U R                  D6nUR                  SU5      US'   UR                  SU5      US'   Ub  [        U40 UD6$ Ub  [        SSU0UD6$ [	        S5      e)	N_progress_bar_configz=`self._progress_bar_config` should be of type `dict`, but is r>   descleavetotalz/Either `total` or `iterable` has to be defined.r    )hasattrr   ru   rv   rs   rn   getr   )rN   iterabler   r   r   progress_bar_configs         r-   progress_bar"MarigoldDepthPipeline.progress_barH  s    t344(*D%D55t<<OPTUYUnUnPoOppqr  #?T%>%>?&9&=&=fd&KF#':'>'>w'NG$8$788;e;':;;NOOr,   match_input_resolutionoutput_latentreturn_dictc                    U R                   nU R                  nUc  U R                  nUc  U R                  nU R	                  UUUUUUUU	U
UUU5      nU R
                  c]  SnU R                  USU R                  R                  SSS9nUR                  R                  U5      nU R                  U5      S   U l        U R                  R                  XUUU5      u  nnnU R                  XXU5      u  nnAU R
                  R                  UUS9R                  US	S	5      n/ nU R                  [!        SUU-  U5      SS
S9 H  nUUUU-    nUUUU-    nUR"                  S   nUSU n U R$                  R'                  UUS9  U R                  U R$                  R(                  SSS9 Hw  n![*        R,                  " UU/S	S9n"U R/                  U"U!U SS9S   n#U R$                  R1                  U#U!UUS9R2                  n[4        (       d  Mb  [6        R8                  " 5         My     UR;                  U5        M     [*        R,                  " USS9nAAAAAA A"A#[*        R,                  " [!        SUR"                  S   U5       Vs/ s H  nU R=                  UUUU-    5      PM     snSS9n$U(       d  SnU R                  R?                  U$U5      n$Sn%US	:  a  U$R@                  " UU/U$R"                  S	S Q76 n$[!        U5       Vs/ s H9  nU RB                  " U$U   U RD                  U RF                  U40 U	=(       d    0 D6PM;     n$n[I        U$6 u  n$n%[*        R,                  " U$SS9n$U(       a  [*        R,                  " U%SS9n%OSn%U(       aB  U R                  RK                  U$UUSS9n$U%b#  U(       a  U R                  RK                  U%UUSS9n%US:X  a@  U R                  RM                  U$5      n$U%b"  U(       a  U R                  RM                  U%5      n%U RO                  5         U(       d  U$U%U4$ [Q        U$U%US9$ s  snf s  snf )aA  
Function invoked when calling the pipeline.

Args:
    image (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`, `list[PIL.Image.Image]`, `list[np.ndarray]`),
        `list[torch.Tensor]`: An input image or images used as an input for the depth estimation task. For
        arrays and tensors, the expected value range is between `[0, 1]`. Passing a batch of images is possible
        by providing a four-dimensional array or a tensor. Additionally, a list of images of two- or
        three-dimensional arrays or tensors can be passed. In the latter case, all list elements must have the
        same width and height.
    num_inference_steps (`int`, *optional*, defaults to `None`):
        Number of denoising diffusion steps during inference. The default value `None` results in automatic
        selection.
    ensemble_size (`int`, defaults to `1`):
        Number of ensemble predictions. Higher values result in measurable improvements and visual degradation.
    processing_resolution (`int`, *optional*, defaults to `None`):
        Effective processing resolution. When set to `0`, matches the larger input image dimension. This
        produces crisper predictions, but may also lead to the overall loss of global context. The default
        value `None` resolves to the optimal value from the model config.
    match_input_resolution (`bool`, *optional*, defaults to `True`):
        When enabled, the output prediction is resized to match the input dimensions. When disabled, the longer
        side of the output will equal to `processing_resolution`.
    resample_method_input (`str`, *optional*, defaults to `"bilinear"`):
        Resampling method used to resize input images to `processing_resolution`. The accepted values are:
        `"nearest"`, `"nearest-exact"`, `"bilinear"`, `"bicubic"`, or `"area"`.
    resample_method_output (`str`, *optional*, defaults to `"bilinear"`):
        Resampling method used to resize output predictions to match the input resolution. The accepted values
        are `"nearest"`, `"nearest-exact"`, `"bilinear"`, `"bicubic"`, or `"area"`.
    batch_size (`int`, *optional*, defaults to `1`):
        Batch size; only matters when setting `ensemble_size` or passing a tensor of images.
    ensembling_kwargs (`dict`, *optional*, defaults to `None`)
        Extra dictionary with arguments for precise ensembling control. The following options are available:
        - reduction (`str`, *optional*, defaults to `"median"`): Defines the ensembling function applied in
          every pixel location, can be either `"median"` or `"mean"`.
        - regularizer_strength (`float`, *optional*, defaults to `0.02`): Strength of the regularizer that
          pulls the aligned predictions to the unit range from 0 to 1.
        - max_iter (`int`, *optional*, defaults to `2`): Maximum number of the alignment solver steps. Refer to
          `scipy.optimize.minimize` function, `options` argument.
        - tol (`float`, *optional*, defaults to `1e-3`): Alignment solver tolerance. The solver stops when the
          tolerance is reached.
        - max_res (`int`, *optional*, defaults to `None`): Resolution at which the alignment is performed;
          `None` matches the `processing_resolution`.
    latents (`torch.Tensor`, or `list[torch.Tensor]`, *optional*, defaults to `None`):
        Latent noise tensors to replace the random initialization. These can be taken from the previous
        function call's output.
    generator (`torch.Generator`, or `list[torch.Generator]`, *optional*, defaults to `None`):
        Random number generator object to ensure reproducibility.
    output_type (`str`, *optional*, defaults to `"np"`):
        Preferred format of the output's `prediction` and the optional `uncertainty` fields. The accepted
        values are: `"np"` (numpy array) or `"pt"` (torch tensor).
    output_uncertainty (`bool`, *optional*, defaults to `False`):
        When enabled, the output's `uncertainty` field contains the predictive uncertainty map, provided that
        the `ensemble_size` argument is set to a value above 2.
    output_latent (`bool`, *optional*, defaults to `False`):
        When enabled, the output's `latent` field contains the latent codes corresponding to the predictions
        within the ensemble. These codes can be saved, modified, and used for subsequent calls with the
        `latents` argument.
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether or not to return a [`~pipelines.marigold.MarigoldDepthOutput`] instead of a plain tuple.

Examples:

Returns:
    [`~pipelines.marigold.MarigoldDepthOutput`] or `tuple`:
        If `return_dict` is `True`, [`~pipelines.marigold.MarigoldDepthOutput`] is returned, otherwise a
        `tuple` is returned where the first element is the prediction, the second element is the uncertainty
        (or `None`), and the third is the latent (or `None`).
N 
do_not_padTrd   )padding
max_length
truncationreturn_tensorsr   )rm   dtyper   zMarigold predictions...)r   r   )rm   FzDiffusion steps...r}   )encoder_hidden_statesr   )rZ   )is_aar&   )r   r   r   ))_execution_devicer   r;   r<   r   rL   r7   model_max_length	input_idstor6   rM   
preprocessprepare_latentsrepeatr   ranger{   r5   set_timesteps	timestepsr(   catr3   stepprev_sampleXLA_AVAILABLExm	mark_stepappenddecode_predictionunpad_imagereshapeensemble_depthr9   r:   zipresize_antialiaspt_to_numpymaybe_free_model_hooksr   )&rN   rQ   rR   rS   rT   r   rU   rV   rW   rX   rY   rZ   r[   r\   r   r   rm   r   r   prompttext_inputstext_input_idsr   original_resolutionimage_latentpred_latentbatch_empty_text_embeddingpred_latentsr   batch_image_latentbatch_pred_latenteffective_batch_sizetexttbatch_latentnoiser   r   s&                                         r-   __call__MarigoldDepthPipeline.__call__[  s   t ''

 &"&">"> ($($F$F! &&!!"

" $$,F..$>>::# ) K )2255f=N(,(9(9.(I!(LD% /3.B.B.M.M*?/
+w+ %)$8$8Ij%
!k %)%>%>%A%AW\%A%]%d%d1&
" ""!Z-/<DOh # 
A ".a!j.!A +AJ ?#5#;#;A#> -.C/CDDNN(()<V(L&&t~~'?'?uSg&h$yy*<>O)PVWX		,[`	abcd$(NN$7$71/9 %8 %+ " !=LLN i  12'
* ii!4 & YY q+"3"3A"6
CCA &&{1q:~'FGC 

 K ))55j'J
 1#++J]
HXHXYZY[H\]J z*	 +A ##qM((((&	
 ).B +  	 '*:&6#J:15J!#ii;" "-->>/1Gu ? J &+="22CC!46LTY D 
 $--99*EJ&+="22>>{K 	##%[99"!#
 	
*	s   !P:A P?c                    S n[         R                  " [        SUR                  S   U5       Vs/ s H(  nU" U R                  R                  XXu-    5      5      PM*     snSS9nXR                  R                  R                  -  nUR                  USS9nUn	U	c*  [        UR                  UUR                  UR                  S9n	X4$ s  snf )Nc                     [        U S5      (       a  U R                  R                  5       $ [        U S5      (       a  U R                  $ [	        S5      e)Nlatent_distrY   z3Could not access latents of provided encoder_output)r   r   moderY   AttributeError)encoder_outputs    r-   retrieve_latents?MarigoldDepthPipeline.prepare_latents.<locals>.retrieve_latentsu  sI    ~}55%11668833%---$%Z[[r,   r   r   )rZ   rm   r   )r(   r   r   r{   r4   encoderJ   scaling_factorrepeat_interleaver   rm   r   )
rN   rQ   rY   rZ   rS   rW   r   r   r   r   s
             r-   r   %MarigoldDepthPipeline.prepare_latentsm  s    	\ yy q%++a.*==A !1>1J!KL= 
 $hhoo&D&DD#55m5K&""##**"((	K ((%s   /C	r   c                    UR                  5       S:w  d1  UR                  S   U R                  R                  R                  :w  a:  [        SU R                  R                  R                   SUR                   S35      eU R                  R                  XR                  R                  R                  -  SS9S   nUR                  SS	S
9n[        R                  " USS5      nUS-   S-  nU$ )Nri   r   z Expecting 4D tensor of shape [B,z,H,W]; got r>   F)r   r   Tr}   keepdimg            ?g       @)r}   r{   r4   rJ   r   rs   decoder   rg   r(   clip)rN   r   r   s      r-   r   'MarigoldDepthPipeline.decode_prediction  s    ??![%6%6q%9TXX__=\=\%\2488??3R3R2SS^_j_p_p^qqrs  XX__[88??3Q3Q%Q_d_efgh
__D_9
ZZ
D#6
 3&#-
r,   r1   re   regularizer_strengthmax_itertolmax_resc	           
        ^^^^^^^^^^^^ U R                  5       S:w  d  U R                  S   S:w  a  [        SU R                   S35      eTS;  a  [        ST S35      eT(       d  T(       a  [        S5      eS[        R                  4UUU4S	 jjmS[        R                  S
[
        R                  S[        R                  4UUU4S jjm SS[        R                  S[        S[        [        R                  [        R                  S-  4   4U4S jjjmS
[
        R                  S[        R                  S[        4UUUU4S jjmS[        R                  4UUUUU4S jjn	T=(       d    Tn
U R                  S   mU
(       a  U	" U 5      nT" X5      n T" XS9u  pU R                  5       nT(       a  T(       a  U R                  5       nOT(       a  SnO[        S5      eX-
  R                  SS9nX-
  U-  n U(       a  X-  nX4$ )a  
Ensembles the depth maps represented by the `depth` tensor with expected shape `(B, 1, H, W)`, where B is the
number of ensemble members for a given prediction of size `(H x W)`. Even though the function is designed for
depth maps, it can also be used with disparity maps as long as the input tensor values are non-negative. The
alignment happens when the predictions have one or more degrees of freedom, that is when they are either
affine-invariant (`scale_invariant=True` and `shift_invariant=True`), or just scale-invariant (only
`scale_invariant=True`). For absolute predictions (`scale_invariant=False` and `shift_invariant=False`)
alignment is skipped and only ensembling is performed.

Args:
    depth (`torch.Tensor`):
        Input ensemble depth maps.
    scale_invariant (`bool`, *optional*, defaults to `True`):
        Whether to treat predictions as scale-invariant.
    shift_invariant (`bool`, *optional*, defaults to `True`):
        Whether to treat predictions as shift-invariant.
    output_uncertainty (`bool`, *optional*, defaults to `False`):
        Whether to output uncertainty map.
    reduction (`str`, *optional*, defaults to `"median"`):
        Reduction method used to ensemble aligned predictions. The accepted values are: `"mean"` and
        `"median"`.
    regularizer_strength (`float`, *optional*, defaults to `0.02`):
        Strength of the regularizer that pulls the aligned predictions to the unit range from 0 to 1.
    max_iter (`int`, *optional*, defaults to `2`):
        Maximum number of the alignment solver steps. Refer to `scipy.optimize.minimize` function, `options`
        argument.
    tol (`float`, *optional*, defaults to `1e-3`):
        Alignment solver tolerance. The solver stops when the tolerance is reached.
    max_res (`int`, *optional*, defaults to `1024`):
        Resolution at which the alignment is performed; `None` matches the `processing_resolution`.
Returns:
    A tensor of aligned and ensembled depth maps and optionally a tensor of uncertainties of the same shape:
    `(1, 1, H, W)`.
ri   r   z,Expecting 4D tensor of shape [B,1,H,W]; got r>   rf   Unrecognized reduction method: z1Pure shift-invariant ensembling is not supported.r1   c                 (  > U R                  TS5      R                  SS9R                  nU R                  TS5      R                  SS9R                  nT(       aU  T(       aN  SX!-
  R	                  SS9-  nU* U-  n[
        R                  " X445      R                  5       R                  5       nOCT(       a1  SUR	                  SS9-  nUR                  5       R                  5       nO[        S5      eUR                  [        R                  5      nU$ )Nr   r   r   ư>minUnrecognized alignment.)r   r   valuesr~   clampr(   r   cpunumpyrs   astyper&   float64)	r1   init_mininit_maxinit_sinit_tparamrS   r9   r:   s	         r-   
init_param8MarigoldDepthPipeline.ensemble_depth.<locals>.init_param  s    }}]B7;;;BIIH}}]B7;;;BIIH? 3::t:DD 8+		6"23779??A x~~$~77

**, !:;;LL,ELr,   r   r]   c                   > T(       a  T(       a  [         R                  " US5      u  p#[        R                  " U5      R	                  U 5      R                  TSSS5      n[        R                  " U5      R	                  U 5      R                  TSSS5      nX-  U-   nU$ T(       a=  [        R                  " U5      R	                  U 5      R                  TSSS5      nX-  nU$ [        S5      e)Nr   r   r   )r&   splitr(   
from_numpyr   viewrs   )r1   r   sr   outrS   r9   r:   s        r-   align3MarigoldDepthPipeline.ensemble_depth.<locals>.align  s    ?xxq)$$Q'**5166}aAN$$Q'**5166}aANi!m J !$$U+..u5::=!QPQRi J !!:;;r,   depth_alignedreturn_uncertaintyNc                 j  > S nTS:X  a6  [         R                  " U SSS9nU(       a  [         R                  " U SSS9nX24$ TS:X  a`  [         R                  " U SSS9R                  nU(       a6  [         R                  " [         R
                  " X-
  5      SSS9R                  nX24$ [        ST S35      e)Nrg   r   Tr   rh   r   r>   )r(   rg   stdrh   r   absrs   )r
  r  r   r   re   s       r-   ensemble6MarigoldDepthPipeline.ensemble_depth.<locals>.ensemble  s     KF""ZZ1dK
%"'))Mq$"OK ** h&"\\-QMTT
%"',,uyy9S/TZ[ei"j"q"qK ** !#B9+Q!OPPr,   c                   > SnT" X5      n[         R                  " [         R                  " T5      5       H?  u  pEX4   X5   -
  nX&S-  R                  5       R	                  5       R                  5       -  nMA     TS:  an  T" USS9u  pxUR                  5       R                  5       R                  5       n	SUR                  5       -
  R                  5       R                  5       n
X)U
-   T-  -  nU$ )Ng        r   r   Fr  r   )	r(   combinationsarangerg   sqrtitemr   r  r~   )r   r1   costr
  r   jdiffr   _err_nearerr_farr  r  rS   r   s              r-   cost_fn5MarigoldDepthPipeline.ensemble_depth.<locals>.cost_fn  s    D!%/M**5<<+FG$'-*::q(--/4466 H $a' (5 Q
%>>+//1668!11668==?G+/CCCKr,   c           	      0  > SS K nU R                  [        R                  5      nTb4  [	        UR
                  SS  5      T:  a  [        R                  " UTS5      nT" U5      nUR                  R                  [        TUS9UST	TSS.S9nUR                  $ )	Nr   r   r`   )r1   BFGSF)maxiterdisp)methodr   options)scipyr   r(   float32r~   r{   r   resize_to_max_edgeoptimizeminimizer   x)
r1   r%  depth_to_alignr   resr  r   r   r   r   s
        r-   compute_param;MarigoldDepthPipeline.ensemble_depth.<locals>.compute_param  s    "XXemm4N"s>+?+?+C'Dw'N!7!J!J>[bds!t~.E..))~6$,e< * C 55Lr,   r   r  r   r   r   )F)r}   r{   rs   r(   r)   r&   r'   booltuplefloatr~   r   r   )r1   r9   r:   r\   re   r   r   r   r   r-  requires_aligningr   r   	depth_max	depth_mindepth_ranger  r  r  rS   r   s    `` `````       @@@@@r-   r   $MarigoldDepthPipeline.ensemble_depth  s   \ 99;!u{{1~2KEKK=XYZ[[..>ykKLL?PQQ	ell 	 	"	 	bjj 	U\\ 	 	 EJ	+ <<	+=A	+5<<!445	+ 	+ 	2:: 	ell 	u 	 	 	 	 	& ,>A!%(E%'E%eSIIK			II677 ,333="k1&K!!r,   )r   r;   r<   rL   rM   r9   r:   r@   )NTTNN)NNNT)Nr   NTra   ra   r   NNNr&   FFT)TTFrh   g{Gz?r   gMbP?i   )(r!   r"   r#   r$   r%   model_cpu_offload_seqrC   r   r   r   r   r   r	   strr/  intrB   r   rv   r   r(   r)   r   rw   r   compilerdisabler   no_gradr   EXAMPLE_DOC_STRINGr   r0  r   r   staticmethodr1  r   r+   __classcell__)rO   s   @r-   r/   r/   h   s   'R 6!7 '+'+'+.248-^"-^ -^ !</	-^
 $-^ !-^ t-^ -^ -^ "%t-^ (+Tz-^ -^^B!B !B 	B
  #B  #B !$B B  S>D0B $B ??T%//%::TAB B !B 
BH ^^P P$ ]]_12 +/,0'+%/&037<@DH#(# !N
!N
 !4ZN
 	N

  #TzN
 !%N
  #N
 !$N
 N
  S>D0N
 U\\ 22T9N
 ??T%//%::TAN
 N
 !N
 N
  !N
 3 N
`#)||#) $#) ??T)	#)
 #) #) 
u||U\\)	*#)JU\\ ell   !% $#(!&*Z"||Z"Z" Z" !	Z"
 Z" $Z" Z" Z" Z" 
u||U\\D00	1Z" Z"r,   r/   )0dataclassesr   	functoolsr   typingr   r   r&   r(   PILr   	tqdm.autor   transformersr   r	   rM   r   modelsr   r   
schedulersr   r   utilsr   r   r   r   utils.import_utilsr   utils.torch_utilsr   pipeline_utilsr   marigold_image_processingr   torch_xla.core.xla_modelcore	xla_modelr   r   
get_loggerr!   rD   r=  r   r/   r    r,   r-   <module>rQ     s   & "       5 1  5 - . = ))MM			H	% ,  *    *S"- S"r,   