
    
3j'#                         S r SSKJr  SSKJrJrJr  SSKrSSKJ	r	  SSK
JrJr  SSKJr   " S S\	R                  5      rg)	zBase training task abstraction.

This module provides the base TrainingTask class that encapsulates a complete
forward pass including loss computation. Tasks return a dictionary with loss
components and outputs for logging.
    )nullcontext)AnyDictOptionalN)get_state_dictunwrap_model)
ModelEmaV3c                     ^  \ rS rSrSr   S*S\\R                     S\\R                     S\	4U 4S jjjr
U 4S jr S+S	\\   S
S 4S jjr  S,S\S\\   S
\\R                      4S jjrS-S\	S
\\R                      4S jjr   S.S\S\	S\\R                     S
\R                   4S jjrS
\	4S jr  S,S\S\\   S
\\R                      4S jjrS+S\\   S
S4S jjrS-S\	4S jjrS/S\\R                      S\	S
\\R                      4S jjrS/S\\R                      S\	S
\\\4   4S jjr   S0S\\\\4      S\	S\\R                      S\	S
S4
S  jjrS\4S\	S
\\\4   4S! jjr    S1S"\\\4   S#\\\\4      S\	S\	S
S4
S$ jjr!S% r"S&\RF                  S'\RF                  S
\\\RF                  4   4S( jr$S)r%U =r&$ )2TrainingTask   a  Base class for training tasks.

A training task encapsulates a complete forward pass including loss computation.
Tasks return a dictionary containing the training loss and other components for logging.

The returned dictionary must contain:
    - 'loss': The training loss for backward pass (required)
    - 'output': Model output/logits for metric computation (recommended)
    - Other task-specific loss components for logging (optional)

Args:
    device: Device for task tensors/buffers (defaults to cpu)
    dtype: Dtype for task tensors/buffers (defaults to torch default)
    verbose: Enable info logging

Example:
    >>> task = SomeTask(model, criterion, device=torch.device('cuda'))
    >>>
    >>> # Prepare for distributed training (if needed)
    >>> if distributed:
    >>>     task.prepare_distributed(device_ids=[local_rank])
    >>>
    >>> # Training loop
    >>> result = task(input, target)
    >>> result['loss'].backward()
Ndevicedtypeverbosec                    > [         TU ]  5         Ub  UO[        R                  " S5      U l        Ub  UO[        R                  " 5       U l        X0l        g )Ncpu)super__init__torchr   get_default_dtyper   r   )selfr   r   r   	__class__s       H/home/wildlama/miniconda3/lib/python3.13/site-packages/timm/task/task.pyr   TrainingTask.__init__-   sC     	 & 2fU8K#/UU5L5L5N
    c                    > [         R                  " S5      R                  " U0 UD6nUR                  U l        UR                  U l        [
        TU ]  " U0 UD6$ )zFMove task to device/dtype, keeping self.device and self.dtype in sync.r   )r   emptytor   r   r   )r   argskwargsdummyr   s       r   r   TrainingTask.to8   sJ    A!!4262ll[[
wz4*6**r   
device_idsreturnc                     U $ )a`  Prepare task for distributed training.

This method wraps trainable components in DistributedDataParallel (DDP)
while leaving non-trainable components (like frozen teacher models) unwrapped.

Should be called after task initialization but before training loop.

Args:
    device_ids: List of device IDs for DDP (e.g., [local_rank])
    **ddp_kwargs: Additional arguments passed to DistributedDataParallel

Returns:
    self (for method chaining)

Example:
    >>> task = LogitDistillationTask(student, teacher, criterion)
    >>> task.compile()
    >>> task.prepare_distributed(device_ids=[args.local_rank])
 )r   r"   
ddp_kwargss      r   prepare_distributed TrainingTask.prepare_distributed?   s	    2 r   backendmodec                     g)a  Compile hot task components before distributed wrapping.

Subclasses should compile the train/eval modules that do the tensor
work, not the outer task wrapper. The return value is the eval-facing
compiled module/callable used by validation and checkpoint export.
Nr%   r   r)   r*   compile_kwargss       r   compileTrainingTask.compileZ   s     r   Femac                 D    U(       a  [        U SS5      $ [        U SU 5      $ )z1Return the module that owns trainable parameters.trainable_module_emaNtrainable_module)getattr)r   r0   s     r   get_trainable_module!TrainingTask.get_trainable_moduleh   s%    4!7>>t/66r   decay
use_warmupc                 `    [        U R                  5       4UUUS.UD6U l        U R                  $ )z+Create an EMA copy of the trainable module.)r7   r8   r   )r	   r5   r2   )r   r7   r8   r   r   s        r   	setup_emaTrainingTask.setup_eman   sC     %/%%'%
!	%

 %
! (((r   c                 $    U R                  SS9SL$ )z5Return whether this task has an EMA trainable module.Tr0   N)r5   )r   s    r   has_emaTrainingTask.has_ema   s    ((T(2$>>r   c                     U R                  5       (       d  g[        R                  " U R                  SS94UUS.UD6U l        U R                  $ )z3Compile the EMA eval model if one has been created.NTr=   )r)   r*   )r>   r   r.   get_eval_modeleval_model_emar,   s       r   compile_emaTrainingTask.compile_ema   sW     ||~~#mmD)

 	
 """r   stepc                     U R                  5       (       a5  U R                  SS9R                  [        U R                  5       5      US9  gg)z3Update EMA state from the current trainable module.Tr=   )rE   N)r>   r5   updater   )r   rE   s     r   
update_emaTrainingTask.update_ema   sB    <<>>%%$%/66|DD]D]D_7`gk6l r   exclude_headc                     U R                  5       nU(       a#  UR                  5        Vs/ s H  o3PM     snSS $ UR                  5       $ s  snf )z/Return parameters to use for gradient clipping.N)r5   
parameters)r   rJ   r3   ps       r   get_clip_parameters TrainingTask.get_clip_parameters   sO    446/::<=<!A<=crBB**,, >s   Amodulec                 v    Uc5  U(       a  SOSn[        X5      (       a  [        X5      $ U R                  US9nU$ )zReturn the eval model/callable used for validation.

Checkpoint state_dict handling uses unwrap_model separately so DDP and
compiled wrappers do not leak into saved keys.
rB   
eval_modelr=   )hasattrr4   r5   )r   rQ   r0   	eval_attrs       r   rA   TrainingTask.get_eval_model   sA     >,/(\It''t//..3.7Fr   c                     0 $ )z:Return task-owned state outside the eval model state_dict.r%   )r   rQ   r0   s      r   get_task_stateTrainingTask.get_task_state   s    	r   statestrictc                     g)z8Load task-owned state outside the eval model state_dict.Nr%   )r   rZ   r[   rQ   r0   s        r   load_task_stateTrainingTask.load_task_state   s     r   c                     U R                  US9nUc  0 $ U(       a  SOSnU(       a  SOSnU R                  US9nU[        X25      0nU(       a  XgU'   U$ )z3Return checkpoint state entries owned by this task.r=   state_dict_ema
state_dicttask_state_ema
task_state)rA   rX   r   )r   r0   	unwrap_fnrS   	model_keytask_keyrc   rZ   s           r   get_checkpoint_state!TrainingTask.get_checkpoint_state   sg     ((S(1
I(+$	'*#((S(1
N:AB((Or   ra   rc   c                     U R                  US9nUc  U(       a  [        S5      e[        S5      e[        U5      R                  XS9  U R	                  X$US9  g)z-Load model and task-owned checkpoint entries.r=   Nz4Cannot load EMA checkpoint state before setup_ema().z3Cannot load checkpoint state without an eval model.)r[   )r[   r0   )rA   RuntimeErrorr   load_state_dictr]   )r   ra   rc   r0   r[   rS   s         r   load_checkpoint_state"TrainingTask.load_checkpoint_state   sa     ((S(1
"#YZZTUUZ 000KZC@r   c                     U R                  5       nUb%  XLa!  [        US5      (       a  UR                  5       $ [        5       $ )zReturn a no-sync context for gradient accumulation.

Tasks that wrap a trainable component with DDP delegate to that
component's no_sync(). Non-distributed tasks use a no-op context.
no_sync)r5   rT   ro   r   )r   rQ   s     r   ro   TrainingTask.no_sync   s>     **,&"49S9S>>##}r   inputtargetc                     [         e)zPerform forward pass and compute loss.

Args:
    input: Input tensor [B, C, H, W]
    target: Target labels [B]

Returns:
    Dictionary with at least 'loss' key containing the training loss
)NotImplementedError)r   rq   rr   s      r   forwardTrainingTask.forward   s
     "!r   )r   r   rB   r2   r   )NNT)N)inductorN)F)gH.?FN)NF)TNF)NFT)'__name__
__module____qualname____firstlineno____doc__r   r   r   r   boolr   r   listr'   strnnModuler.   r5   floatr:   r>   rC   intrH   rO   rA   r   r   rX   r]   r   rg   rl   ro   Tensorru   __static_attributes____classcell__)r   s   @r   r   r      s   : .2+/ 		U\\*	 EKK(	 		 	+ *.  
	: &"& 3-
 
"))	7 7"))9L 7 "$-1	)) ) U\\*	) 
)"? ? &"&## 3-#
 
"))	#"mx} m m
- -Xbii%8 d W_`b`i`iWj Xbii%8 d W[\_ad\dWe   *.DcN+  RYY'	
  
 " 
c3h	, 48AS#XA !c3h0A 	A
 A 
A 	"<<" LL" 
c5<<	 	" "r   r   )r|   
contextlibr   typingr   r   r   r   torch.nnr   timm.utils.modelr   r   timm.utils.model_emar	   r   r   r%   r   r   <module>r      s3    # & &   9 +f"299 f"r   