
    
3jlV                        S SK Jr  S SKJr  S SKJrJr  S SKrS SKJ	r	  SSK
JrJr  SSKJr  SS	KJrJrJr  S
SKJr  \R*                  " \5      rSr\ " S S\5      5       r " S S\5      rSS/rg)    )annotations)	dataclass)AnyCallableN)tqdm   )MultiPipelineCallbacksPipelineCallback)BlockRefinementScheduler)
BaseOutputloggingreplace_example_docstring   )DiffusionPipelinea	  
    Examples:
        ```python
        >>> import torch
        >>> from transformers import AutoModelForCausalLM, AutoTokenizer
        >>> from diffusers import BlockRefinementScheduler, LLaDA2Pipeline

        >>> model_id = "inclusionAI/LLaDA2.1-mini"
        >>> model = AutoModelForCausalLM.from_pretrained(
        ...     model_id, trust_remote_code=True, dtype=torch.bfloat16, device_map="auto"
        ... )
        >>> tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
        >>> scheduler = BlockRefinementScheduler()

        >>> pipe = LLaDA2Pipeline(model=model, scheduler=scheduler, tokenizer=tokenizer)
        >>> output = pipe(prompt="What is the meaning of life?", gen_length=256)
        >>> print(output.texts[0])
        ```
c                  .    \ rS rSr% S\S'   SrS\S'   Srg)LLaDA2PipelineOutput5   torch.LongTensor	sequencesNlist[str] | Nonetexts )__name__
__module____qualname____firstlineno____annotations__r   __static_attributes__r       d/home/wildlama/miniconda3/lib/python3.13/site-packages/diffusers/pipelines/llada2/pipeline_llada2.pyr   r   5   s    "E"r   r   c                    ^  \ rS rSr% SrS\S'   S\S'   S\S'   / SQr S     SU 4S	 jjjr\S
 5       r	              SS jr
                        SS jr\R                  " 5       \" \5                              S                                                 SS jj5       5       rSrU =r$ )LLaDA2Pipeline;   a  
Pipeline for LLaDA2-style discrete diffusion text generation via block-wise iterative refinement.

This pipeline maintains a template sequence filled with a `mask_token_id` and refines it in blocks. In each
refinement step, it samples candidate tokens for the active block and commits a subset based on confidence.

The model is expected to accept an attention mask and `position_ids`, and to return logits of shape `[batch, seq,
vocab_size]`.
r   modelr   	scheduler	tokenizer)block_xx0x0_ptransfer_index
confidenceactive_blockc                   > [         TU ]  5         U R                  XUS9  U R                  b  [	        U R                  SS 5      OS U l        U R                  b  [	        U R                  SS 5      U l        g S U l        g )N)r$   r%   r&   eos_token_idmask_token_id)super__init__register_modulesr&   getattrr.   r/   )selfr$   r%   r&   	__class__s       r    r1   LLaDA2Pipeline.__init__L   sj     	E)TMQ^^MgGDNNNDImqOS~~OiWT^^_dKosr   c                    U R                   $ N)_num_timesteps)r4   s    r    num_timestepsLLaDA2Pipeline.num_timestepsW   s    """r   c               "   Ub  UR                   S:X  a  UR                  S5      nUR                   S:w  a"  [        S[        UR                  5       S35      eUR
                  [        R                  :w  a  [        SUR
                   S35      eU$ U R                  c  [        S5      eUb  Ub  [        S	5      eUc  Uc  [        S
5      eU=(       d    0 nUb'  U R                  R                  " U4USSSS.UD6nUS   $ U(       ag  [        U R                  SS5      (       aK  [        U[        5      (       a  [        S5      eU R                  R                  " SUS./4USSSS.UD6nUS   $ U R                  US[        U[        5      S9nUS   $ )z?Convert prompt/messages/input_ids to a [batch, seq] LongTensor.N   r   r   z"`input_ids` must be 2D, got shape ./`input_ids` must be int64 token IDs, got dtype=7Tokenizer is required when `input_ids` is not provided.0Provide either `prompt` or `messages`, not both.4Provide one of `prompt`, `messages`, or `input_ids`.Tpt)add_generation_prompttokenizereturn_tensorsreturn_dict	input_idschat_templatez8`prompt` must be a string when `use_chat_template=True`.user)rolecontent)rF   padding)ndim	unsqueeze
ValueErrortupleshapedtypetorchlongr&   apply_chat_templater3   
isinstancelist)r4   promptmessagesrH   use_chat_templaterD   chat_template_kwargsencodeds           r    _prepare_input_ids!LLaDA2Pipeline._prepare_input_ids]   s     ~~"%//2	~~" #EeIOOF\E]]^!_``%**, #RS\SbSbRccd!eff>>!VWWF$6OPPSTT39rnn88&;#  'G ;''$!O!O&$'' ![\\nn88 V45&;#  'G ;''..jQWY]F^._{##r   c           
       ^  Uc  Uc  Uc  [        S5      eUb  Ub  [        S5      eUbi  UR                  S;  a"  [        S[        UR                  5       S35      eUR                  [
        R                  :w  a  [        SUR                   S35      eUb  Uc  T R                  c  [        S5      eUb  Uc  T R                  c  [        S5      eUS::  a  [        S	U S35      eUS::  a  [        S
U S35      eUS::  a  [        SU S35      eUS::  a  [        SU S35      eSUs=::  a  S::  d  O  US:  d  [        SU S35      eU	S;  a  [        SU	< S35      eU
S;  a  [        SU
< S35      eUb'  [        U[        [        45      (       a  UR                  nUbX  [        U 4S jU 5       5      (       d=  [        ST R                   SU Vs/ s H  oT R                  ;  d  M  UPM     sn 35      eg g s  snf )NrB   rA   )r=   r   z(`input_ids` must be 1D or 2D, got shape r>   r?   r@   r   z`gen_length` must be > 0, got z `block_length` must be > 0, got z'`num_inference_steps` must be > 0, got z `minimal_topk` must be > 0, got         g      ?zC`threshold` must be in [0, 1] (or > 1 to force top-k commits), got >   autogreedymultinomialzF`sampling_method` must be one of {'auto','greedy','multinomial'}, got >   seqtextz+`output_type` must be 'seq' or 'text', got c              3  @   >#    U  H  oTR                   ;   v   M     g 7fr8   )_callback_tensor_inputs).0kr4   s     r    	<genexpr>.LLaDA2Pipeline.check_inputs.<locals>.<genexpr>   s      F
7Y!---7Ys   z2`callback_on_step_end_tensor_inputs` has to be in z, but found )rP   rN   rQ   rR   rS   rT   rU   r&   rW   r
   r	   tensor_inputsallrh   )r4   rY   rZ   rH   
gen_lengthblock_lengthnum_inference_stepsminimal_topk	thresholdsampling_methodoutput_typecallback_on_step_end"callback_on_step_end_tensor_inputsrj   s   `             r    check_inputsLLaDA2Pipeline.check_inputs   sk     >h.93DSTT("6OPP ~~V+ #KER[RaRaLbKccd!eff%**, #RS\SbSbRccd!eff)"38NVWWI$5$..:PVWW ?=j\KLL1?~QOPP!#FGZF[[\]^^1?~QOPPy'C')c/bclbmmnopp"CCZ[jZmmno  o-J;/YZ[\\  +
 #35K"L1
 1
 2F1S1S.-9# F
7YF
 C
 C
 DTEaEaDbbnAkA!dNjNjEjAAkln C
9
 ls   G9(G9c                
   Ub'  [        U[        [        45      (       a  UR                  nUc  S/nU R	                  UUUUUUUUUUUUS9  U R                  UUUUUSS9nU R                  nUR                  S:X  a  UR                  S5      nUR                  US9nUR                  u  nnUc  U R                  nUc  U R                  nUc  [        S5      e[        XU-  5      nU R                  R!                  UUS9  UU-   U-   S-
  U-  nUU-  n["        R$                  " UU4U["        R&                  S	9n["        R(                  " UU["        R&                  S	9R                  S5      R+                  US
5      n ["        R,                  " UU4UU["        R&                  S	9n!US:  a  UU!SS2SU24'   UU-  n"U[/        UU"-
  S5      -  U l        ["        R2                  " U4U["        R4                  S	9n#USL=(       a    US:  n$Sn%[7        U S0 5      R9                  5       n&SU&S'   SU&S'   [;        [=        U"U5      40 U&D6 GHZ  n'U'S-   U-  n(U!SS2SU(24   n)USS2SU(24   n*U SS2SU(24   n+U'U-  n,["        R2                  " UU["        R4                  S	9n-U,U:  a  [        UU,-
  U5      n.SU-SU.& Sn/Sn0Sn1U R?                  SSSU' S3S9  U RA                  US9n2U1(       Ga|  U)SS2U* S24   n3U3U:H  RC                  5       n4U4(       d  U/S-  n/U RE                  U)U*U+S9RF                  n5U5SS2U* S2SS24   n6U R                  RI                  U6U0U3UU	U
UUUUUU-USS9n7U7RJ                  n8U7RL                  n9U8U9-  n:U:RC                  5       (       a  U7RN                  U)SS2U* S24'   U(       a,  Ub)  U R                  RQ                  U)U7RR                  U:U#UUUS9n#Ub8  0 n;U H  n<[U        5       U<   U;U<'   M     U" U U%U0U;5      n=U=RW                  SU)5      n)U%S-  n%U4(       a  U0S-  n0U2RY                  S5        U R                  R[                  U0U4U$U9U/UU#S9n1U1(       a  GM|  U2R]                  5         U)U!SS2SU(24'   U(       d  GMC  U#R_                  5       (       d  GM[    O   U!SS2SUU-   24   n>U>SS2US24   n?UbU  US:X  aO  U?S   U:H  Ra                  SS9S   n@[c        U@5      S:  a(  U?SS2S[e        W@S   Rg                  5       5      S-   24   n?SnAUS:X  a'  U Rh                  b  U Rh                  Rk                  U?SS9nAU(       d  U?R                  US9WA4$ [m        U?R                  US9WAS9$ )a  
Generate text with block-wise refinement.

Args:
    prompt (`str` or `List[str]`, *optional*):
        Prompt text. When `use_chat_template` is `True` (default) and a tokenizer with a chat template is
        available, the prompt is wrapped in a chat message before tokenization.
    messages (`List[Dict[str, str]]`, *optional*):
        Chat messages to encode (e.g. `[{"role": "user", "content": "Hello"}]`). Takes precedence over `prompt`
        when provided. Requires a tokenizer with `apply_chat_template`.
    input_ids (`torch.LongTensor`, *optional*):
        Pre-tokenized input IDs. Takes precedence over `prompt` and `messages`.
    use_chat_template (`bool`, defaults to `True`):
        Whether to wrap the prompt in a chat template.
    add_generation_prompt (`bool`, defaults to `True`):
        Whether to add the generation prompt when using chat templates.
    gen_length (`int`):
        Number of tokens to generate.
    block_length (`int`):
        Block size for refinement.
    num_inference_steps (`int`):
        Number of refinement steps per block.
    temperature (`float`):
        Sampling temperature.
    top_p (`float`, *optional*):
        Nucleus sampling cutoff.
    top_k (`int`, *optional*):
        Top-k sampling cutoff.
    sampling_method (`str`):
        Sampling method (`auto`, `greedy`, `multinomial`).
    threshold (`float`):
        Confidence threshold for committing tokens.
    editing_threshold (`float`, *optional*):
        Confidence threshold for editing already-committed (non-mask) tokens. When positive, after all mask
        tokens in a block are resolved, the pipeline continues refining: if the model predicts a different
        token with confidence above this threshold, the existing token is replaced. Set to `None`, `0.0`, or a
        negative value to disable editing. Defaults to `0.5`.
    max_post_steps (`int`):
        Maximum number of additional refinement iterations after all mask tokens in a block are resolved. Only
        used when `editing_threshold` is enabled. Defaults to `16`.
    minimal_topk (`int`):
        Minimum number of tokens to commit per step.
    eos_early_stop (`bool`):
        Whether to stop after committing EOS in a block.
    eos_token_id (`int`, *optional*):
        EOS token ID to use for early stopping.
    mask_token_id (`int`, *optional*):
        Mask token ID to use for the template.
    generator (`torch.Generator`, *optional*):
        RNG for sampling.
    output_type (`str`, defaults to `"text"`):
        Output format. `"text"` decodes sequences into strings (requires a tokenizer). `"seq"` returns raw
        token ID sequences only.
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether to return a [`LLaDA2PipelineOutput`] instead of a tuple.
    callback_on_step_end (`Callable` or `PipelineCallback`, *optional*):
        Callback executed after each refinement step with signature `callback_on_step_end(self, step: int,
        timestep: int, callback_kwargs: Dict)`.
    callback_on_step_end_tensor_inputs (`List[str]`, *optional*):
        Tensor keys to pass to the callback. Allowed keys: `block_x`, `x0`, `x0_p`, `transfer_index`,
        `confidence`, `active_block`.

Examples:
Nr'   )rY   rZ   rH   ro   rp   rq   rr   rs   rt   ru   rv   rw   )rY   rZ   rH   r[   rD   r\   r=   r   )devicezA`mask_token_id` must be provided (or available on the tokenizer).)r{   rS   ra   _progress_bar_configpositionBlocksdescTFzBlock z Inference Steps)r~   leaver   )total)attention_maskposition_ids)model_outputtimestepsampler/   temperaturetop_ptop_krt   rs   editing_thresholdrr   prompt_mask	generatorrG   )cur_xsampled_tokensfinal_transferfinishedr.   r/   prompt_length)step_idxmasks_remainingediting_enabledediting_transfer_index
post_stepsmax_post_stepsr   )as_tuplerf   )skip_special_tokens)r   r   )7rW   r
   r	   rm   rx   r^   _execution_devicerN   rO   torR   r.   r/   rP   minr%   set_timestepsrT   onesrU   arangeexpandfullmaxr9   zerosboolr3   copyr   rangeset_progress_bar_configprogress_baranyr$   logitsstepr*   r   prev_samplecheck_eos_finishedr   localspopupdatecheck_block_should_continueclosern   nonzerolenintitemr&   batch_decoder   )Br4   rY   rZ   rH   r[   rD   ro   rp   rq   r   r   r   rt   rs   r   r   rr   eos_early_stopr.   r/   r   ru   rG   rv   rw   
prompt_idsr{   
batch_sizer   
num_blockstotal_length	attn_maskr   xprefill_blocksr   r   global_stepblock_progress_bar_config	num_blockcurrent_window_endr'   block_attn_maskblock_position_idsblock_start_posprompt_mask_in_blockprompt_end_in_blockr   r   should_continuer   block_tokensr   r   block_logitsscheduler_outputr*   r   r   callback_kwargsrj   callback_outputs	generatedr   eos_positionsr   sB                                                                     r    __call__LLaDA2Pipeline.__call__   s   B  +
 #35K"L1
 1
 2F1S1S.-52;.!% 3%+#!5/Q 	 	
  ,,/"7!% - 

 ''??a#--a0J]]&]1
$.$4$4!
M,,L  ..M `aa!"5\7QR$$%8$H $j0<?!CT
!L0 JJ
L9&PUPZPZ[	||LuzzR\\]^_ffgqsuv JJ
L1=W\WaWab1#-Aa- &,61C
^8SUV4WW;;
}V5::N+47S<MPS<S %,D2H"$M$R$R$T!01!*-,4!&)eNJ?]C\]I"+a-<!?..../G'+>,>+>(>?O!-a1D2D1D.D!E (,6O#(;;|FRWR\R\#] .&)-/*I<&X#=A$%9&9:JH"O((!5PY{ZjGk(l,,3F,GL!&q<-.'89#/=#@"E"E"G&!OJGOZlmtt%a,&:;#'>>#6#6!-%'"/ +$3'&7!- 4' $ $7 $ " "2!@!@)9)P)P&!/2H!H!%%''1A1M1MGA}~-.!l&>#~~@@%'7'F'F'5!)%1&3&3  A  H (3&(O?-3Xa[* @';D+xYh'i$.229gFGq "MH ''*"&.."L"L%$3$3+A)#1% #M #s "/F  (/Aa$$$$%~(,,..s ^x a5=:5556	a/0	#
a&q\\9BBDBQRSTM=!A%%a)K3}Q/?/D/D/F+G!+K)K&KL	& T^^%?NN//	t/TE<<v<.55#ill&l.IQVWWr   )r9   r.   r/   r8   )r$   r   r%   r   r&   z
Any | None)rY   str | list[str] | NonerZ   list[dict[str, str]] | NonerH   torch.LongTensor | Noner[   r   rD   r   r\   zdict[str, Any] | Nonereturnr   )rY   r   rZ   r   rH   r   ro   r   rp   r   rq   r   rr   r   rs   floatrt   strru   r   rv   z;Callable | PipelineCallback | MultiPipelineCallbacks | Nonerw   r   )NNNTTi       r   ra   NNrd   gffffff?g      ?   r=   TNNNrf   TNN)2rY   r   rZ   r   rH   r   r[   r   rD   r   ro   r   rp   r   rq   r   r   r   r   float | Noner   
int | Nonert   r   rs   r   r   r   r   r   rr   r   r   r   r.   r   r/   r   r   ztorch.Generator | Noneru   r   rG   r   rv   zSCallable[[int, int, dict], None] | PipelineCallback | MultiPipelineCallbacks | Nonerw   r   r   z@LLaDA2PipelineOutput | tuple[torch.LongTensor, list[str] | None])r   r   r   r   __doc__r   rh   r1   propertyr:   r^   rx   rT   no_gradr   EXAMPLE_DOC_STRINGr   r   __classcell__)r5   s   @r    r"   r"   ;   s    J''Ng !%		t	t ,	t 		t 	t # #
7$ '7$ .	7$
 +7$  7$  $7$ 47$ 
7$r;&; .; +	;
 ; ; !; ; ; ; ; Z; -=;z ]]_12 *.04-1"&&*#% " ,*- ##'$(,0!  ?C9SX&SX .SX +	SX
  SX  $SX SX SX !SX SX SX SX SX SX (SX  !SX" #SX$ %SX& !'SX( ")SX* *+SX, -SX. /SX01SX8 -=9SX: 
J;SX 3 SXr   r"   )
__future__r   dataclassesr   typingr   r   rT   	tqdm.autor   	callbacksr	   r
   
schedulersr   utilsr   r   r   pipeline_utilsr   
get_loggerr   loggerr   r   r"   __all__r   r   r    <module>r      sy    # !     A 2 C C . 
		H	% * #: # #
mX& mX` 3
4r   