
    3j@                        S SK Jr  S SKrS SKrS SKrS SKJr  S SKJrJ	r	J
r
  \(       a  SSKJr   " S S5      r " S	 S
\5      r " S S\5      r " S S\5      r " S S\5      rg)    )annotationsN)Queue)TYPE_CHECKINGAnycast   )PreTrainedTokenizerBasec                  $    \ rS rSrSrS rS rSrg)BaseStreamer   z?
Base class from which `.generate()` streamers should inherit.
c                    [        5       e)z;Function that is called by `.generate()` to push new tokensNotImplementedErrorselfvalues     [/home/wildlama/miniconda3/lib/python3.13/site-packages/transformers/generation/streamers.pyputBaseStreamer.put!       !##    c                    [        5       e)zHFunction that is called by `.generate()` to signal the end of generationr   r   s    r   endBaseStreamer.end%   r   r    N)__name__
__module____qualname____firstlineno____doc__r   r   __static_attributes__r   r   r   r   r      s    $$r   r   c                  F    \ rS rSrSrS
SS jjrS rS rS
SS jjrS r	Sr
g	)TextStreamer*   aT  
Simple text streamer that prints the token(s) to stdout as soon as entire words are formed.

Parameters:
    tokenizer (`AutoTokenizer`):
        The tokenizer used to decode the tokens.
    skip_prompt (`bool`, *optional*, defaults to `False`):
        Whether to skip the prompt to `.generate()` or not. Useful e.g. for chatbots.
    decode_kwargs (`dict`, *optional*):
        Additional keyword arguments to pass to the tokenizer's `decode` method.

Examples:

    ```python
    >>> from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

    >>> tok = AutoTokenizer.from_pretrained("openai-community/gpt2")
    >>> model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2")
    >>> inputs = tok(["An increasing sequence: one,"], return_tensors="pt")
    >>> streamer = TextStreamer(tok)

    >>> # Despite returning the usual output, the streamer will also print the generated text to stdout.
    >>> _ = model.generate(**inputs, streamer=streamer, max_new_tokens=20)
    An increasing sequence: one, two, three, four, five, six, seven, eight, nine, ten, eleven,
    ```
c                R    Xl         X l        X0l        / U l        SU l        SU l        g )Nr   T)	tokenizerskip_promptdecode_kwargstoken_cache	print_lennext_tokens_are_prompt)r   r'   r(   r)   s       r   __init__TextStreamer.__init__F   s,    "&* ')&*#r   c                   [        UR                  5      S:  a  UR                  S   S:  a  [        S5      e[        UR                  5      S:  a  US   nU R                  (       a  U R                  (       a  SU l        gU R
                  R                  UR                  5       5        [        [        U R                  R                  " U R
                  40 U R                  D65      nUR                  S5      (       a  X R                  S n/ U l        SU l        O[        U5      S:  aO  U R                  [!        US   5      5      (       a-  X R                  S nU =R                  [        U5      -  sl        O>X R                  UR#                  S5      S-    nU =R                  [        U5      -  sl        U R%                  U5        g)	z]
Receives tokens, decodes them, and prints them to stdout as soon as they form entire words.
   r   z'TextStreamer only supports batch size 1FN
 )lenshape
ValueErrorr(   r,   r*   extendtolistr   strr'   decoder)   endswithr+   _is_chinese_charordrfindon_finalized_text)r   r   textprintable_texts       r   r   TextStreamer.putP   s`    u{{aEKKNQ$6FGG!!HE ; ;*/D' 	/C..t/?/?V4CUCUVW ==!.."23N!DDNY]t44Sb]CC!.."23NNNc.11N "..4::c?Q3FGNNNc.11N~.r   c                    [        U R                  5      S:  a\  [        [        U R                  R
                  " U R                  40 U R                  D65      nXR                  S n/ U l        SU l        OSnSU l        U R                  USS9  g)z;Flushes any remaining cache and prints a newline to stdout.r   N T)
stream_end)
r4   r*   r   r9   r'   r:   r)   r+   r,   r?   )r   r@   rA   s      r   r   TextStreamer.endr   s     t 1$T^^2243C3CZtGYGYZ[D!.."23N!DDNN&*#~$?r   c                ,    [        USU(       d  SOSS9  g)zNPrints the new text to stdout. If the stream is ending, also prints a newline.TrD   N)flushr   )printr   r@   rE   s      r   r?   TextStreamer.on_finalized_text   s    d$jBdCr   c                    US:  a  US::  dT  US:  a  US::  dH  US:  a  US::  d<  US:  a  US::  d0  US	:  a  US
::  d$  US:  a  US::  d  US:  a  US::  d  US:  a  US::  a  gg)z6Checks whether CP is the codepoint of a CJK character.i N  i  i 4  iM  i   iߦ i  i? i@ i i  i i   i  i  i TFr   )r   cps     r   r<   TextStreamer._is_chinese_char   sr     6\bFlfvg"-g"-g"-g"-fvg"-r   )r)   r,   r+   r(   r*   r'   NF)r'   r	   r(   boolr)   r   r@   r9   rE   rP   )r   r   r   r    r!   r-   r   r   r?   r<   r"   r   r   r   r$   r$   *   s#    6+ /D@Dr   r$   c                  `   ^  \ rS rSrSr  S       S	U 4S jjjrS
SS jjrS rS rSr	U =r
$ )TextIteratorStreamer   a  
Streamer that stores print-ready text in a queue, to be used by a downstream application as an iterator. This is
useful for applications that benefit from accessing the generated text in a non-blocking way (e.g. in an interactive
Gradio demo).

Parameters:
    tokenizer (`AutoTokenizer`):
        The tokenizer used to decode the tokens.
    skip_prompt (`bool`, *optional*, defaults to `False`):
        Whether to skip the prompt to `.generate()` or not. Useful e.g. for chatbots.
    timeout (`float`, *optional*):
        The timeout for the text queue. If `None`, the queue will block indefinitely. Useful to handle exceptions
        in `.generate()`, when it is called in a separate thread.
    decode_kwargs (`dict`, *optional*):
        Additional keyword arguments to pass to the tokenizer's `decode` method.

Examples:

    ```python
    >>> from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
    >>> from threading import Thread

    >>> tok = AutoTokenizer.from_pretrained("openai-community/gpt2")
    >>> model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2")
    >>> inputs = tok(["An increasing sequence: one,"], return_tensors="pt")
    >>> streamer = TextIteratorStreamer(tok)

    >>> # Run the generation in a separate thread, so that we can fetch the generated text in a non-blocking way.
    >>> generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=20)
    >>> thread = Thread(target=model.generate, kwargs=generation_kwargs)
    >>> thread.start()
    >>> generated_text = ""
    >>> for new_text in streamer:
    ...     generated_text += new_text
    >>> generated_text
    'An increasing sequence: one, two, three, four, five, six, seven, eight, nine, ten, eleven,'
    ```
c                `   > [         TU ]  " X40 UD6  [        5       U l        S U l        X0l        g N)superr-   r   
text_queuestop_signaltimeout)r   r'   r(   rZ   r)   	__class__s        r   r-   TextIteratorStreamer.__init__   s-     	A=A'r   c                    U R                   R                  XR                  S9  U(       a/  U R                   R                  U R                  U R                  S9  gg)\Put the new text in the queue. If the stream is ending, also put a stop signal in the queue.rZ   N)rX   r   rZ   rY   rJ   s      r   r?   &TextIteratorStreamer.on_finalized_text   sD    D,,7OO 0 0$,,G r   c                    U $ rV   r   r   s    r   __iter__TextIteratorStreamer.__iter__       r   c                ~    U R                   R                  U R                  S9nXR                  :X  a
  [	        5       eU$ Nr_   )rX   getrZ   rY   StopIterationr   s     r   __next__TextIteratorStreamer.__next__   s6    ##DLL#9$$$/!Lr   )rY   rX   rZ   FNr'   r	   r(   rP   rZ   float | Noner)   r   rO   rQ   )r   r   r   r    r!   r-   r?   rb   ri   r"   __classcell__r[   s   @r   rS   rS      sV    %T " $	
*
 
 	

 
 
H r   rS   c                  `   ^  \ rS rSrSr  S       S	U 4S jjjrS
SS jjrS rS rSr	U =r
$ )AsyncTextIteratorStreamer   a
  
Streamer that stores print-ready text in a queue, to be used by a downstream application as an async iterator.
This is useful for applications that benefit from accessing the generated text asynchronously (e.g. in an
interactive Gradio demo).

Parameters:
    tokenizer (`AutoTokenizer`):
        The tokenizer used to decode the tokens.
    skip_prompt (`bool`, *optional*, defaults to `False`):
        Whether to skip the prompt to `.generate()` or not. Useful e.g. for chatbots.
    timeout (`float`, *optional*):
        The timeout for the text queue. If `None`, the queue will block indefinitely. Useful to handle exceptions
        in `.generate()`, when it is called in a separate thread.
    decode_kwargs (`dict`, *optional*):
        Additional keyword arguments to pass to the tokenizer's `decode` method.

Raises:
    TimeoutError: If token generation time exceeds timeout value.

Examples:

    ```python
    >>> from transformers import AutoModelForCausalLM, AutoTokenizer, AsyncTextIteratorStreamer
    >>> from threading import Thread
    >>> import asyncio

    >>> tok = AutoTokenizer.from_pretrained("openai-community/gpt2")
    >>> model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2")
    >>> inputs = tok(["An increasing sequence: one,"], return_tensors="pt")

    >>> # Run the generation in a separate thread, so that we can fetch the generated text in a non-blocking way.
    >>> async def main():
    ...     # Important: AsyncTextIteratorStreamer must be initialized inside a coroutine!
    ...     streamer = AsyncTextIteratorStreamer(tok)
    ...     generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=20)
    ...     thread = Thread(target=model.generate, kwargs=generation_kwargs)
    ...     thread.start()
    ...     generated_text = ""
    ...     async for new_text in streamer:
    ...         generated_text += new_text
    >>>     print(generated_text)
    >>> asyncio.run(main())
    An increasing sequence: one, two, three, four, five, six, seven, eight, nine, ten, eleven,
    ```
c                ^  > [         TU ]  " X40 UD6  [        R                  " 5       U l        S U l        X0l        [        R                  " 5       U l        [        [        SS 5      n[        R                  S:  =(       a    [        U5      U l        U R                  (       a  XPl        g S U l        g )NrZ   )      )rW   r-   asyncior   rX   rY   rZ   get_running_looploopgetattrsysversion_infocallablehas_asyncio_timeoutasyncio_timeout)r   r'   r(   rZ   r)   timeout_contextr[   s         r   r-   "AsyncTextIteratorStreamer.__init__  s     	A=A!--/,,.	!'9d;#&#3#3w#>#\8OC\ 262J2JPTr   c                    U R                   R                  U R                  R                  U5        U(       a;  U R                   R                  U R                  R                  U R                  5        gg)r^   N)rx   call_soon_threadsaferX   
put_nowaitrY   rJ   s      r   r?   +AsyncTextIteratorStreamer.on_finalized_text!  sL    		&&t'A'A4HII**4??+E+EtGWGWX r   c                    U $ rV   r   r   s    r   	__aiter__#AsyncTextIteratorStreamer.__aiter__'  rd   r   c                4  #     U R                   (       ad  U R                  bW  U R                  U R                  5       IS h  vN   U R                  R	                  5       I S h  vN nS S S 5      IS h  vN   O?[
        R                  " U R                  R	                  5       U R                  S9I S h  vN nWU R                  :X  a
  [        5       eU$  N Nr Nd! , IS h  vN  (       d  f       N9= f N@! [
        R                   a    [        5       ef = f7frf   )
r}   r~   rZ   rX   rg   rv   wait_forrY   StopAsyncIterationTimeoutErrorr   s     r   	__anext__#AsyncTextIteratorStreamer.__anext__*  s     	''D,@,@,L//=="&//"5"5"77E >== &..t/B/B/Ddll[[ ((((** >7 >=== \## 	!. 	!s   D=C6 CC6 C#C$C(C6 3C4?C6 3C44C6 8DC6 CC6 C1 C#!C1-C6 6DD)r~   r}   rx   rY   rX   rZ   rk   rl   rO   rQ   )r   r   r   r    r!   r-   r?   r   r   r"   rn   ro   s   @r   rq   rq      s\    ,b " $	U*U U 	U
 U U Y r   rq   c                  j   ^  \ rS rSrSr  S	       S
U 4S jjjrS rS rU 4S jrU 4S jr	Sr
U =r$ )TextDiffusionStreameri:  a  
Streamer that prints text diffusion outputs. Intermediate diffusion steps (drafts) are temporary
and overwritten by subsequent drafts, and removed when confirmed text is printed.

<Tip warning={true}>

If you're running on an environment like tmux, the draft text may fail to overwrite itself.

</Tip>


Parameters:
    tokenizer (`AutoTokenizer`):
        The tokenized used to decode the tokens.
    skip_prompt (`bool`, *optional*, defaults to `False`):
        Whether to skip the prompt to `.generate()` or not. Useful e.g. for chatbots.
    sleep_time (`float`, *optional*):
        Time to sleep between diffusion drafts, which may be helpful to visualize intermediate outputs.
    decode_kwargs (`dict`, *optional*):
        Additional keyword arguments to pass to the tokenizer's `decode` method.

Examples:

    ```python
    >>> from transformers import DiffusionGemmaForBlockDiffusion, AutoProcessor, TextDiffusionStreamer

    >>> model = DiffusionGemmaForBlockDiffusion.from_pretrained(
    ...     "google/diffusiongemma-26B-A4B-it", device_map="auto",
    ... )
    >>> processor = AutoProcessor.from_pretrained("google/diffusiongemma-26B-A4B-it")

    >>> chat = [{"role": "user", "content": "Why is the sky blue?"},]
    >>> input_ids = processor.apply_chat_template(
    ...     chat, tokenize=True, return_tensors="pt", add_generation_prompt=True
    ... )
    >>> streamer = TextDiffusionStreamer(tokenizer=processor.tokenizer)
    >>> model.generate(input_ids.to(model.device), max_new_tokens=512, streamer=streamer)
    ```
c                P   > [         TU ]  " X40 UD6  SU l        SU l        X0l        g )NF)rW   r-   
_has_draft_takes_logits
sleep_time)r   r'   r(   r   r)   r[   s        r   r-   TextDiffusionStreamer.__init__c  s-     	A=A #$r   c                L    U R                   (       a  [        SSSS9  SU l         g g )Nz8[JrD   Tr   rH   F)r   rI   r   s    r   _clear_draft"TextDiffusionStreamer._clear_draftr  s!    ??-Rt4#DO r   c                   U R                  5         [        UR                  5      S:  a  UR                  S   S:  a  [        S5      e[        UR                  5      S:  a  US   nU R                  R
                  " U40 U R                  D6n[        SSSS9  [        SU S	3SSS9  SU l        U R                  b!  [        R                  " U R                  5        g
g
)zq
Receives the full sequence of draft tokens, decodes them, and prints them in yellow.
Overwrites previous draft.
r0   r   z0TextDiffusionStreamer only supports batch size 1z7rD   Tr   z[33mz[0mN)r   r4   r5   r6   r'   r:   r)   rI   r   r   timesleep)r   r   kwargsr@   s       r   	put_draftTextDiffusionStreamer.put_draftx  s    
 	u{{aEKKNQ$6OPP!!HE~~$$UAd.@.@A 	g2T*g&Bd;??&JJt' 'r   c                D   > U R                  5         [        TU ]	  U5        g)zEReceives confirmed tokens, clears draft, and prints them permanently.N)r   rW   r   )r   r   r[   s     r   r   TextDiffusionStreamer.put  s    Er   c                B   > U R                  5         [        TU ]	  5         g)z1Flushes any remaining cache and prints a newline.N)r   rW   r   )r   r[   s    r   r   TextDiffusionStreamer.end  s    r   )r   r   r   rk   )r'   r	   r(   rP   r   rm   r)   r   )r   r   r   r    r!   r-   r   r   r   r   r"   rn   ro   s   @r   r   r   :  sZ    &V "#'	%*% % !	%
 % %$(,
 r   r   )
__future__r   rv   rz   r   queuer   typingr   r   r   tokenization_utils_baser	   r   r$   rS   rq   r   r   r   r   <module>r      si    #  
   + + A$ $p< pfB< BJU Up\L \r   