
    3j                    l   S r SSKJr  SSKrSSKJr  SSKJr  SSKrSSK	J
r
  SS	KJr  SS
KJr  \
R                  " \5      rSSSS.r\" SS9 " S S5      5       r\R(                  SS j5       r\R,                  R.                                            SS j5       r          SS jrg)zSonicMoE integration: fused MoE using CuteDSL kernels from `kernels-community/sonic-moe`.

Provides `sonicmoe_experts_forward` registered as "sonicmoe" in the ExpertsInterface.
Requirements: CUDA, `kernels`, `nvidia-cutlass-dsl`, has_gate=True.
    )annotationsN)Callable)	dataclass   )logging   )lazy_load_kernel)to_localswiglugeglureglu)silugelureluT)frozenc                  .    \ rS rSr% SrS\S'   S\S'   Srg)	SonicMoE(   zAEntry points exposed by the `kernels-community/sonic-moe` kernel.typeactivation_type_enumr   moe_general_routing_inputs N)__name__
__module____qualname____firstlineno____doc____annotations____static_attributes__r       \/home/wildlama/miniconda3/lib/python3.13/site-packages/transformers/integrations/sonicmoe.pyr   r   (   s    K ((r    r   c                    [         R                  R                  5       (       d  [        S5      e[         R                  R	                  5       S   n U S:  a  [        SU  S35      e[        S5      nUc  [        S5      e[        [        US	S5      S
S5      n[        USS5      nSU4SU44 VVs/ s H  u  pEUb  M
  UPM     nnnU(       a  [        SSR                  U5       S35      e[        UUS9$ s  snnf )z
Load sonic-moe once and return its entry points.

Raises `ImportError` if CUDA/hardware requirements are not met, or if the kernel or
required symbols are not found.
zdsonic-moe kernel requires CUDA, but CUDA is not available. Use a different `experts_implementation`.r   	   z`sonic-moe requires a Hopper (SM90+) or newer GPU, but the current device has compute capability z-.x. Use a different `experts_implementation`.z	sonic-moeNu}   Failed to load the sonic-moe kernel — check that `kernels-community/sonic-moe` has a build matching the current torch/CUDA.enumsActivationTyper   zenums.ActivationTypez.sonic-moe kernel is missing required symbols: z, zN. Make sure you have the `kernels` package and `nvidia-cutlass-dsl` installed.)r   r   )	torchcudais_availableImportErrorget_device_capabilityr	   getattrjoinr   )majorkernelr   r   nameattrmissings          r!   _load_sonicmoe_kernelr2   0   sA    ::""$$r
 	

 JJ,,.q1Eqy&&+W,Y[
 	

 k*F~;
 	

 #767D#ACSUYZ!(1Mt!T
 $%9:)+EF

JD  	
   <TYYw=O<P Q[ [
 	

 1#= s   .	C3;C3c                    [        5       nUR                  n[        U[        R	                  US5      R                  5       UR                  5      nUR                  U UUUUUUUU	UUU
SS9u  nnU$ )uW  Module-level shim around `moe_general_routing_inputs` so `allow_in_graph` can wrap it.

sonicmoe asserts `not torch.compiler.is_compiling()` internally because it dispatches
CuteDSL kernels, which Dynamo can't trace. `allow_in_graph` keeps the call in the FX
graph as a single opaque node (no tracing into the body, no graph break) while still
running the real Python at runtime — autograd through `_UpProjection` / `_DownProjection`
flows normally. The decorator must be applied at module load time, not inside the compiled
function — hence this shim plus the `allow_in_graph` decorator above.
r   N)Eactivation_typeis_inference_mode_enabledconcat_layout	stream_id)r2   r   r+   ACT_MAPgetupperSWIGLUr   )hidden_statesrouter_scores
expert_ids	token_idxw1b1w2b2act_namenum_expertsr7   r6   sonicmoer   r5   output_s                    r!   _sonicmoe_wrapperrJ   d   s    0 %&H#88gkk(H=CCEG[GbGbO 33




'";# 4 IFA Mr    c                6   U R                   (       d  [        S5      eUR                  R                  S:w  a  [        S5      eUR                  nUR	                  S5      nUR	                  S5      n[
        R                  " XdS9R                  S5      R                  SU5      R                  S5      R                  5       nUR                  S5      R                  UR                  5      nUR                  S5      R                  5       n	[        U R                  5      n
[        U R                  5      nU R                   (       a  [        U R"                  5      OS nU R                   (       a  [        U R$                  5      OS n['        U R(                  SS	5      R+                  5       nU R,                  (       a  S
OSnU
R.                  " U6 n
UR.                  " U6 n[1        UUU	UU
UUUUU R2                  U R4                  [
        R6                  " 5       (       + S9$ )Nz/sonicmoe requires gated experts (has_gate=True)r'   zsonicmoe requires CUDA devicer   )devicer   
hidden_actr   )r   r   r   )r   r   r   )r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   r7   r6   )has_gate
ValueErrorrM   r   sizer&   arange	unsqueezeexpandreshapeinttodtyper
   gate_up_proj	down_projhas_biasgate_up_proj_biasdown_proj_biasr+   configloweris_transposedpermuterJ   rF   is_concatenatedis_grad_enabled)selfr=   top_k_indextop_k_weightsrM   	num_top_k
num_tokensr@   r>   r?   rA   rC   rB   rD   rE   perms                   r!   sonicmoe_experts_forwardrj      s    ==JKK  F*899!!F  $I##A&J Z7AA!DKKBPYZbbcefjjlI!))"-001D1DEM$$R(,,.J 
$##	$B	$..	!B-1]]$((	)B*.--$%%	&TB t{{L&9??AH **9	D	T	B	T	B##$$**&+&;&;&="= r    )returnr   )r=   torch.Tensorr>   rl   r?   rl   r@   rl   rA   rl   rB   torch.Tensor | NonerC   rl   rD   rm   rE   strrF   rV   r7   boolr6   ro   rk   rl   )
rd   ztorch.nn.Moduler=   rl   re   rl   rf   rl   rk   rl   )r   
__future__r   	functoolscollections.abcr   dataclassesr   r&   utilsr   hub_kernelsr	   tensor_parallelr
   
get_loggerr   loggerr9   r   cacher2   _dynamoallow_in_graphrJ   rj   r   r    r!   <module>r|      sK   #  $ !   ) % 
		H	% Wg
> $) ) ) 0 0f +++ + 	+
 	+ 	+ 	+ 	+ + + +  $+ + +\5
55 5  	5
 5r    