
    3j֗                   R    S SK Jr  S SKJr  SSKJrJrJrJr   " S S5      r	S	S jr
g)
    )annotations)Sequence   )
MODEL_ARCHMODEL_TENSORMODEL_TENSORSTENSOR_NAMESc                  '   \ rS rSr% 0 \R
                  S_\R                  S_\R                  S_\R                  S_\R                  S_\R                  S_\R                  S_\R                  S	_\R                  S
_\R                  S_\R                  S_\R                   S_\R"                  S_\R$                  S_\R&                  S_\R(                  S_\R*                  S_\R,                  S\R.                  S\R0                  S\R2                  S\R4                  S\R6                  S\R8                  S\R:                  S\R<                  S\R>                  S\R@                  S0Er!S\"S'   0 \RF                  S_\RH                  S _\RJ                  S!_\RL                  S"_\RN                  S#_\RP                  S$_\RR                  S%_\RT                  S&_\RV                  S'_\RX                  S(_\RZ                  S)_\R\                  S*_\R^                  S+_\R`                  S,_\Rb                  S-_\Rd                  S._\Rf                  S/_0 \Rh                  S0_\Rj                  S1_\Rl                  S2_\Rn                  S3_\Rp                  S4_\Rr                  S5_\Rt                  S6_\Rv                  S7_\Rx                  S8_\Rz                  S9_\R|                  S:_\R~                  S;_\R                  S<_\R                  S=_\R                  S>_\R                  S?_\R                  S@_E0 \R                  SA_\R                  SB_\R                  SC_\R                  SD_\R                  SE_\R                  SF_\R                  SG_\R                  SH_\R                  SI_\R                  SJ_\R                  SK_\R                  SL_\R                  SM_\R                  SN_\R                  SO_\R                  SP_\R                  SQ_E0 \R                  SR_\R                  SS_\R                  ST_\R                  SU_\R                  SV_\R                  SW_\R                  SX_\R                  SY_\R                  SZ_\R                  S[_\R                  S\_\R                  S]_\R                  S^_\R                  S__\R                  S`_\R                  Sa_\R                  Sb_E0 \R                  Sc_\R                  Sd_\R                  Se_\R                  Sf_\R                  Sg_\R                  Sh_\R                  Si_\R                  Sj_\R                  Sk_\R                  Sl_\R                  Sm_\R                  Sn_\R                  So_\R                  Sp_\R                  Sq_\R                  Sr_\R                  Ss_E0 \R                  St_\R                  Su_\R                  Sv_\R                  Sw_\R                  Sx_\R                  Sy_\R                  Sz_\R                  S{_\R                  S|_\GR                   S}_\GR                  S~_\GR                  S_\GR                  S_\GR                  S_\GR
                  S_\GR                  S_\GR                  S_E0 \GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                   S_\GR"                  S_\GR$                  S_\GR&                  S_\GR(                  S_\GR*                  S_\GR,                  S_\GR.                  S_\GR0                  S_E0 \GR2                  S_\GR4                  S_\GR6                  S_\GR8                  S_\GR:                  S_\GR<                  S_\GR>                  S_\GR@                  S_\GRB                  S_\GRD                  S_\GRF                  S_\GRH                  S_\GRJ                  S_\GRL                  S_\GRN                  S_\GRP                  S_\GRR                  S_E0 \GRT                  S_\GRV                  S_\GRX                  S_\GRZ                  S_\GR\                  S_\GR^                  S_\GR`                  S_\GRb                  S_\GRd                  S_\GRf                  S_\GRh                  S_\GRj                  S_\GRl                  S_\GRn                  S_\GRp                  S_\GRr                  S_\GRt                  S_E0 \GRv                  S_\GRx                  S_\GRz                  S_\GR|                  S_\GR~                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_E0 \GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_E0 \GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_E0 \GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\R&                  S_\GR                  S_\R(                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_\GR                  S_E0 \GR                  S_\GR                  S_\GR                  S_\GR                   S_\GR                  S_\GR                  GS _\GR                  GS_\GR                  GS_\GR
                  GS_\GR                  GS_\GR                  GS_\GR                  GS_\GR                  GS_\GR                  GS_\GR                  GS	_\GR                  GS
_\GR                  GS_E0 \GR                  GS_\GR                  GS_\GR                   GS_\GR"                  GS_\GR$                  GS_\GR&                  GS_\GR(                  GS_\GR*                  GS_\GR,                  GS_\GR.                  GS_\GR0                  GS_\GR2                  GS_\GR4                  GS_\GR6                  GS_\GR8                  GS_\GR:                  GS_\GR<                  GS_E0 \GR>                  GS_\GR@                  GS_\GRB                  GS_\GRD                  GS _\GRF                  GS!_\GRH                  GS"_\GRJ                  GS#_\GRL                  GS$_\GRN                  S_\GRP                  GS%_\GRR                  GS&_\GRT                  GS'_\GRV                  GS(_\GRX                  GS)_\GRZ                  GS*_\GR\                  GS+_\GR^                  GS,_E0 \GR`                  GS-_\GRb                  GS._\GRd                  GS/_\GRf                  GS0_\GRh                  GS1_\GRj                  GS2_\GRl                  GS3_\GRn                  GS4_\GRp                  S_\GRr                  GS5_\GRt                  GS6_\GRv                  GS7_\GRx                  GS8_\GRz                  GS9_\GR|                  GS:_\GR~                  GS;_\GR                  GS<_E0 \GR                  GS=_\GR                  GS>_\GR                  GS?_\GR                  GS@_\GR                  GSA_\GR                  GSB_\GR                  GSC_\GR                  GSD_\GR                  GSE_\GR                  GSF_\GR                  GSG_\GR                  GSH_\GR                  GSI_\GR                  GSJ_\GR                  GSK_\GR                  GSL_\GR                  GSM_E0 \GR                  GSN_\GR                  GSO_\GR                  GSP_\GR                  GSQ_\GR                  GSR_\GR                  GSS_\GR                  GST_\GR                  GSU_\GR                  GSV_\GR                  GSW_\GR                  GSX_\GR                  GSY_\GR                  GSZ_\GR                  GS[_\GR                  GS\_\GR                  GS]_\GR                  GS^_E\GR                  GS_\GR                  GS`\GR                  GSa0EGrfS\"GSb'   G\gGR                  \R^                  GSc\GR                  GSd00GrjGSe\"GSf'   GSg\"GSh'   GSrGSi jGrkGSsGStGSj jjGrlGSsGSuGSk jjGrmGSsGSvGSl jjGrnGSwGSm jGroGSxGSn jGrpGSyGSo jGrqGSpGrrGgq(z  TensorNameMap   )zgpt_neox.embed_inztransformer.wteztransformer.word_embeddingsword_embeddingszmodel.embed_tokensembed_tokenstok_embeddingszembeddings.word_embeddingszembeddings.tok_embeddingswteztransformer.embd.wtezmodel.tok_embeddingszmodel.embeddingzbackbone.embeddingzbackbone.embeddingsztransformer.in_out_embedzembedding.word_embeddingsztransformer.token_embeddingssharedzrwkv.embeddingszmodel.embeddingszmodel.word_embeddingsencoderzmodel.transformer.wter   )z embeddings.token_type_embeddings)word_embeddings_layernormzembeddings.LayerNormzembeddings.normemb_lntransformer.normrwkv.blocks.0.pre_lnr   zmodel.pre_lnzmodel.layers.0.pre_normzbackbone.normzmodel.embedding_norm)ztransformer.wpezembeddings.position_embeddingswpezmodel.embed_positions)	embed_outlm_headoutputword_embeddings_for_headzlm_head.linearoutput_layerheadzhead.outr   zmodel.transformer.ff_outzhead.decoder)dense_2_out)dense_3_out)zgpt_neox.final_layer_normztransformer.ln_f
model.normnormztransformer.norm_fln_fzmodel.final_layernormz
lm_head.lnzmodel.norm_fzbackbone.norm_fztransformer.rms_normzencoder.final_layernormr   r    zrwkv.ln_outzmodel.ln_outzbackbone.final_layer_normr    zmodel.transformer.ln_f
final_normr    )z
rope.freqszrotary_pos_emb.inv_freq )zbackbone.embed)zmodel.embed_vision.embedding)z&model.embed_vision.hard_embedding_norm)z'model.embed_vision.embedding_projection)z&model.embed_vision.soft_embedding_norm)z,model.vision_tower.timm_model.conv_stem.conv)z*model.vision_tower.timm_model.conv_stem.bn)z2model.vision_tower.timm_model.msfa.ffn.pw_exp.conv)z0model.vision_tower.timm_model.msfa.ffn.pw_exp.bn)z3model.vision_tower.timm_model.msfa.ffn.pw_proj.conv)z1model.vision_tower.timm_model.msfa.ffn.pw_proj.bn)z'model.vision_tower.timm_model.msfa.norm)zencoder.out)zencoder.out_mid)zprojector.query)zprojector.qformer.layernorm)zprojector.linearz#dict[MODEL_TENSOR, tuple[str, ...]]mappings_cfg) z%gpt_neox.layers.{bid}.input_layernormztransformer.h.{bid}.ln_1ztransformer.blocks.{bid}.norm_1z#transformer.h.{bid}.input_layernormzh.{bid}.input_layernormztransformer.h.{bid}.ln_mlp"model.layers.{bid}.input_layernormzlayers.{bid}.attention_normmodel.layers.{bid}.ln1zh.{bid}.ln_1ztransformer.h.{bid}.lnzmodel.layers.layers.{bid}.normz(model.layers.layers.{bid}.pre_mixer_normz!model.layers.{bid}.attention_normzmodel.layers.{bid}.normzbackbone.layers.{bid}.normz(transformer.decoder_layer.{bid}.rms_normz model.layers.{bid}.pre_attn_normz.transformer.blocks.{bid}.norm_attn_norm.norm_1z$encoder.layers.{bid}.input_layernormz"transformer.layers.{bid}.attn_normzrwkv.blocks.{bid}.ln1r'   r&   layers.{bid}.input_layernormz(transformer_encoder.{bid}.attention_normzlayers.{bid}.attn_normz model.layers.{bid}.operator_normz(model.transformer.blocks.{bid}.attn_normr(   z&model.layers.{bid}.attention_layernormz*model.layers.{bid}.pre_attention_layernorm)ztransformer.h.{bid}.ln_attnz encoder.layer.{bid}.layer_norm_1zrwkv.blocks.{bid}.ln2model.layers.{bid}.ln2+model.layers.{bid}.post_attention_layernorm)z/gpt_neox.layers.{bid}.attention.query_key_valueztransformer.h.{bid}.attn.c_attnz"transformer.blocks.{bid}.attn.Wqkvz1transformer.blocks.{bid}.norm_attn_norm.attn.Wqkvz2transformer.h.{bid}.self_attention.query_key_valuez&h.{bid}.self_attention.query_key_valuez,model.layers.{bid}.self_attn.query_key_valuez,model.layers.{bid}.attention.query_key_valuezh.{bid}.attn.c_attnztransformer.h.{bid}.mixer.Wqkvzencoder.layers.{bid}.attn.Wqkvzencoder.layers.{bid}.mixer.Wqkvz%model.layers.{bid}.self_attn.qkv_projz(model.layers.layers.{bid}.mixer.qkv_projz3encoder.layers.{bid}.self_attention.query_key_valuez&transformer.layers.{bid}.attn.qkv_projztransformer_encoder.{bid}.qkvzlayers.{bid}.attn.Wqkvz<model.layers.{bid}.self_attn.language_expert_query_key_valuez*model.layers.{bid}.linear_attn.in_proj_qkv)#model.layers.{bid}.self_attn.q_projlayers.{bid}.self_attn.q_projz+model.layers.{bid}.self_attn.q_proj_no_permzlayers.{bid}.attention.wqz(encoder.layer.{bid}.attention.self.queryz'transformer.layer.{bid}.attention.q_linztransformer.h.{bid}.attn.q_projz*model.layers.layers.{bid}.self_attn.q_projzmodel.layers.{bid}.attention.wqz:transformer.decoder_layer.{bid}.multi_head_attention.queryz)transformer.h.{bid}.attn.attention.q_projr+   z%model.transformer.blocks.{bid}.q_projr,   z"backbone.layers.{bid}.mixer.q_proj)#model.layers.{bid}.self_attn.k_projlayers.{bid}.self_attn.k_projz+model.layers.{bid}.self_attn.k_proj_no_permzlayers.{bid}.attention.wkz&encoder.layer.{bid}.attention.self.keyz'transformer.layer.{bid}.attention.k_linztransformer.h.{bid}.attn.k_projztransformer.h.{bid}.attn.kz*model.layers.layers.{bid}.self_attn.k_projzmodel.layers.{bid}.attention.wkz8transformer.decoder_layer.{bid}.multi_head_attention.keyz)transformer.h.{bid}.attn.attention.k_projr-   z%model.transformer.blocks.{bid}.k_projr.   z"backbone.layers.{bid}.mixer.k_proj)#model.layers.{bid}.self_attn.v_projlayers.{bid}.self_attn.v_projzlayers.{bid}.attention.wvz(encoder.layer.{bid}.attention.self.valuez'transformer.layer.{bid}.attention.v_linztransformer.h.{bid}.attn.v_projztransformer.h.{bid}.attn.vz*model.layers.layers.{bid}.self_attn.v_projzmodel.layers.{bid}.attention.wvz:transformer.decoder_layer.{bid}.multi_head_attention.valuez)transformer.h.{bid}.attn.attention.v_projr/   z%model.transformer.blocks.{bid}.v_projr0   z"backbone.layers.{bid}.mixer.v_proj)"z%gpt_neox.layers.{bid}.attention.denseztransformer.h.{bid}.attn.c_projz&transformer.blocks.{bid}.attn.out_projz(transformer.h.{bid}.self_attention.densezh.{bid}.self_attention.dense#model.layers.{bid}.self_attn.o_projlayers.{bid}.self_attn.o_projz%model.layers.{bid}.self_attn.out_projz(model.layers.{bid}.self_attn.linear_attnzlayers.{bid}.attention.woz*encoder.layer.{bid}.attention.output.densezlayers.{bid}.attn.Woz)transformer.layer.{bid}.attention.out_linz!transformer.h.{bid}.attn.out_projz"model.layers.{bid}.self_attn.densez"model.layers.{bid}.attention.densezh.{bid}.attn.c_projz"transformer.h.{bid}.mixer.out_projz*model.layers.layers.{bid}.self_attn.o_projz&model.layers.layers.{bid}.mixer.o_projzmodel.layers.{bid}.attention.woz"encoder.layers.{bid}.attn.out_projz#encoder.layers.{bid}.mixer.out_projz;transformer.decoder_layer.{bid}.multi_head_attention.linearz5transformer.blocks.{bid}.norm_attn_norm.attn.out_projz)encoder.layers.{bid}.self_attention.densez&transformer.layers.{bid}.attn.out_projz+transformer.h.{bid}.attn.attention.out_projr1   ztransformer_encoder.{bid}.woz'model.transformer.blocks.{bid}.attn_outr2   z"backbone.layers.{bid}.mixer.o_projz2model.layers.{bid}.self_attn.language_expert_dense)z.encoder.layer.{bid}.attention.output.LayerNormz%transformer.layer.{bid}.sa_layer_normzencoder.layers.{bid}.norm1z*transformer.decoder_layer.{bid}.rms_norm_1z!model.layers.{bid}.post_attn_normz.transformer.blocks.{bid}.norm_attn_norm.norm_2)r*   %layers.{bid}.post_attention_layernormz+model.layers.{bid}.post_self_attn_layernormz0model.layers.layers.{bid}.post_mixer_norm.weight)z0model.layers.{bid}.self_attn.rotary_emb.inv_freqz1layers.{bid}.attention.inner_attention.rope.freqsz7model.layers.layers.{bid}.self_attn.rotary_emb.inv_freqz,transformer.h.{bid}.attn.rotary_emb.inv_freq)z"model.layers.{bid}.self_attn.sinksz0model.layers.{bid}.self_attn.attention_sink_bias)z&model.layers.{bid}.self_attn.gate_projz(model.layers.{bid}.linear_attn.in_proj_zz#model.layers.{bid}.self_attn.g_proj)z.gpt_neox.layers.{bid}.post_attention_layernormztransformer.h.{bid}.ln_2z h.{bid}.post_attention_layernormztransformer.blocks.{bid}.norm_2r*   zlayers.{bid}.ffn_normr)   zh.{bid}.ln_2zmodel.layers.{bid}.ffn_normz*transformer.decoder_layer.{bid}.rms_norm_2zmodel.layers.{bid}.pre_moe_normz-encoder.layers.{bid}.post_attention_layernormz!transformer.layers.{bid}.ffn_normz#model.layers.{bid}.pre_ff_layernormz$model.layers.{bid}.pre_moe_layernormr*   z"transformer_encoder.{bid}.ffn_normz&model.layers.layers.{bid}.pre_mlp_normz&model.transformer.blocks.{bid}.ff_normr3   z(model.layers.{bid}.feedforward_layernorm$model.layers.{bid}.pre_mlp_layernormzlayers.{bid}.mlp_norm)z,model.layers.{bid}.pre_feedforward_layernormz&layers.{bid}.pre_feedforward_layernormz*model.layers.{bid}.pre_ff_layernorm.weightr4   )z.model.layers.{bid}.pre_feedforward_layernorm_2)z-model.layers.{bid}.post_feedforward_layernormz'layers.{bid}.post_feedforward_layernormz%model.layers.{bid}.post_mlp_layernormz.model.layers.layers.{bid}.post_mlp_norm.weight'model.layers.{bid}.feed_forward.up_projz model.layers.{bid}.post_moe_norm)z/model.layers.{bid}.post_feedforward_layernorm_1)z/model.layers.{bid}.post_feedforward_layernorm_2)zlayers.{bid}.feed_forward.gatez(model.layers.{bid}.block_sparse_moe.gatezmodel.layers.{bid}.mlp.gatez&transformer.decoder_layer.{bid}.routerz)transformer.blocks.{bid}.ffn.router.layerz0model.layers.{bid}.block_sparse_moe.router.layerz&model.layers.{bid}.feed_forward.routerz%encoder.layers.{bid}.mlp.router.layerzmodel.layers.{bid}.mlp.routerzmodel.layers.{bid}.mlp.gate.wgz2model.layers.{bid}.block_sparse_moe.primary_routerz$model.layers.{bid}.feed_forward.gatez"model.layers.{bid}.mlp.router.gatezlayers.{bid}.gatez backbone.layers.{bid}.mixer.gatezmodel.layers.{bid}.moe.gatezmodel.layers.{bid}.router.proj)z)model.layers.{bid}.mlp.shared_expert_gate)
z.model.layers.{bid}.mlp.gate.e_score_correctionz5model.layers.{bid}.mlp.moe_statics.e_score_correctionz'model.layers.{bid}.mlp.gate.expert_biasz"model.layers.{bid}.mlp.expert_biasz+model.layers.{bid}.feed_forward.expert_biasz6model.layers.{bid}.block_sparse_moe.e_score_correctionz3backbone.layers.{bid}.mixer.gate.e_score_correctionz)model.layers.{bid}.mlp.e_score_correctionz;model.layers.{bid}.block_sparse_moe.gate.e_score_correctionz"model.layers.{bid}.moe.router_bias)&z'gpt_neox.layers.{bid}.mlp.dense_h_to_4hztransformer.h.{bid}.mlp.c_fcz$transformer.blocks.{bid}.ffn.up_projz%transformer.h.{bid}.mlp.dense_h_to_4hzh.{bid}.mlp.dense_h_to_4hzmodel.layers.{bid}.mlp.up_projlayers.{bid}.mlp.up_projzlayers.{bid}.feed_forward.w3z&encoder.layer.{bid}.intermediate.densezlayers.{bid}.mlp.Wiz transformer.layer.{bid}.ffn.lin1ztransformer.h.{bid}.mlp.fc_inz transformer.h.{bid}.mlp.linear_3z$model.layers.{bid}.mlp.dense_h_to_4hztransformer.h.{bid}.mlp.w1zh.{bid}.mlp.c_fcztransformer.h.{bid}.mlp.fc1zmodel.layers.{bid}.mlp.fc1z#model.layers.{bid}.mlp.gate_up_projz%model.layers.layers.{bid}.mlp.up_projz*model.layers.layers.{bid}.mlp.gate_up_projz"model.layers.{bid}.feed_forward.w3zencoder.layers.{bid}.mlp.fc11zencoder.layers.{bid}.mlp.fc1zmodel.layers.{bid}.mlp.c_fcz&encoder.layer.{bid}.mlp.gated_layers_vz$encoder.layer.{bid}.mlp.gated_layersz&encoder.layer.{bid}.mlp.up_gated_layerz"model.layers.{bid}.residual_mlp.w3z&encoder.layers.{bid}.mlp.dense_h_to_4hztransformer.h.{bid}.mlp.c_fc_1r5   z!transformer_encoder.{bid}.ffn.w12z&model.layers.{bid}.block_sparse_moe.upz&model.transformer.blocks.{bid}.up_projr6   z#backbone.layers.{bid}.mixer.up_projz+model.layers.{bid}.mlp.language_mlp.up_proj)	z$layers.{bid}.feed_forward.experts.w3z,transformer.decoder_layer.{bid}.moe.linear_vz+transformer.blocks.{bid}.ffn.experts.mlp.v1z&model.layers.{bid}.mlp.experts.up_projz.model.layers.{bid}.block_sparse_moe.experts.w3z/model.layers.{bid}.feed_forward.experts.up_projz'encoder.layers.{bid}.mlp.experts.mlp.w1z.model.layers.{bid}.block_sparse_moe.experts.upzmodel.layers.{bid}.moe.up_proj)	z,model.layers.{bid}.mlp.shared_expert.up_projz-model.layers.{bid}.mlp.shared_experts.up_projz5model.layers.{bid}.feed_forward.shared_expert.up_proj)model.layers.{bid}.feed_forward.down_projz)model.layers.{bid}.mlp.shared_mlp.up_projzlayers.{bid}.shared_experts.w3z2backbone.layers.{bid}.mixer.shared_experts.up_projz:model.layers.{bid}.block_sparse_moe.shared_experts.up_projz'model.layers.{bid}.share_expert.up_proj)z,model.layers.{bid}.mlp.chunk_experts.up_proj)z transformer.blocks.{bid}.ffn.act)z model.layers.{bid}.mlp.gate_projlayers.{bid}.mlp.gate_projzlayers.{bid}.feed_forward.w1ztransformer.h.{bid}.mlp.w2ztransformer.h.{bid}.mlp.c_fc2z'model.layers.layers.{bid}.mlp.gate_projz"model.layers.{bid}.feed_forward.w1zencoder.layers.{bid}.mlp.fc12z&encoder.layer.{bid}.mlp.gated_layers_wz transformer.h.{bid}.mlp.linear_1z"model.layers.{bid}.residual_mlp.w1ztransformer.h.{bid}.mlp.c_fc_0z)model.layers.{bid}.feed_forward.gate_projz&model.transformer.blocks.{bid}.ff_projr8   z-model.layers.{bid}.mlp.language_mlp.gate_proj)z$layers.{bid}.feed_forward.experts.w1z*transformer.decoder_layer.{bid}.moe.linearz+transformer.blocks.{bid}.ffn.experts.mlp.w1z(model.layers.{bid}.mlp.experts.gate_projz.model.layers.{bid}.block_sparse_moe.experts.w1z1model.layers.{bid}.feed_forward.experts.gate_projz0model.layers.{bid}.block_sparse_moe.experts.gatez model.layers.{bid}.moe.gate_proj)z.model.layers.{bid}.mlp.shared_expert.gate_projz/model.layers.{bid}.mlp.shared_experts.gate_projz7model.layers.{bid}.feed_forward.shared_expert.gate_projz+model.layers.{bid}.mlp.shared_mlp.gate_projzlayers.{bid}.shared_experts.w1z<model.layers.{bid}.block_sparse_moe.shared_experts.gate_projz)model.layers.{bid}.share_expert.gate_proj)z.model.layers.{bid}.mlp.chunk_experts.gate_proj)z+model.layers.{bid}.mlp.experts.gate_up_projz'model.layers.{bid}.experts.gate_up_proj)z+backbone.layers.{bid}.mixer.fc1_latent_proj)z+backbone.layers.{bid}.mixer.fc2_latent_proj)!z'gpt_neox.layers.{bid}.mlp.dense_4h_to_hztransformer.h.{bid}.mlp.c_projz&transformer.blocks.{bid}.ffn.down_projz%transformer.h.{bid}.mlp.dense_4h_to_hzh.{bid}.mlp.dense_4h_to_hz model.layers.{bid}.mlp.down_projlayers.{bid}.mlp.down_projzlayers.{bid}.feed_forward.w2z encoder.layer.{bid}.output.densezlayers.{bid}.mlp.Woz transformer.layer.{bid}.ffn.lin2ztransformer.h.{bid}.mlp.fc_outz$model.layers.{bid}.mlp.dense_4h_to_hzh.{bid}.mlp.c_projztransformer.h.{bid}.mlp.fc2zmodel.layers.{bid}.mlp.fc2z'model.layers.layers.{bid}.mlp.down_projz"model.layers.{bid}.feed_forward.w2zencoder.layers.{bid}.mlp.fc2zmodel.layers.{bid}.mlp.c_projzencoder.layer.{bid}.mlp.woz#transformer.layers.{bid}.ffn.proj_2z"model.layers.{bid}.residual_mlp.w2z"encoder.layer.{bid}.mlp.down_layerz&encoder.layers.{bid}.mlp.dense_4h_to_hzmodel.layers.h.{bid}.mlp.c_projr7   z transformer_encoder.{bid}.ffn.w3z(model.layers.{bid}.block_sparse_moe.downz%model.transformer.blocks.{bid}.ff_outr9   z%backbone.layers.{bid}.mixer.down_projz-model.layers.{bid}.mlp.language_mlp.down_proj)z$layers.{bid}.feed_forward.experts.w2z,transformer.decoder_layer.{bid}.moe.linear_1z+transformer.blocks.{bid}.ffn.experts.mlp.w2z(model.layers.{bid}.mlp.experts.down_projz1model.layers.{bid}.block_sparse_moe.output_linearz.model.layers.{bid}.block_sparse_moe.experts.w2z1model.layers.{bid}.feed_forward.experts.down_projz'encoder.layers.{bid}.mlp.experts.mlp.w2z0model.layers.{bid}.block_sparse_moe.experts.downz model.layers.{bid}.moe.down_projz$model.layers.{bid}.experts.down_proj)	z.model.layers.{bid}.mlp.shared_expert.down_projz/model.layers.{bid}.mlp.shared_experts.down_projz7model.layers.{bid}.feed_forward.shared_expert.down_projz+model.layers.{bid}.shared_mlp.output_linearz+model.layers.{bid}.mlp.shared_mlp.down_projzlayers.{bid}.shared_experts.w2z4backbone.layers.{bid}.mixer.shared_experts.down_projz<model.layers.{bid}.block_sparse_moe.shared_experts.down_projz)model.layers.{bid}.share_expert.down_proj)z.model.layers.{bid}.mlp.chunk_experts.down_proj)z/encoder.layers.{bid}.self_attention.q_layernormz(model.layers.{bid}.self_attn.q_layernormz,model.layers.{bid}.self_attn.query_layernorm,model.layers.{bid}.attention.query_layernormz#model.layers.{bid}.self_attn.q_normlayers.{bid}.self_attn.q_normz"transformer.blocks.{bid}.attn.q_lnz/encoder.layer.{bid}.attention.self.layer_norm_qz$transformer.layers.{bid}.attn.q_normz!model.layers.layers.{bid}.mixer.qz&model.layers.layers.{bid}.mixer.q_normr;   r:   )z/encoder.layers.{bid}.self_attention.k_layernormz(model.layers.{bid}.self_attn.k_layernormz*model.layers.{bid}.self_attn.key_layernorm*model.layers.{bid}.attention.key_layernormz#model.layers.{bid}.self_attn.k_normlayers.{bid}.self_attn.k_normz"transformer.blocks.{bid}.attn.k_lnz/encoder.layer.{bid}.attention.self.layer_norm_kz$transformer.layers.{bid}.attn.k_normz!model.layers.layers.{bid}.mixer.kz&model.layers.layers.{bid}.mixer.k_normr=   r<   )z7encoder.layers.{bid}.self_attention.rotary_emb.inv_freq)z$encoder.layer.{bid}.output.LayerNormz)transformer.layer.{bid}.output_layer_normzencoder.layers.{bid}.norm2z*transformer.decoder_layer.{bid}.rms_norm_3z!encoder.layer.{bid}.mlp.layernormz encoder.layer.{bid}.layer_norm_2z"model.layers.{bid}.final_layernorm)zmodel.layers.{bid}.layer_scalar)zmodel.embed_tokens_per_layer)z model.per_layer_model_projection)zmodel.per_layer_projection_norm)zmodel.altup_projections)zmodel.altup_unembed_projections)z'model.layers.{bid}.per_layer_input_gate)z'model.layers.{bid}.per_layer_projection)z,model.layers.{bid}.post_per_layer_input_norm)z)model.layers.{bid}.altup.correction_coefs)z-model.layers.{bid}.altup.correct_output_scale)z)model.layers.{bid}.altup.prediction_coefs)z(model.layers.{bid}.altup.modality_router)z$model.layers.{bid}.altup.router_norm)z%model.layers.{bid}.laurel.linear_left)z&model.layers.{bid}.laurel.linear_right)z*model.layers.{bid}.laurel.post_laurel_norm)zmodel.layers.{bid}.in_projz#backbone.layers.{bid}.mixer.in_projz model.layers.{bid}.mamba.in_projz'model.layers.layers.{bid}.mixer.in_projz+model.layers.{bid}.linear_attn.in_proj_qkvz)zmodel.layers.{bid}.conv1dz"backbone.layers.{bid}.mixer.conv1dzmodel.layers.{bid}.mamba.conv1dz&model.layers.layers.{bid}.mixer.conv1dz%model.layers.{bid}.linear_attn.conv1d)zmodel.layers.{bid}.x_projz"backbone.layers.{bid}.mixer.x_projzmodel.layers.{bid}.mamba.x_projz)model.layers.layers.{bid}.mixer.bcdt_proj)zmodel.layers.{bid}.dt_projz#backbone.layers.{bid}.mixer.dt_projz model.layers.{bid}.mamba.dt_projz'model.layers.layers.{bid}.mixer.dt_projz&model.layers.{bid}.linear_attn.dt_projzbackbone.layers.{bid}.mixer.dtz$model.layers.{bid}.self_attn.dt_proj)z.model.layers.layers.{bid}.mixer.dt_norm.weightz%model.layers.{bid}.mamba.dt_layernorm)zmodel.layers.{bid}.A_logz!backbone.layers.{bid}.mixer.A_logzmodel.layers.{bid}.mamba.A_logz%model.layers.layers.{bid}.mixer.A_logz$model.layers.{bid}.linear_attn.A_logz"model.layers.{bid}.self_attn.A_log)z$model.layers.{bid}.mamba.b_layernormz$model.layers.{bid}.mamba.B_layernormz-model.layers.layers.{bid}.mixer.B_norm.weight)z$model.layers.{bid}.mamba.c_layernormz$model.layers.{bid}.mamba.C_layernormz-model.layers.layers.{bid}.mixer.C_norm.weight)zmodel.layers.{bid}.Dzbackbone.layers.{bid}.mixer.Dzmodel.layers.{bid}.mamba.Dz!model.layers.layers.{bid}.mixer.D)zmodel.layers.{bid}.mamba.normz#model.layers.{bid}.linear_attn.normz backbone.layers.{bid}.mixer.normz#model.layers.{bid}.self_attn.o_norm)zmodel.layers.{bid}.out_projz$backbone.layers.{bid}.mixer.out_projz!model.layers.{bid}.mamba.out_projz'model.layers.{bid}.linear_attn.out_projz(model.layers.layers.{bid}.mixer.out_proj)z(model.layers.{bid}.linear_attn.in_proj_a)z)model.layers.{bid}.linear_attn.in_proj_ba)z%model.layers.{bid}.self_attn.q_conv1d)z%model.layers.{bid}.self_attn.k_conv1d)z%model.layers.{bid}.self_attn.v_conv1d)z%model.layers.{bid}.self_attn.f_a_proj)z%model.layers.{bid}.self_attn.f_b_proj)z(model.layers.{bid}.linear_attn.in_proj_bz#model.layers.{bid}.self_attn.b_proj)z%model.layers.{bid}.self_attn.g_a_proj)z%model.layers.{bid}.self_attn.g_b_proj)zmodel.layers.{bid}.attention.w0)z'rwkv.blocks.{bid}.attention.time_maa_w1z(model.layers.{bid}.self_attn.time_maa_w1zmodel.layers.{bid}.attention.w1)z'rwkv.blocks.{bid}.attention.time_maa_w2z(model.layers.{bid}.self_attn.time_maa_w2zmodel.layers.{bid}.attention.w2)zmodel.layers.{bid}.attention.a0)zmodel.layers.{bid}.attention.a1)zmodel.layers.{bid}.attention.a2)zmodel.layers.{bid}.attention.v0)zmodel.layers.{bid}.attention.v1)zmodel.layers.{bid}.attention.v2)zmodel.layers.{bid}.attention.g1)zmodel.layers.{bid}.attention.g2)z model.layers.{bid}.attention.k_k)z model.layers.{bid}.attention.k_a)z model.layers.{bid}.attention.r_k)z&rwkv.blocks.{bid}.attention.time_maa_xz'model.layers.{bid}.self_attn.time_maa_x)z&rwkv.blocks.{bid}.attention.time_maa_kz'model.layers.{bid}.self_attn.time_maa_k)z&rwkv.blocks.{bid}.attention.time_maa_vz'model.layers.{bid}.self_attn.time_maa_v)z&rwkv.blocks.{bid}.attention.time_maa_rz'model.layers.{bid}.self_attn.time_maa_r)z&rwkv.blocks.{bid}.attention.time_maa_gz'model.layers.{bid}.self_attn.time_maa_g)z&rwkv.blocks.{bid}.attention.time_maa_wz'model.layers.{bid}.self_attn.time_maa_w)z&rwkv.blocks.{bid}.attention.time_faaaa)z&rwkv.blocks.{bid}.attention.time_decayz'model.layers.{bid}.self_attn.time_decay)z)rwkv.blocks.{bid}.attention.time_decay_w1z*model.layers.{bid}.self_attn.time_decay_w1)z)rwkv.blocks.{bid}.attention.time_decay_w2z*model.layers.{bid}.self_attn.time_decay_w2)zrwkv.blocks.{bid}.attention.keyr-   z model.layers.{bid}.attention.keyz#model.layers.{bid}.attention.k_proj)z!rwkv.blocks.{bid}.attention.valuer/   z"model.layers.{bid}.attention.valuez#model.layers.{bid}.attention.v_proj)z&rwkv.blocks.{bid}.attention.receptancer+   z'model.layers.{bid}.attention.receptancez#model.layers.{bid}.attention.r_proj)z rwkv.blocks.{bid}.attention.gatez!model.layers.{bid}.self_attn.gate)z rwkv.blocks.{bid}.attention.ln_xz!model.layers.{bid}.attention.ln_x)z"rwkv.blocks.{bid}.attention.outputr1   z#model.layers.{bid}.attention.outputz#model.layers.{bid}.attention.o_proj)z)rwkv.blocks.{bid}.feed_forward.time_maa_kz#model.layers.{bid}.feed_forward.x_k)z)rwkv.blocks.{bid}.feed_forward.time_maa_r)z"rwkv.blocks.{bid}.feed_forward.keyz#model.layers.{bid}.feed_forward.key)z)rwkv.blocks.{bid}.feed_forward.receptance)z$rwkv.blocks.{bid}.feed_forward.valuez%model.layers.{bid}.feed_forward.value)z%model.layers.{bid}.self_attn.q_a_projzlayers.{bid}.attention.wq_a)z%model.layers.{bid}.self_attn.q_b_projzlayers.{bid}.attention.wq_b)z/model.layers.{bid}.self_attn.kv_a_proj_with_mqaz%layers.{bid}.attention.wkv_a_with_mqa)z&model.layers.{bid}.self_attn.kv_b_proj)z%model.layers.{bid}.self_attn.k_b_projzlayers.{bid}.attention.k_b_proj)z%model.layers.{bid}.self_attn.v_b_projzlayers.{bid}.attention.v_b_proj)z*model.layers.{bid}.self_attn.q_a_layernormzlayers.{bid}.attention.q_a_norm)z+model.layers.{bid}.self_attn.kv_a_layernormz layers.{bid}.attention.kv_a_norm)z*model.layers.{bid}.self_attn.inner_attn_ln)z$model.layers.{bid}.mlp.ffn_layernorm)z&decoder.block.{bid}.layer.0.layer_norm)z+decoder.block.{bid}.layer.0.SelfAttention.q)z+decoder.block.{bid}.layer.0.SelfAttention.k)z+decoder.block.{bid}.layer.0.SelfAttention.v)z+decoder.block.{bid}.layer.0.SelfAttention.o)zAdecoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias)z&decoder.block.{bid}.layer.1.layer_norm)z-decoder.block.{bid}.layer.1.EncDecAttention.q)z-decoder.block.{bid}.layer.1.EncDecAttention.k)z-decoder.block.{bid}.layer.1.EncDecAttention.v)z-decoder.block.{bid}.layer.1.EncDecAttention.o)zCdecoder.block.{bid}.layer.1.EncDecAttention.relative_attention_bias)z&decoder.block.{bid}.layer.2.layer_norm)z/decoder.block.{bid}.layer.2.DenseReluDense.wi_0)z-decoder.block.{bid}.layer.2.DenseReluDense.wiz/decoder.block.{bid}.layer.2.DenseReluDense.wi_1)z-decoder.block.{bid}.layer.2.DenseReluDense.wo)zdecoder.final_layer_norm)z&encoder.block.{bid}.layer.0.layer_norm)z+encoder.block.{bid}.layer.0.SelfAttention.q)z+encoder.block.{bid}.layer.0.SelfAttention.k)z+encoder.block.{bid}.layer.0.SelfAttention.v)z+encoder.block.{bid}.layer.0.SelfAttention.o)zAencoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias)z&encoder.block.{bid}.layer.1.layer_norm)z/encoder.block.{bid}.layer.1.DenseReluDense.wi_0)z-encoder.block.{bid}.layer.1.DenseReluDense.wiz/encoder.block.{bid}.layer.1.DenseReluDense.wi_1)z-encoder.block.{bid}.layer.1.DenseReluDense.wo)z)model.layers.{bid}.mlp.vision_mlp.up_proj)z+model.layers.{bid}.mlp.vision_mlp.gate_proj)z+model.layers.{bid}.mlp.vision_mlp.down_proj)z0model.layers.{bid}.self_attn.vision_expert_dense)z:model.layers.{bid}.self_attn.vision_expert_query_key_value)z+model.layers.{bid}.self_attn.indexer.k_norm)z1model.layers.{bid}.self_attn.indexer.weights_proj)z'model.layers.{bid}.self_attn.indexer.wk)z)model.layers.{bid}.self_attn.indexer.wq_b)zencoder.final_layer_norm
layer_norm)
classifierzclassifier.densepre_classifierdensez
head.dense)zclassifier.out_proj)z	head.norm)zbackbone.convnext.{bid}.dwconv)zbackbone.convnext.{bid}.norm)zbackbone.convnext.{bid}.pwconv1)zbackbone.convnext.{bid}.pwconv2)zbackbone.convnext.{bid}.gamma)zbackbone.posnet.{bid}.conv1)zbackbone.posnet.{bid}.conv2)zbackbone.posnet.{bid}.norm)zbackbone.posnet.{bid}.norm1)zbackbone.posnet.{bid}.norm2)zbackbone.posnet.{bid}.q)zbackbone.posnet.{bid}.k)zbackbone.posnet.{bid}.v)zbackbone.posnet.{bid}.proj_out)zmodel.layers.{bid}.conv.conv)zmodel.layers.{bid}.conv.in_proj)z model.layers.{bid}.conv.out_proj)z"multi_modal_projector.linear_{bid}zmm_projector.proj.linear_{bid}zvisual.merger.mlp.{bid}zmlp_AR.linear_{bid}zmerger.mlp.{bid}zvision_tower.merger.mlp.{bid}zvit.perceive.proj.{bid})z(model.connector.modality_projection.projz$model.vision.linear_proj.linear_projzmodel.projector.layerszvisual.merger.projzvit.perceive.mlp)z model.mm_projector.mlp.mlp.{bid}z'vision_model.vision_adapter.mlp.fc{bid}z
mlp1.{bid}z%model.aligner.fc1.hidden_layers.{bid})z model.mm_projector.peg.peg.{bid})z4vision_tower.vision_model.embeddings.class_embeddingz'model.vision_tower.embeddings.cls_tokenzvision_model.class_embeddingz*model.vision.patch_embedding.cls_embeddingz>vision_model.radio_model.model.patch_generator.cls_token.tokenz-model.vision_model.embeddings.class_embedding)z4vision_tower.vision_model.embeddings.patch_embeddingz9model.vision_tower.embeddings.patch_embeddings.projectionzvpm.embeddings.patch_embedding-model.vision_model.embeddings.patch_embeddingzvit.embeddings.patch_embeddingzvision_tower.patch_convzvision_encoder.patch_convz#vision_model.patch_embedding.linearzvisual.patch_embed.projzvision_tower.patch_embed.projz!model.vision.patch_embedding.projrB   z/siglip2.vision_model.embeddings.patch_embeddingz7vision_model.radio_model.model.patch_generator.embedderz,model.vision_tower.patch_embedder.input_projz(vision_tower.patch_embed.patchifier.projzvision_model.conv1)zvisual.post_conv_layernorm(vision_tower.patch_embed.patchifier.norm)z7vision_tower.vision_model.embeddings.position_embeddingz1model.vision_tower.embeddings.position_embeddingsz!vpm.embeddings.position_embeddingz0model.vision_model.embeddings.position_embeddingz!vit.embeddings.position_embeddingz%vision_model.positional_embedding_vlmz vision_tower.patch_embed.pos_embzvisual.pos_embedz/model.vision.patch_embedding.position_embeddingz$visual.embeddings.position_embeddingz8vision_model.radio_model.model.patch_generator.pos_embedz:model.vision_tower.patch_embedder.position_embedding_tablez!vision_model.positional_embedding)zmodel.image_newlinezvit.perceive.image_newline)zmodel.view_seperatorzvit.perceive.image_sep)zvisual.blocks.{bid}.attn.qkvz"vision_tower.blocks.{bid}.attn.qkvz?model.vision.transformer.layers.{bid}.attention.query_key_valuez>model.vision_model.transformer.layers.{bid}.self_attn.qkv_projz&vision_tower.encoder.blocks.{bid}.wqkvz4vision_model.radio_model.model.blocks.{bid}.attn.qkvz5vision_model.transformer.resblocks.{bid}.attn.in_proj)z?vision_tower.vision_model.encoder.layers.{bid}.self_attn.q_projz7model.vision_tower.encoder.layer.{bid}.attention.q_projz)vpm.encoder.layers.{bid}.self_attn.q_projz8model.vision_model.encoder.layers.{bid}.self_attn.q_projz!vit.layers.{bid}.self_attn.q_projz0vision_model.model.layers.{bid}.self_attn.q_projz6vision_tower.transformer.layers.{bid}.attention.q_projz4vision_encoder.transformer.layers.{bid}.attention.wqzvisual.blocks.{bid}.attn.qz$vision_tower.encoder.blocks.{bid}.wqz:siglip2.vision_model.encoder.layers.{bid}.self_attn.q_projz<model.vision_model.transformer.layers.{bid}.self_attn.q_projz7vision_model.model.layers.{bid}.self_attn.q_proj.linear)z:vision_tower.vision_model.encoder.layers.{bid}.attn.q_normz7model.vision_tower.encoder.layer.{bid}.attention.q_normzvisual.blocks.{bid}.attn.q_normz0vision_model.model.layers.{bid}.self_attn.q_norm)z?vision_tower.vision_model.encoder.layers.{bid}.self_attn.k_projz7model.vision_tower.encoder.layer.{bid}.attention.k_projz)vpm.encoder.layers.{bid}.self_attn.k_projz8model.vision_model.encoder.layers.{bid}.self_attn.k_projz!vit.layers.{bid}.self_attn.k_projz0vision_model.model.layers.{bid}.self_attn.k_projz6vision_tower.transformer.layers.{bid}.attention.k_projz4vision_encoder.transformer.layers.{bid}.attention.wkzvisual.blocks.{bid}.attn.kz$vision_tower.encoder.blocks.{bid}.wkz<model.vision_model.transformer.layers.{bid}.self_attn.k_projz:siglip2.vision_model.encoder.layers.{bid}.self_attn.k_projz7vision_model.model.layers.{bid}.self_attn.k_proj.linear)z:vision_tower.vision_model.encoder.layers.{bid}.attn.k_normz7model.vision_tower.encoder.layer.{bid}.attention.k_normzvisual.blocks.{bid}.attn.k_normz0vision_model.model.layers.{bid}.self_attn.k_norm)z?vision_tower.vision_model.encoder.layers.{bid}.self_attn.v_projz7model.vision_tower.encoder.layer.{bid}.attention.v_projz)vpm.encoder.layers.{bid}.self_attn.v_projz8model.vision_model.encoder.layers.{bid}.self_attn.v_projz!vit.layers.{bid}.self_attn.v_projz0vision_model.model.layers.{bid}.self_attn.v_projz6vision_tower.transformer.layers.{bid}.attention.v_projz4vision_encoder.transformer.layers.{bid}.attention.wvzvisual.blocks.{bid}.attn.vz$vision_tower.encoder.blocks.{bid}.wvz:siglip2.vision_model.encoder.layers.{bid}.self_attn.v_projz<model.vision_model.transformer.layers.{bid}.self_attn.v_projz7vision_model.model.layers.{bid}.self_attn.v_proj.linear)z:vision_tower.vision_model.encoder.layers.{bid}.layer_norm1z4vision_tower.vision_model.encoder.layers.{bid}.norm1z7model.vision_tower.encoder.layer.{bid}.layernorm_beforez$vpm.encoder.layers.{bid}.layer_norm1z3model.vision_model.encoder.layers.{bid}.layer_norm1z vit.layers.{bid}.input_layernormz4vision_tower.transformer.layers.{bid}.attention_normz6vision_encoder.transformer.layers.{bid}.attention_normz/vision_model.model.layers.{bid}.input_layernormzvisual.blocks.{bid}.norm1z'vision_tower.encoder.blocks.{bid}.norm0z5model.vision.transformer.layers.{bid}.input_layernormz7model.vision_model.transformer.layers.{bid}.layer_norm1z5siglip2.vision_model.encoder.layers.{bid}.layer_norm1z1vision_model.radio_model.model.blocks.{bid}.norm1zvision_tower.blocks.{bid}.norm1z-vision_model.transformer.resblocks.{bid}.ln_1)zAvision_tower.vision_model.encoder.layers.{bid}.self_attn.out_projz8vision_tower.vision_model.encoder.layers.{bid}.attn.projzAmodel.vision_tower.encoder.layer.{bid}.attention.projection_layerz+vpm.encoder.layers.{bid}.self_attn.out_projz:model.vision_model.encoder.layers.{bid}.self_attn.out_projz!vit.layers.{bid}.self_attn.o_projzBmodel.vision_model.encoder.layers.{bid}.self_attn.projection_layerz0vision_model.model.layers.{bid}.self_attn.o_projz6vision_tower.transformer.layers.{bid}.attention.o_projz4vision_encoder.transformer.layers.{bid}.attention.wozvisual.blocks.{bid}.attn.projz$vision_tower.encoder.blocks.{bid}.woz5model.vision.transformer.layers.{bid}.attention.densez>model.vision_model.transformer.layers.{bid}.self_attn.out_projz<siglip2.vision_model.encoder.layers.{bid}.self_attn.out_projz5vision_model.radio_model.model.blocks.{bid}.attn.projz7vision_model.model.layers.{bid}.self_attn.o_proj.linearz#vision_tower.blocks.{bid}.attn.projz6vision_model.transformer.resblocks.{bid}.attn.out_proj)z:vision_tower.vision_model.encoder.layers.{bid}.layer_norm2z4vision_tower.vision_model.encoder.layers.{bid}.norm2z6model.vision_tower.encoder.layer.{bid}.layernorm_afterz$vpm.encoder.layers.{bid}.layer_norm2z3model.vision_model.encoder.layers.{bid}.layer_norm2z)vit.layers.{bid}.post_attention_layernorm8vision_model.model.layers.{bid}.post_attention_layernormz.vision_tower.transformer.layers.{bid}.ffn_normz0vision_encoder.transformer.layers.{bid}.ffn_normzvisual.blocks.{bid}.norm2z'vision_tower.encoder.blocks.{bid}.norm1z>model.vision.transformer.layers.{bid}.post_attention_layernormz7model.vision_model.transformer.layers.{bid}.layer_norm2z5siglip2.vision_model.encoder.layers.{bid}.layer_norm2z1vision_model.radio_model.model.blocks.{bid}.norm2z9vision_model.model.layers.{bid}.pre_feedforward_layernormzvision_tower.blocks.{bid}.norm2z-vision_model.transformer.resblocks.{bid}.ln_2)z6vision_tower.vision_model.encoder.layers.{bid}.mlp.fc1z.model.vision_tower.encoder.layer.{bid}.mlp.fc1z vpm.encoder.layers.{bid}.mlp.fc1z/model.vision_model.encoder.layers.{bid}.mlp.fc1z"vit.layers.{bid}.mlp.dense_h_to_4hz:vision_tower.transformer.layers.{bid}.feed_forward.up_projz7vision_encoder.transformer.layers.{bid}.feed_forward.w3z'vision_model.model.layers.{bid}.mlp.fc1zvisual.blocks.{bid}.mlp.fc1zvisual.blocks.{bid}.mlp.up_projz"visual.blocks.{bid}.mlp.linear_fc1z)vision_tower.encoder.blocks.{bid}.mlp.fc0z3model.vision_model.transformer.layers.{bid}.mlp.fc1z-model.vision.transformer.layers.{bid}.mlp.fc1z1siglip2.vision_model.encoder.layers.{bid}.mlp.fc1z3vision_model.radio_model.model.blocks.{bid}.mlp.fc1z+vision_model.model.layers.{bid}.mlp.up_projz1vision_model.transformer.resblocks.{bid}.mlp.c_fc)z<vision_tower.transformer.layers.{bid}.feed_forward.gate_projz7vision_encoder.transformer.layers.{bid}.feed_forward.w1z!visual.blocks.{bid}.mlp.gate_projz-vision_model.model.layers.{bid}.mlp.gate_proj)z6vision_tower.vision_model.encoder.layers.{bid}.mlp.fc2z.model.vision_tower.encoder.layer.{bid}.mlp.fc2z vpm.encoder.layers.{bid}.mlp.fc2z/model.vision_model.encoder.layers.{bid}.mlp.fc2z"vit.layers.{bid}.mlp.dense_4h_to_hz<vision_tower.transformer.layers.{bid}.feed_forward.down_projz7vision_encoder.transformer.layers.{bid}.feed_forward.w2z'vision_model.model.layers.{bid}.mlp.fc2zvisual.blocks.{bid}.mlp.fc2z!visual.blocks.{bid}.mlp.down_projz"visual.blocks.{bid}.mlp.linear_fc2z)vision_tower.encoder.blocks.{bid}.mlp.fc1z-model.vision.transformer.layers.{bid}.mlp.fc2z3model.vision_model.transformer.layers.{bid}.mlp.fc2z1siglip2.vision_model.encoder.layers.{bid}.mlp.fc2z3vision_model.radio_model.model.blocks.{bid}.mlp.fc2z-vision_model.model.layers.{bid}.mlp.down_projz3vision_model.transformer.resblocks.{bid}.mlp.c_proj)rD   )z:vision_model.model.layers.{bid}.post_feedforward_layernorm)z2vision_tower.vision_model.encoder.layers.{bid}.ls1z/model.vision_tower.encoder.layer.{bid}.lambda_1z-vision_model.transformer.resblocks.{bid}.ls_1)z2vision_tower.vision_model.encoder.layers.{bid}.ls2z/model.vision_tower.encoder.layer.{bid}.lambda_2z-vision_model.transformer.resblocks.{bid}.ls_2)z,vision_model.model.layers.{bid}.layer_scalar)z&vision_tower.vision_model.pre_layrnormzvision_tower.ln_prezvision_encoder.ln_prezvision_model.layernorm_prezmodel.vision_model.pre_layrnormrC   zvision_model.ln_pre)z(vision_tower.vision_model.post_layernormz!model.vision_model.post_layernormzvision_model.layernorm_postzvisual.merger.ln_qz$vision_tower.encoder.final_layernormzvisual.post_layernormz#siglip2.vision_model.post_layernorm)z"visual.merger.post_projection_normzvision_tower.post_trunk_normzvit.perceive.after_rms)z)multi_modal_projector.mm_input_projection)	zmulti_modal_projector.normz multi_modal_projector.layer_normzmulti_modal_projector.pre_normzmm_projector.pre_normpre_mm_projector_normmodel.vision.linear_proj.norm1zmlp_AR.pre_normzmerger.ln_qzvision_tower.merger.ln_q)z&multi_modal_projector.mm_soft_emb_norm)zresampler.pos_embed_k)zresampler.attn.in_proj_q)zresampler.attn.in_proj_k)zresampler.attn.in_proj_v)zresampler.attn.out_projz*model.vision_model.head.attention.out_proj)zresampler.kv_proj)zresampler.ln_post)zresampler.ln_kv)zresampler.ln_q)zresampler.proj)zresampler.query)zv.token_embd.img_break)z0multi_modal_projector.patch_merger.merging_layerzpatch_merger.merging_layerzvisual.downsample)z-model.visual.deepstack_merger_list.{bid}.norm)z3model.visual.deepstack_merger_list.{bid}.linear_fc1)z3model.visual.deepstack_merger_list.{bid}.linear_fc2)zmodel.sam_model.pos_embed)z model.sam_model.patch_embed.proj)z"model.sam_model.blocks.{bid}.norm1)z"model.sam_model.blocks.{bid}.norm2)z+model.sam_model.blocks.{bid}.attn.rel_pos_h)z+model.sam_model.blocks.{bid}.attn.rel_pos_w)z%model.sam_model.blocks.{bid}.attn.qkv)z&model.sam_model.blocks.{bid}.attn.proj)z%model.sam_model.blocks.{bid}.mlp.lin1)z%model.sam_model.blocks.{bid}.mlp.lin2)zmodel.sam_model.neck.{bid})zmodel.sam_model.net_2)zmodel.sam_model.net_3)rF   )z&model.vision.linear_proj.dense_h_to_4hzvisual.merger.up_proj)z&model.vision.linear_proj.dense_4h_to_hzvisual.merger.down_proj)z"model.vision.linear_proj.gate_projzvisual.merger.gate_proj)zmodel.vision.boi)zmodel.vision.eoi)zvit.perceive.before_rms)zvit.perceive.image_begin)zvit.perceive.image_end)zmodel.vision_tower.std_bias)zmodel.vision_tower.std_scale)zaudio_tower.embed_positionszaudio_embedding.embedding)zaudio_embedding.embedding_norm)zaudio_embedding.to_logits)zaudio_tower.conv{bid}zconformer.pre_encode.conv.{bid}z;model.audio_tower.subsample_conv_projection.conv_{bid}.convz3conformer.subsample_conv_projection.layer{bid}.conv)z3conformer.subsample_conv_projection.layer{bid}.norm)z5conformer.subsample_conv_projection.input_proj_linearzencoder.input_linear)zaudio_tower.conv2d{bid})zaudio_tower.conv_out)zaudio_tower.layer_normzaudio_tower.ln_post)z)audio_tower.layers.{bid}.self_attn.q_projz)conformer.layers.{bid}.self_attn.linear_qz,conformer.layers.{bid}.attention.attn.q_projz'conformer.layers.{bid}.self_attn.q_projzencoder.layers.{bid}.attn.to_q)z)audio_tower.layers.{bid}.self_attn.k_projz)conformer.layers.{bid}.self_attn.linear_kz,conformer.layers.{bid}.attention.attn.k_projz'conformer.layers.{bid}.self_attn.k_projzencoder.layers.{bid}.attn.to_k)z)audio_tower.layers.{bid}.self_attn.v_projz)conformer.layers.{bid}.self_attn.linear_vz,conformer.layers.{bid}.attention.attn.v_projz'conformer.layers.{bid}.self_attn.v_projzencoder.layers.{bid}.attn.to_v)z0conformer.layers.{bid}.self_attn.relative_k_proj)z%conformer.layers.{bid}.norm_post_attn)z$conformer.layers.{bid}.norm_pre_attn)3conformer.layers.{bid}.attention.attn.per_dim_scalez.conformer.layers.{bid}.self_attn.per_dim_scale)zconformer.layers.{bid}.norm)z-audio_tower.layers.{bid}.self_attn_layer_normz$conformer.layers.{bid}.norm_self_attz.conformer.layers.{bid}.attention.pre_attn_normz"encoder.layers.{bid}.attn.pre_norm)z+audio_tower.layers.{bid}.self_attn.out_projz+conformer.layers.{bid}.self_attn.linear_outz%conformer.layers.{bid}.attention.postz%conformer.layers.{bid}.self_attn.postz encoder.layers.{bid}.attn.to_out)z)audio_tower.layers.{bid}.final_layer_normzconformer.layers.{bid}.norm_outz*conformer.layers.{bid}.attention.post_normzencoder.layers.{bid}.post_norm)z)conformer.layers.{bid}.norm_feed_forward1z5conformer.layers.{bid}.ffw_layer_start.pre_layer_normz3conformer.layers.{bid}.feed_forward1.pre_layer_normz!encoder.layers.{bid}.ff1.pre_norm)z6conformer.layers.{bid}.ffw_layer_start.post_layer_normz4conformer.layers.{bid}.feed_forward1.post_layer_norm)z7conformer.layers.{bid}.ffw_layer_start.post_layer_scale)zaudio_tower.layers.{bid}.fc1z,conformer.layers.{bid}.feed_forward1.linear1z2conformer.layers.{bid}.ffw_layer_start.ffw_layer_1z0conformer.layers.{bid}.feed_forward1.ffw_layer_1z encoder.layers.{bid}.ff1.up_proj)zaudio_tower.layers.{bid}.fc2z,conformer.layers.{bid}.feed_forward1.linear2z2conformer.layers.{bid}.ffw_layer_start.ffw_layer_2z0conformer.layers.{bid}.feed_forward1.ffw_layer_2z"encoder.layers.{bid}.ff1.down_proj)z,conformer.layers.{bid}.feed_forward2.linear1z0conformer.layers.{bid}.ffw_layer_end.ffw_layer_1z0conformer.layers.{bid}.feed_forward2.ffw_layer_1z encoder.layers.{bid}.ff2.up_proj)z,conformer.layers.{bid}.feed_forward2.linear2z0conformer.layers.{bid}.ffw_layer_end.ffw_layer_2z0conformer.layers.{bid}.feed_forward2.ffw_layer_2z"encoder.layers.{bid}.ff2.down_proj)z)conformer.layers.{bid}.norm_feed_forward2z3conformer.layers.{bid}.ffw_layer_end.pre_layer_normz3conformer.layers.{bid}.feed_forward2.pre_layer_normz!encoder.layers.{bid}.ff2.pre_norm)z4conformer.layers.{bid}.ffw_layer_end.post_layer_normz4conformer.layers.{bid}.feed_forward2.post_layer_norm)z5conformer.layers.{bid}.ffw_layer_end.post_layer_scale)z+conformer.layers.{bid}.self_attn.linear_poszJconformer.layers.{bid}.attention.attn.relative_position_embedding.pos_proj)z+conformer.layers.{bid}.self_attn.pos_bias_u)z+conformer.layers.{bid}.self_attn.pos_bias_v)zconformer.pre_encode.outz=model.audio_tower.subsample_conv_projection.input_proj_linearzconformer.output_proj)z(audio.multi_modal_projector.linear_{bid}zaudio_adapter.model.{bid}zaudio_tower.proj{bid})z"audio.multi_modal_projector.linearzaudio_tower.projzmodel.audio_tower.output_proj)z"audio.multi_modal_projector.ln_pre)z"audio.multi_modal_projector.ln_mid)z*conformer.layers.{bid}.conv.depthwise_convz/conformer.layers.{bid}.lconv1d.depthwise_conv1dz)encoder.layers.{bid}.conv.depth_conv.conv)z&conformer.layers.{bid}.conv.batch_normz-conformer.layers.{bid}.lconv1d.pre_layer_normz$encoder.layers.{bid}.conv.batch_norm)z+conformer.layers.{bid}.conv.pointwise_conv1z+conformer.layers.{bid}.lconv1d.linear_startz!encoder.layers.{bid}.conv.up_conv)z+conformer.layers.{bid}.conv.pointwise_conv2z)conformer.layers.{bid}.lconv1d.linear_endz#encoder.layers.{bid}.conv.down_conv)z conformer.layers.{bid}.norm_convz(conformer.layers.{bid}.lconv1d.conv_normzencoder.layers.{bid}.conv.norm)z7conformer.layers.{bid}.attention.attn.per_dim_key_scale)rG   )zmodel.embed_audio.embedding)z%model.embed_audio.hard_embedding_norm)z&model.embed_audio.embedding_projection)z%model.embed_audio.soft_embedding_norm)z,encoder.layers.{bid}.attn.rel_pos_emb.weight)z?projector.qformer.encoder.layer.{bid}.attention.attention.query)z=projector.qformer.encoder.layer.{bid}.attention.attention.key)z?projector.qformer.encoder.layer.{bid}.attention.attention.value)z<projector.qformer.encoder.layer.{bid}.attention.output.dense)z@projector.qformer.encoder.layer.{bid}.attention.output.LayerNorm)zDprojector.qformer.encoder.layer.{bid}.crossattention.attention.query)zBprojector.qformer.encoder.layer.{bid}.crossattention.attention.key)zDprojector.qformer.encoder.layer.{bid}.crossattention.attention.value)zAprojector.qformer.encoder.layer.{bid}.crossattention.output.dense)zEprojector.qformer.encoder.layer.{bid}.crossattention.output.LayerNorm)z>projector.qformer.encoder.layer.{bid}.intermediate_query.dense)z8projector.qformer.encoder.layer.{bid}.output_query.dense)z<projector.qformer.encoder.layer.{bid}.output_query.LayerNorm)zmodel.layers.{bid}.eh_proj)zmodel.layers.{bid}.embed_tokens)zmodel.layers.{bid}.enorm)zmodel.layers.{bid}.hnorm)z#model.layers.{bid}.shared_head.head)z#model.layers.{bid}.shared_head.normblock_mappings_cfg)z%model.layers.{bid}.residual_layernorm)r*   z5dict[MODEL_ARCH, dict[MODEL_TENSOR, tuple[str, ...]]]arch_block_mappings_cfgz#dict[str, tuple[MODEL_TENSOR, str]]mappingc                l   0 U l         U R                  R                  5        HF  u  p4U[        U   ;  a  M  [        U   nX54U R                   U'   U H  nX54U R                   U'   M     MH     XR
                  ;   a(  U R                  R                  U R
                  U   5        [        U5       H  nU R                  R                  5        Hb  u  p4U[        U   ;  a  M  [        U   R                  US9nX54U R                   U'   U H"  nUR                  US9nX54U R                   U'   M$     Md     M     g )N)bid)
rJ   r%   itemsr   r	   rI   rH   updaterangeformat)selfarchn_blockstensorkeystensor_namekeyrL   s           M/home/wildlama/miniconda3/lib/python3.13/site-packages/gguf/tensor_mapping.py__init__TensorNameMap.__init__  s%    --335LF]400&v.K)/(=DLL%%+$9S!  6 ///##**4+G+G+MN?C $ 7 7 = = ?t!44*62999D-3,A[)C**3*/C)/(=DLL%   !@ #    c                    U R                   R                  U5      nUb  U$ U HU  nUR                  U5      (       d  M  U R                   R                  US [        U5      *  5      nUc  MH  US   US   U-   4s  $    g )Nr   r   )rJ   getendswithlen)rQ   rW   try_suffixesresultsuffixs        rX   get_type_and_nameTensorNameMap.get_type_and_name  s~    !!#&M"F||F##))#mF|*<=%!!9fQi&&888	 #
 r[   c                2    U R                  XS9nUc  g US   $ )Nr`   r   rc   rQ   rW   r`   ra   s       rX   get_nameTensorNameMap.get_name  '    '''I>ayr[   c                2    U R                  XS9nUc  g US   $ )Nrf   r   rg   rh   s       rX   get_typeTensorNameMap.get_type  rk   r[   c                Z     U R                   U   S   $ ! [         a    [        U5      ef = f)Nr   )rJ   KeyErrorrQ   rW   s     rX   __getitem__TensorNameMap.__getitem__  s4    	 <<$Q'' 	 3-	 s    *c                    XR                   ;   $ NrJ   rq   s     rX   __contains__TensorNameMap.__contains__  s    ll""r[   c                ,    [        U R                  5      $ ru   )reprrJ   )rQ   s    rX   __repr__TensorNameMap.__repr__  s    DLL!!r[   rv   N)rR   r   rS   int)r$   )rW   strr`   Sequence[str]returnztuple[MODEL_TENSOR, str] | None)rW   r~   r`   r   r   z
str | None)rW   r~   r`   r   r   zMODEL_TENSOR | None)rW   r~   r   r~   )rW   r~   r   bool)r   r~   (s  __name__
__module____qualname____firstlineno__r   
TOKEN_EMBDTOKEN_TYPESTOKEN_EMBD_NORMPOS_EMBDOUTPUTDENSE_2_OUTDENSE_3_OUTOUTPUT_NORM
ROPE_FREQSROPE_FACTORS_LONGROPE_FACTORS_SHORTCONV1DV_MM_EMBEDDINGV_MM_HARD_EMB_NORMV_MM_INP_PROJV_MM_SOFT_EMB_NORMV_ENC_CONV_STEMV_ENC_CONV_STEM_NORMV_ENC_MSFA_EXPV_ENC_MSFA_EXP_NORMV_ENC_MSFA_PROJV_ENC_MSFA_PROJ_NORMV_ENC_MSFA_NORM	A_CTC_OUTA_CTC_OUT_MIDA_QF_PROJ_QUERYA_QF_PROJ_NORMA_QF_PROJ_LINEARr%   __annotations__	ATTN_NORMATTN_NORM_2ATTN_QKVATTN_QATTN_KATTN_VATTN_OUTATTN_OUT_NORMATTN_POST_NORMATTN_ROT_EMBD
ATTN_SINKS	ATTN_GATEFFN_NORMFFN_PRE_NORMFFN_PRE_NORM_2FFN_POST_NORMFFN_POST_NORM_1FFN_POST_NORM_2FFN_GATE_INPFFN_GATE_INP_SHEXPFFN_EXP_PROBS_BFFN_UP
FFN_UP_EXPFFN_UP_SHEXPFFN_UP_CHEXPFFN_ACTFFN_GATEFFN_GATE_EXPFFN_GATE_SHEXPFFN_GATE_CHEXPFFN_GATE_UP_EXPMOE_LATENT_DOWNMOE_LATENT_UPFFN_DOWNFFN_DOWN_EXPFFN_DOWN_SHEXPFFN_DOWN_CHEXPATTN_Q_NORMATTN_K_NORMLAYER_OUT_NORMLAYER_OUT_SCALEPER_LAYER_TOKEN_EMBDPER_LAYER_MODEL_PROJPER_LAYER_PROJ_NORM
ALTUP_PROJALTUP_UNEMBD_PROJPER_LAYER_INP_GATEPER_LAYER_PROJPER_LAYER_POST_NORMALTUP_CORRECT_COEFALTUP_CORRECT_SCALEALTUP_PREDICT_COEFALTUP_ROUTERALTUP_ROUTER_NORMLAUREL_LLAUREL_RLAUREL_POST_NORMSSM_IN
SSM_CONV1DSSM_XSSM_DTSSM_DT_NORMSSM_A
SSM_B_NORM
SSM_C_NORMSSM_DSSM_NORMSSM_OUT	SSM_ALPHASSM_BETA_ALPHASSM_CONV1D_QSSM_CONV1D_KSSM_CONV1D_VSSM_F_ASSM_F_BSSM_BETASSM_G_ASSM_G_BTIME_MIX_W0TIME_MIX_W1TIME_MIX_W2TIME_MIX_A0TIME_MIX_A1TIME_MIX_A2TIME_MIX_V0TIME_MIX_V1TIME_MIX_V2TIME_MIX_G1TIME_MIX_G2TIME_MIX_K_KTIME_MIX_K_ATIME_MIX_R_KTIME_MIX_LERP_XTIME_MIX_LERP_KTIME_MIX_LERP_VTIME_MIX_LERP_RTIME_MIX_LERP_GTIME_MIX_LERP_WTIME_MIX_FIRSTTIME_MIX_DECAYTIME_MIX_DECAY_W1TIME_MIX_DECAY_W2TIME_MIX_KEYTIME_MIX_VALUETIME_MIX_RECEPTANCETIME_MIX_GATETIME_MIX_LNTIME_MIX_OUTPUTCHANNEL_MIX_LERP_KCHANNEL_MIX_LERP_RCHANNEL_MIX_KEYCHANNEL_MIX_RECEPTANCECHANNEL_MIX_VALUEATTN_Q_AATTN_Q_BATTN_KV_A_MQA	ATTN_KV_BATTN_K_BATTN_V_BATTN_Q_A_NORMATTN_KV_A_NORMATTN_SUB_NORMFFN_SUB_NORMDEC_ATTN_NORM
DEC_ATTN_Q
DEC_ATTN_K
DEC_ATTN_VDEC_ATTN_OUTDEC_ATTN_REL_BDEC_CROSS_ATTN_NORMDEC_CROSS_ATTN_QDEC_CROSS_ATTN_KDEC_CROSS_ATTN_VDEC_CROSS_ATTN_OUTDEC_CROSS_ATTN_REL_BDEC_FFN_NORMDEC_FFN_GATE
DEC_FFN_UPDEC_FFN_DOWNDEC_OUTPUT_NORMENC_ATTN_NORM
ENC_ATTN_Q
ENC_ATTN_K
ENC_ATTN_VENC_ATTN_OUTENC_ATTN_REL_BENC_FFN_NORMENC_FFN_GATE
ENC_FFN_UPENC_FFN_DOWN	VISEXP_UPVISEXP_GATEVISEXP_DOWNVISEXP_ATTN_OUTVISEXP_ATTN_QKVINDEXER_K_NORMINDEXER_PROJINDEXER_ATTN_KINDEXER_ATTN_Q_BENC_OUTPUT_NORMCLSCLS_OUTCLS_NORMCONVNEXT_DWCONVNEXT_NORMCONVNEXT_PW1CONVNEXT_PW2CONVNEXT_GAMMAPOSNET_CONV1POSNET_CONV2POSNET_NORMPOSNET_NORM1POSNET_NORM2POSNET_ATTN_NORMPOSNET_ATTN_QPOSNET_ATTN_KPOSNET_ATTN_VPOSNET_ATTN_OUTSHORTCONV_CONVSHORTCONV_INPROJSHORTCONV_OUTPROJV_MMPROJV_MMPROJ_FCV_MMPROJ_MLPV_MMPROJ_PEGV_ENC_EMBD_CLSV_ENC_EMBD_PATCHV_ENC_EMBD_NORMV_ENC_EMBD_POSV_ENC_EMBD_IMGNLV_ENC_EMBD_VSEPV_ENC_ATTN_QKVV_ENC_ATTN_QV_ENC_ATTN_Q_NORMV_ENC_ATTN_KV_ENC_ATTN_K_NORMV_ENC_ATTN_VV_ENC_INPUT_NORMV_ENC_ATTN_OV_ENC_POST_ATTN_NORMV_ENC_FFN_UPV_ENC_FFN_GATEV_ENC_FFN_DOWNV_ENC_ATTN_POST_NORMV_ENC_FFN_POST_NORMV_LAYER_SCALE_1V_LAYER_SCALE_2V_LAYER_OUT_SCALE
V_PRE_NORMV_POST_NORMV_MM_POST_NORMV_MM_INP_NORMV_RESMPL_POS_EMBD_KV_RESMPL_ATTN_QV_RESMPL_ATTN_KV_RESMPL_ATTN_VV_RESMPL_ATTN_OUTV_RESMPL_KVV_RESMPL_POST_NORMV_RESMPL_KV_NORMV_RESMPL_Q_NORMV_RESMPL_PROJV_RESMPL_QUERYV_TOK_EMBD_IMG_BREAKV_MM_PATCH_MERGER	V_DS_NORMV_DS_FC1V_DS_FC2V_SAM_POS_EMBDV_SAM_PATCH_EMBDV_SAM_PRE_NORMV_SAM_POST_NORMV_SAM_ATTN_POS_HV_SAM_ATTN_POS_WV_SAM_ATTN_QKVV_SAM_ATTN_OUTV_SAM_MLP_LIN_1V_SAM_MLP_LIN_2
V_SAM_NECKV_SAM_NET_2V_SAM_NET_3V_MM_POST_FC_NORMV_MM_UP	V_MM_DOWN	V_MM_GATE	V_TOK_BOI	V_TOK_EOIV_MM_PRE_NORMV_TOK_IMG_BEGINV_TOK_IMG_END
V_STD_BIASV_STD_SCALEA_ENC_EMBD_POSA_ENC_EMBD_NORMA_ENC_EMBD_TO_LOGITSA_ENC_CONV1DA_ENC_CONV1D_NORMA_ENC_INP_PROJA_ENC_CONV2DA_ENC_CONV_OUT
A_PRE_NORMA_POST_NORMA_ENC_ATTN_QA_ENC_ATTN_KA_ENC_ATTN_VA_ENC_ATTN_K_RELA_ENC_ATTN_POST_NORMA_ENC_ATTN_PRE_NORMA_ENC_PER_DIM_SCALEA_ENC_LAYER_PRE_NORMA_ENC_INPUT_NORMA_ENC_OUTPUTA_ENC_OUTPUT_NORMA_ENC_FFN_NORMA_ENC_FFN_POST_NORMA_ENC_FFN_SCALEA_ENC_FFN_UPA_ENC_FFN_GATEA_ENC_FFN_DOWNA_ENC_FFN_UP_1A_ENC_FFN_DOWN_1A_ENC_FFN_NORM_1A_ENC_FFN_POST_NORM_1A_ENC_FFN_SCALE_1A_ENC_LINEAR_POSA_ENC_POS_BIAS_UA_ENC_POS_BIAS_V	A_ENC_OUTA_MMPROJA_MMPROJ_FCA_MM_NORM_PREA_MM_NORM_MIDA_ENC_CONV_DWA_ENC_CONV_NORMA_ENC_CONV_PW1A_ENC_CONV_PW2A_ENC_NORM_CONVA_PER_DIM_K_SCALEA_PER_DIM_SCALEA_MM_EMBEDDINGA_MM_HARD_EMB_NORMA_MM_INP_PROJA_MM_SOFT_EMB_NORMA_ENC_ATTN_REL_POS_EMBA_QF_SELF_ATTN_QA_QF_SELF_ATTN_KA_QF_SELF_ATTN_VA_QF_SELF_ATTN_OA_QF_SELF_ATTN_NORMA_QF_CROSS_ATTN_QA_QF_CROSS_ATTN_KA_QF_CROSS_ATTN_VA_QF_CROSS_ATTN_OA_QF_CROSS_ATTN_NORMA_QF_FFN_UPA_QF_FFN_DOWNA_QF_FFN_NORMNEXTN_EH_PROJNEXTN_EMBED_TOKENSNEXTN_ENORMNEXTN_HNORMNEXTN_SHARED_HEAD_HEADNEXTN_SHARED_HEAD_NORMrH   r   ARCTICFFN_NORM_EXPrI   rY   rc   ri   rm   rr   rw   r{   __static_attributes__r$   r[   rX   r   r      s`!   d9 "
d9> 	   #
?d9H 	$$ '
Id9f 	  
gd9v 	 
wd9P 	   #
Qd9V 	   #
Wd9^ 	   #
_d9P 	 "
Qd9Z 	&&[d9\ 	'']d9` 	 
ad9h 	## &
id9n 	'' *
od9t 	"" %
ud9z 	'' *
{d9@ 	$$ '
Ad9F 	)) ,
 	## &
 	(( +
 	$$ '
 	)) ,
 	$$ '
 	 !
 	"" %
 	$$ '
 	## &
 	%% (
Cd9L5 dL~? !!
~?L 	   #
M~?^ 	  
_~?N 	 
O~?t 	 
u~?\ 	 
]~?B 	 # 
C~?N 	"" %
O~?` 	## &
a~?p 	"" %
q~?~ 	 "
~?H 	 !
I~?V 	  
W~?L 	!! $
M~?Z 	## &
[~?d 	"" %
e~?v 	$$ '
w~?~ 	$$ '
~?F 	!! $
G~?n 	'' *
o~?v 	$$ '
w~?R	 	 '
S	~?d
 	 
"
e
~?|
 	!! 
$
}
~?T 	!! $
U~?^ 	 
_~?h 	  
i~?N 	!! 	$
O~?d 	## &
e~?x 	## &
y~?@ 	$$ '
A~?J 	$$ '
K~?R 	"" %
S~?\ 	 " 
]~?d 	!! $
e~?@ 	## 
&
A~?X 	## &
Y~?` 	   #
a~?@ 	   #
A~?` 	 "
a~?h 	## &
i~?| 	$$ '
}~?D 	)) ,
E~?L 	)) ,
M~?T 	(( +
U~?\ 	 "
]~?d 	&& )
e~?l 	'' *
m~?t 	## &
u~?| 	(( +
}~?D 	'' *
E~?L 	(( +
M~?T 	'' *
U~?\ 	!! $
]~?d 	&& )
e~?l 	  
m~?t 	  
u~?| 	%% (
}~?D 	 
E~?T 	 "
U~?d 	 
e~?r 	 
s~?F 	   #
G~?P 	 
Q~?b 	 "
c~?n 	 "
o~?z 	 
{~?H 	  
I~?V 	 
W~?f 	 !
g~?n 	## &
o~?x 	!! $
y~?~ 	!! $
~?D 	!! $
E~?J 	 
K~?P 	 
Q~?V 	  
W~?^ 	 
_~?d 	 
e~?j 	   #
k~?r 	   #
s~?~ 	   #
~?J 	   #
K~?R 	   #
S~?Z 	   #
[~?b 	   #
c~?j 	   #
k~?r 	   #
s~?z 	   #
{~?B 	   #
C~?J 	!! $
K~?R 	!! $
S~?Z 	!! $
[~?b 	$$ '
c~?l 	$$ '
m~?v 	$$ '
w~?@ 	$$ '
A~?J 	$$ '
K~?T 	$$ '
U~?^ 	## &
_~?f 	## &
g~?p 	&& )
q~?z 	&& )
{~?D 	!! $
E~?R 	## &
S~?` 	(( +
a~?n 	"" %
o~?x 	   #
y~?B 	$$ '
C~?P 	'' *
Q~?Z 	'' *
[~?b 	$$ '
c~?l 	++ .
m~?t 	&& )
u~?~ 	  
~?H 	  
I~?R 	"" %
S~?\ 	 !
]~?d 	  
e~?n 	  
o~?x 	"" %
y~?B 	## &
C~?L 	"" %
M~?T 	!! $
U~?\ 	"" %
]~?d 	 "
e~?l 	 "
m~?t 	 "
u~?| 	!! $
}~?D 	## &
E~?L 	(( +
M~?T 	%% (
U~?\ 	%% (
]~?d 	%% (
e~?l 	'' *
m~?t 	)) ,
u~?| 	!! $
}~?D 	!! $
E~?L 	 "
M~?V 	!! $
W~?^ 	$$ '
_~?f 	"" %
g~?n 	 "
o~?v 	 "
w~?~ 	 "
~?F  	!! $
G ~?N  	## &
O ~?V  	!! $
W ~?^  	!! $
_ ~?f  	 "
g ~?p  	!! $
q ~?x  	 !
y ~?@! 	   #
A!~?H! 	   #
I!~?P! 	$$ '
Q!~?X! 	$$ '
Y!~?`! 	## &
a!~?h! 	!! $
i!~?p! 	## &
q!~?x! 	%% (
y!~?D" 	$$ '
E"~?N" 	 
O"~?^" 	 
_"~?f" 	  
g"~?p" 	   #
q"~?x" 	"" %
y"~?@# 	!! $
A#~?H# 	!! $
I#~?P# 	## &
Q#~?X# 	!! $
Y#~?`# 	!! $
a#~?h# 	   #
i#~?p# 	!! $
q#~?x# 	!! $
y#~?@$ 	%% (
A$~?H$ 	"" %
I$~?P$ 	"" %
Q$~?X$ 	"" %
Y$~?`$ 	$$ '
a$~?h$ 	## &
i$~?p$ 	%% (
q$~?x$ 	&& )
y$~?F% 	  
G%~?Z% 	   #
[%~?j% 	!! $
k%~?x% 	!! $
y%~?@& 	## &
A&~?R& 	%% (
S&~?z& 	$$ '
{&~?D' 	## &
E'~?d' 	%% (
e'~?n' 	$$ '
o'~?x' 	## &
y'~?L( 	!! $
M(~?l( 	&& )
m(~?z( 	!! $
{(~?Z) 	&& )
[)~?h) 	!! $
i)~?H* 	%% (
I*~?p* 	!! $
q*~?\+ 	)) ,
]+~?F, 	!! $
G,~?p, 	## &
q,~?~, 	## &
,~?h- 	)) ,
i-~?p- 	(( +
q-~?x- 	$$ '
y-~?D. 	$$ '
E.~?P. 	&& )
Q.~?X. 	 "
Y.~?l. 	   #
m.~?@/ 	## &
A/~?L/ 	"" %
M/~?T/ 	"" 
%
U/~?l/ 	'' *
m/~?t/ 	(( +
u/~?|/ 	$$ '
}/~?D0 	$$ '
E0~?L0 	$$ '
M0~?T0 	&& )
U0~?^0 	   #
_0~?f0 	'' *
g0~?n0 	%% (
o0~?v0 	$$ '
w0~?~0 	"" %
0~?F1 	## &
G1~?N1 	)) ,
O1~?V1 	&& )
W1~?b1 	 !
c1~?j1 	  
k1~?r1 	  
s1~?z1 	## &
{1~?B2 	%% (
C2~?J2 	## &
K2~?R2 	$$ '
S2~?Z2 	%% (
[2~?b2 	%% (
c2~?j2 	## &
k2~?r2 	## &
s2~?z2 	$$ '
{2~?B3 	$$ '
C3~?J3 	 "
K3~?R3 	   #
S3~?Z3 	   #
[3~?b3 	&& )
c3~?j3 	 
k3~?t3 	 !
u3~?~3 	 !
3~?H4 	 !
I4~?P4 	 !
Q4~?X4 	"" %
Y4~?`4 	$$ '
a4~?h4 	"" %
i4~?p4 	 "
q4~?x4 	   #
y4~?D5 	## &
E5~?N5 	$$ '
O5~?V5 	)) ,
W5~?^5 	!! $
_5~?l5 	&& )
m5~?t5 	## &
u5~?~5 	!! $
5~?F6 	## &
G6~?N6 	O6~?R6 	   #
S6~?\6 	!! $
]6~?l6 	!! $
m6~?|6 	!! $
}6~?L7 	%% (
M7~?T7 	)) ,
U7~?\7 	(( +
]7~?d7 	(( +
e7~?n7 	)) ,
o7~?v7 	%% (
w7~?D8 	!! $
E8~?T8 	&& )
U8~?b8 	## &
c8~?p8 	(( +
q8~?z8 	$$ '
{8~?B9 	!! $
C9~?R9 	##RS9~?V9 	## &
W9~?f9 	## &
g9~?t9 	%% (
u9~?B: 	%% (
C:~?P: 	** -
Q:~?Z: 	&& )
[:~?b: 	%% (
c:~?l: 	%% (
m:~?t: 	%% (
u:~?|: 	 !
}:~?N; 	  
O;~?Z; 	   #
[;~?f; 	"" %
g;~?n; 	"" %
o;~?v; 	"" %
w;~?B< 	$$ '
C<~?N< 	## &
O<~?Z< 	## &
[<~?f< 	$$ '
g<~?r< 	&& )
s<~?z< 	$$ '
{<~?B= 	## &
C=~?H= 	'' *
I=~?N= 	"" %
O=~?T= 	'' *
U=~?\= 	++ .
]=~?d= 	%% (
e=~?l= 	%% (
m=~?t= 	%% (
u=~?|= 	%% (
}=~?D> 	(( +
E>~?L> 	&& )
M>~?T> 	&& )
U>~?\> 	&& )
]>~?d> 	&& )
e>~?l> 	)) ,
m>~?t> 	   #
u>~?|> 	"" %
}>~?D? 	"" %
E?~?N? 	"" %
O?~?V? 	'' *
W?~?^? 	   #
_?~?f? 	   #
 	++ .
 	++ .
w?~?; ~D@ 	!! $ %% (	
	VR 	 10>,	 	   #" "r[   r   c                    [        X5      $ ru   )r   )rR   rS   s     rX   get_tensor_name_mapr    s    ((r[   N)rR   r   rS   r}   r   r   )
__future__r   typingr   	constantsr   r   r   r	   r   r  r$   r[   rX   <module>r     s$    "  L Ll#" l#"^G)r[   