
    +jtv             -       0x   d dl Z d dlmZmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlZd dlmZ d dlmZmZmZ d dlmZmZmZmZ d d	lmZ d d
lmZmZ d dlmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z) d dl*m+Z+m,Z,m-Z-m.Z.m/Z/ d dl0m1Z1m2Z2 d dl3m4Z5 d dl6m7Z7m8Z8 d dl9m:Z;  e	d          Z< ed          Z=ej>        j?        Z?ej@        A                    ddd          ZB eCd          \  ZDZEZFd ZGd ZHdeee=e<f         gee=e<f         f         fdZIde!fdZJd ZKd ZL eIe?jM        e?jN        g           e/            dddejO        ddfd                         ZP eIe?jQ        jR        e?jQ        jS        g           e/            d!                         ZT eIe?jU        jR        e?jU        jS        g           e/            d"d#d$                        ZU eIe?jV                   e/            d%                         ZV eIe?jW        jR        e?jW        jS        e?jX        jR        e?jX        jS        g           e/d&d'          d(                         ZY eIe?jZ        jR        e?jZ        jS        g           e/            d)                         ZZd* Z[dVd+ed,e\e]         d-e^fd.Z_ eIe?j`        jR        e?j`        jS        g           e/            d/                         ZadZbd,e\e]         fd0Zc eIe?jd        jR        e?jd        jS        g           e/            d1                         Ze eIe?jf        jg                  dd2d3            Zh eIe?jf        jR                  eji        dddd4d5            Zj eIe?jk        jR        e?jk        jS        g           e/            eji        dddd4d6                        Zl eIe?jk        jm        e?jk        jn        g           e/            eji        dddd4d7                        Zo eIe?jp        jR        e?jp        jS        g           e/            ddddd4d8                        Zq eIe?jr        jR                  d9             Zs eIe?jt        jR                  d:             Zud; Zv eIe?jw        jR                  dWd>            Zx eIe?jy        jR                  dWd?            Zz eIe?j{        jR        e?j{        jS        g           e/            d+ed,e\e]         d@e]dAe]fdB                        Z| eIe?j}        jR                  dVdC            Z~dD Z eIe?j        jR                  dE             Z eIe?j                  	 	 	 dXdFedGedHedIedz  dJedz  dKej        dz  fdL            Z eIe?j                  	 dYdMedNedOedKej        dz  fdP            Z eIe?j                  dQdQddRdFedMedNedOedKej        dz  f
dS            Z eIe?j                  	 	 	 	 	 	 	 dZdTej        dUej        dIedz  dVedz  dKej        dz  dWe^dXe]dYe]dZe]fd[            Z eIe?j        jR                  d\d]d+ed,e]d^ed_ej        d`edae^defdb            Z eIe?j        jR                  d\d]d+ed,e]d^ed_ej        d`edae^defdc            Z e/             eIe?j        jR                  dd                         Z eIe?j        jR                  dddd dddedfed`edgedz  d'edz  dhedz  die]dje^defdk            Z eIe?j        jR        e?j        j        g           e/            dl                         Z eIe?j        j                  dVdm            Z eIe?j        jR        e?j        j        g           e/            dn                         Z eIe?j        j                  dVdo            Z eIe?j        jR                  dp             Z eIe?j        jS                  dq             Z eIe?j        jR                  dr             Z eIe?j        j                  ds             Z eIe?j        jR                  dt             Z eIe?j        jR                  ddddddudv            Z eIe?j        jR                  d[dw            Z eIe?j        jR                  dXdx            Z eIe?j        jR                  d[dy            Z eIe?j        jR                  dz             Z eIe?j        j                  d{             Zd+ed|efd}Zd+ed~edefdZ	 d\ded|ede^fdZd]d~ed|edefdZd~edede^d|efdZ	 d^dededFedefdZdefdZ eIe?j        jR        e?j        j        g           e/dd          d_d~edede^fd                        Z eIe?j        jR        e?j        jS        g           e/            dFedefd                        Z eIe?j        g           e/dd          dFefd                        ZdedefdZ eIe?j                   e/            d+ed~ede^defd                        Z eIe?j                   e/            dVd+ed~ede^defd                        Z eIe?j                   e/            dVd+ede^defd                        Z eIe?j                   e/            dVd+ede^defd                        Z eIe?j        jR                  d`d~ede^de^fd            Z eIe?j        jR        e?j        jS        g           e/            dFededefd                        Z eIe?j        jR                  dVd~ede^fd            Z eIe?j        jR        e?j        jS        g           e/ddd          dddd+ede^de^deeeef         fd                        Z eIe?j        jR        e?j        jS        g           e/            dddededede^def
d                        Z eIe?j        jR        e?j        jS        g           e/ddd          d\dd~ede^deeeef         fd                        Z eIe?j        jR        e?j        jS        g           e/ddd          d\ddd~ede^de^deeeef         fd                        Z eIe?j        jR        e?j        jS        g           e/            d\dddededede^de^defd                        Z eIe?j                   e/ddd          	 	 dadedede^de^deeeef         f
d                        Zdedee^e^f         fdZ eIe?j        jR        e?j        jS        g           e/dd          dbd~ededeeef         fd                        Z eIe?j        jR        e?j        j        g           e/dddd          d~edeeeeef         fd                        Z eIe?j        jR                  	 	 	 dcd~ede^de^dedz  fdĄ            Zdededee\e]         e\e]         f         fdǄZdedededz  deeef         fdȄZdFedede^fdʄZ eIe?j                  d\dddddd˜d~edede^de^dedz  dedz  dedz  dedz  deeeeef         fd̄            Z eIe?j        jR        e?j        jS        g          d\ddd͜d~edede^de^de^dedz  defdЄ            Z eIe?j                   e/ddd\Ӧ          	 	 	 ddd+ed~ede^de^de^deeef         fdՄ                        Z eIe?j        jR                  dք             Z eIe?j                   e/            	 	 dedFededede^de^defdׄ                        Zd؄ Zdل Z eIe?j                   e/            dڄ                         Z eIe?j                   e/            dۄ                         Zd܄ Z eIe?j                   e/dݦ          dބ                         Z eIe?j                   e/dݦ          d߄                         Zd Z eIe?j                   e/            d                         Z eIe?j                   e/            d                         Z eIe?j        jR                  d             Z eIe?j        jR        e?j        j        e?j        jR        e?j        j        g           e/dݦ          d                         Zd Z eIe?j                   e/            d                         Z eIe?j                   e/            d                         Z  eIe?j        jR        e?j        j        e?j        jR        e?j        j        g           e/dݦ          d                         Z eIe?j                   e/            dfd+ededefd                        Z eIe?j                   e/            ded+edededef
d                        Z eIe?j	        jR        e?j	        jS        g           e/d\Ӧ          dQdQdd                        Z
 eIe?j        jR        e?j        jS        g           e/            dd2d                        Z eIe?j        j                  dgd            Z eIe?j        j                  dgd            Z eIe?j        jR        e?j        jS        g           e/            dYd                        Z eIe?j        jR                  	 	 d`d            Z eIe?j                   e/d\Ӧ          dYdKej        dz  fd                        Zd ZdhdZ	 dYdej        dGej        de\e]         e]z  de\e]         e]z  de\e]         e]z  de^d e]de\e]         e]z  dz  fdZd Z eIe?j        jR                  dej        dGej        dIej        dz  dej        dz  dej        dz  de^dedefd	            Z eIe?j        jR                  dej        dGej        dIej        de\e]         de\e]         de\e]         de^de\e]         d e]fd
            Zej        j         r%ej@        A                    ddd          Z! eIej>        j"        j#        jR                  d             Z$ eIej>        j"        j%        jR                  d             Z&ej        j'        rEej@        A                    ddd          Z( eIej>        j)        j*                  d             Z+ej@        A                    ddd          Z, eIej>        j-        j.        jR                   eIej>        j-        j/        jR                   eIej>        j-        j/        j0                  d                                     Z1 eIej>        j-        j.        j2                   eIej>        j-        j.        j3                  d                         Z4 eIej>        j-        j5        jR                   eIej>        j-        j5        j0                  d                         Z6 eIej>        j-        j5        j2                   eIej>        j-        j5        j3                  d                         Z7 eIej>        j-        j8        jR                   eIej>        j-        j9        jR                  d                         Z:ej@        A                    ddd          Z; eIej>        j<        j=                  	 	 	 	 did            Z> eIej>        j<        j?                  d             Z@d ZA eIe?jB                  dFej        dede]dej        dej        f
d             ZC eIe?jD        jR                  	 	 	 	 	 djd!            ZEd" ZF eIe?jG        jR                  d#             ZH eIe?jI                   e/            	 	 	 	 	 djd$                        ZJ eIe?jK                   e/dݦ          d%                         ZL eIe?jM        jR                  d&             ZN eIe?jO        jR                  d'             ZP eIe?jQ        jR                  d(             ZR eIe?jS                   e/dݦ          d)                         ZTd*edefd+ZU eIe?jV                   e/dd'          d,                         ZW eIe?jX                   e/dݦ          d-                         ZY eIe?jZ                   e/dd'          d.                         Z[ eIe?j\                   e/dݦ          d/                         Z] eIe?j^        j                  dYd0            Z_ eIe?j`        jR        e?j`        jS        g           e/            d1                         Za eIe?jb        jR        e?jb        jS        g           e/            d"d2d3e]fd4                        Zb eIej>        j?        jc        jR        ej>        j?        jc        jS        g           e/            d5                         Zc eIe?jd        j        e?je        j        g          d6             Zf eIe?jg        jR        g          d7             Zh eIe?ji        jR        e?ji        jS        g           e/d\Ӧ          dQdQdd8                        Zj eIe?jk        j        g          d9             Zl eIe?jm        jR        e?jn        jR        g          ddd:d;            Zo eIe?jp        jR        g          ddd:d<            Zq eIe?jr        g           e/            d=                         Zs eIe?jt        g          d>             Zu eIe?jv        g          d?             Zw eIe?jx        g          d@             Zy eIe?jz        g          dA             Z{ eIe?j|        g          dB             Z|dCe]dDe]de]fdEZ}dF Z~ eIe?j        g          dIedz  fdG            Z eIe?j        g          dH             Z eIe?j        g          dI             Z eIe?j        jR                  dJ             Z eIe?j                   e/            dK                         Z eIe?j        jR                  	 	 	 	 	 	 dkdL            Z eIe?j        jR                  dM             Zd\dNZ eIe?j        jR        e?j        jS        g           e/            dlddOdP                        Z eIe?j        jR        e?j        jR        g          dQ             Z eIe?j        j        e?j        j        e?j        j        e?j        j        e?j        jR        e?j        j        g           e/d&d'          dmdR                        Z eIe?j        jR                  dS             Z eIe?j        jR                  dT             Z eIe?j        jR                  dU             Z eIe?j        j        e?j        j        e?j        j        e?j        j        e?j        jR        e?j        jR        e?j        jR        g          dV             Z eIe?j        j        e?j        j        e?j        j        e?j        j        g          ddW            Z eIe?j        j        e?j        j        g          ddX            Z eIe?j        jR        e?j        j        g          dY             ZdZ Z eIe?j        j        e?j        j        g          d[             Z eIe?j        j        e?j        j        g          d\             Z eIe?j        jR                  d]             Z eIe?j        j        e?j        j        g          d^             Z eIe?j        j        e?j        j        g          d_             Z eIe?j        jR                  d`             Z eIe?j        j                   e/            ddefda                        Z eIe?j        g           e/            	 dndd                        Z eIe?j        g          	 dnde            Z eIe?j        g          	 dndf            Z eIe?j        jR        e?j        jR        g          dVdg            Z eIe?j        j                  dh             Z eIe?j        jR                  di             Z eIe?j                  dj             Z eIe?j                   e/            dk                         Z eIe?j                  dl             Z eIe?j        jR                  dVdm            ZАd[dnZ eIe?j        jR        e?j        jS        g           e/d\Ӧ          do                         Z eIe?j        j        e?j        j        g           e/d\Ӧ          dp                         ZՐdq Z֐dr Zאds Zؐdt Z	 dVdFedue]dve]dwe]dxe]dye]dze]d{e]d|e]d}e]d~e]de]de]de]de]de]de]de]de]de]dede^f,dZڐd ZdFed*edue]dve]dwe]dxe]dye]dze]d{e]d|e]d}e]d~e]de]de]de]de]de]de]def&dZܐd Z eIe?j                   e/dݦ          d                         Z eIe?j        jR                  	 	 	 	 did            Z eIe?j        jR                  d             Z eIe?j                   e/dd'          	 	 	 	 did                        Z eIe?j                   e/dݦ          d                         ZdFedefdZ G d de          ZdFedede]fdZ eIe?j        jR                  d             Z eIe?j                   e/            d                         Z eIe?j                   e/dݐd          d                         Z eIe?j        jR        g          d             Z eIe?j        jR                  	 	 	 	 	 dod            Z eIe?j        jR        e?j        jS        g           e/            ddddddd                        Z eIe?j        jR        e?j        jS        g           e/            ddddddd                        Z eIe?j        jR                  d             Z eIe?j        jR                  dpd            Zd Zd\d,e]de]de^fdZd Zd Z eIe?j        jR                  dVd            ZdVdZdYdZ d ZdYdZdqdZ eIe?j        jR                  d             Z eIe?j                  d             Z eIe?j        j	        e?j        j
        e?j        j        e?j        j        g           e/            dYd                        Z eIe?j        j	        e?j        j
        e?j        j        e?j        j        g          dYd            Z eIe?j        jR        g          	 	 	 	 drdedededede^de^dedz  fd            Z eIe?j        j<        g          	 	 	 	 drdededededz  dedz  dedz  dede^de^dedz  fd            Zdedee]df         fdZ eIe?j        g          	 	 	 	 drdededededz  de^dede^de^dedz  fdÄ            Z eIe?j        g          	 	 	 	 	 dsdededededz  dede^de^dedz  fdĄ            Z eIe?j                  ddŜdededededede\e^         dedededede]de]dede^dedededz  f"dτ            Z eIe?j        g          	 dYdedededededededede]de]dede^dedededz  fdЄ            Z eIe?j        g          	 	 	 	 dtdedededede^dedz  dedz  fd҄            Z eIe?j        g          	 	 d[dededededededede^dedz  dedz  fdӄ            Z eIe?j         g          	 	 	 	 	 	 dudededededz  dede^dedz  dedz  de^deeef         fdք            Z! eIe?j"        g          	 	 	 dvdededededz  de^de^dedz  fdׄ            Z# eIe?j$        g          	 	 dwdedededededz  dededededede\e^         de^dedz  fd؄            Z% eIe?j&        g          	 dYdededededededededededede]de]dede^dedz  f dل            Z' eIe?j(        jR        g          	 	 	 	 	 	 	 dxdededededz  dedz  de]de]dede^de^dedz  de]dz  de]dz  dedz  dedz  dedz  de]dz  f"d            Z) eIe?j*        jR        g          	 	 	 	 	 	 	 dxdedededededz  dedz  de]de]dede^de^dedz  de]dz  de]dz  dedz  dedz  dedz  de]dz  f$d            Z+ eIe?j(        j<        g          	 	 	 	 	 dodededededz  dedz  de]de]dede^de^dedz  dedz  dedz  dedz  de]dz  de]dz  dedz  dedz  f$d            Z, eIe?j-        g          	 	 	 dXdedededededededede]de]dede^dedededz  de]dz  de]dz  f"d            Z. eIe?j/        g          	 	 	 	 	 dydedededIedz  dedz  dedz  de]dz  de]dz  dede]de^dedz  dedz  dedz  de]dz  fd            Z0 eIe?j1        g          	 	 	 dqdededededIedz  dedz  dedz  dej2        dej2        dedededede]de^dedz  de]dz  de^f$d            Z3	 	 	 	 dzd+ej        dOej        dej        dej        dIej        dz  dej        dz  dKej        dz  de^fdZ4 eIe?j5        jR        g          	 	 	 	 dzd+ej        dOej        dej        dej        dIej        dz  dej        dz  dKej        dz  de^fd            Z6	 	 	 	 	 d{d+ej        dOej        de\ej                 de\e7         de\ej                 de\e7         dIej        dz  dKej        dz  de\e8         dz  de\e8         dz  de^fdZ7 eIe?j8        jR        g          	 	 	 	 dzd+ej        dOej        de\ej                 de\e7         de\e8         de\ej                 de\e7         de\e8         dIej        dz  dej        dz  de\e]         dz  de^fd            Z9 eIe?j:        j;        e?j:        j<        g           e/            d\d                        Z= eIe?j>        j;                  d\d             Z? eIe?j@        jR        e?j@        jS        g           e/            dVdd2d                        ZAd ZBd ZC eIe?jD        jR        e?jE        jR        g          dYd            ZD eIe?jF        jR        e?jG        jR        g          d[d            ZF eIe?jH        jR        e?jI        jR        g          	 	 d[d*edee]ej2        z           dee]ej2        z           dedz  d	edz  f
d
            ZH eIe?jJ        jR        e?jK        jR        g          dXd            ZJ eIe?jL        jR        e?jL        jM        e?jL        j        e?jL        jN        g          d|d            ZOd ZP eIe?jQ        jR                  	 	 d[d            ZR eIe?jS        jR                  d             ZS eIe?jT        jR                  d             ZT eIe?jU        jR                  d             ZUd ZVd ZW eIe?jX        jR        e?jY        jR        g          dld            ZZ eIe?j[        jR                  d}d            Z[ eIe?j\        jR                  d~d            Z] eIe?j^                   e/            	 dd                        Z_ eIe?j`        jR        e?j`        j        g           e/d&d'          dmd                        Zaejb        Zcd Zd eIe?je        jR                  d             Ze eIe?jf        jR                  d             Zf eIe?jg        jR                  d             Zh eIe?ji        jR                  d             Zi eIe?jj        j        e?jj        jk        g           e/            dddd                        Zl eIe?jj        j        e?jj        jm        g          dddd+e&d ed!e^d"e^fd#            Zn eIe?jo        g           e/            dd%                        Zp eIe?jq        jR        e?jr        jR        e?js        jR        g          	 	 d[d&            Zt eIe?ju        jR        e?jv        jR        g          	 	 d[d'            Zw eIe?jx        jR                  d(             Zx eIe?jy        jR        e?jy        jS        g           e/            dXd)                        Zy eIej>        j?        jz                  d*             Zz eIej>        j?        j{                  d+             Z{ eIe?j|                   e/            ddddd,d-                        Z}d. Z~ eIe?j                  d/             Z eIe?j                  	 dd0            Z eIe?j                  	 dd1            Z eIe?j                  	 dd2            Z eIe?j                   e/            ddd3d4                        Z eIe?j                   e/            d5e]d+edefd6                        Z eIe?j                  d+efd7            Z eIe?j                   e/d\Ӧ          d+edefd8                        Z eIe?j                   e/            d+edefd9                        Zd: Z	 	 	 	 	 d{d;ed<edej        dz  dej        dz  d=edz  dIedz  dej        dz  dKej        dz  de^fd>Z eIe?j                   e/            	 	 	 dXd;ed<ed=edz  dIedz  dKej        dz  defd?                        Z eIe?j        g          	 	 	 	 	 d{d;ej        d<ej        dej        dej        d=ej        dz  dIej        dz  dej        dz  dKej        dz  de^fd@            Z eIe?j        j                  ddA            Z eIe?j                   e/            dBed,e]dCe^defdD                        Z eIe?j                   e/            ddE                        Z eIe?j                   e/            	 	 	 ddGed'edFe]dGe^dHe^defdI                        Z eIe?j        jR                  	 dd&edhe\e         dJe\e]         dKefdL            ZdM Z eIe?j        jR                  	 	 	 	 ddN            ZdO Z ee?j                    ee?j                    ee?j                    ee?j                    ee?j                    ee?j                    ee?j                    ee?j                    ee?j                    ee?j                    ee?j                    ee?j                    ee?j                    ee?j                    ee?j                    ee?j                    ee?j                    ee?j                    ee?j                    ee?j                    ee?j                   dP Z eIe?j                   e/            dQ                         Z eIe?j                   e/            dQdRdS                        Z eIe?j                   e/            dQdRdT                        Z ee?j                  Z ee?j                  Z ee?j                  Zd dl0Zd dlZd dlZdU Z e             dS (      N)CallableSequence)Enum)wraps)TypeVar)	ParamSpec)SymBoolSymFloatTensor)_add_op_to_registry_convert_out_paramsglobal_decomposition_table
meta_table)
OpOverload)_prim_elementwise_meta$ELEMENTWISE_PRIM_TYPE_PROMOTION_KIND)BoolLikecorresponding_complex_dtypecorresponding_real_dtypeelementwise_dtypesELEMENTWISE_TYPE_PROMOTION_KIND	FloatLikeIntLikemake_contiguous_strides_forNumber
NumberTypesuggest_memory_formatsym_min
TensorLike)_maybe_convert_to_dtype_maybe_resize_out_resize_output_check_safe_copy_outout_wrapper)_broadcast_shapes_maybe_broadcast)_config)ScalingTypeSwizzleType)_pytree_T_PatenIMPLMeta   c                     | |z   dz
  |z  S N    abs     \/home/wildlama/comfy/ComfyUI/.venv/lib/python3.11/site-packages/torch/_meta_registrations.pyceil_divr9   9   s    EAI!    c                     | |z   dz
  |z  |z  S )z$Rounds up x to nearest multiple of yr3   r4   xys     r8   round_upr?   =   s    UQY1!!r:   returnc                       fd}|S )Nc                 \     t                       fd}t          j        |            S )Nc                 4    t          t          |            d S N)r   r   )opfns    r8   registerz0register_meta.<locals>.wrapper.<locals>.registerF   s    
B33333r:   )r   pytree	tree_map_)rF   rG   rE   s   ` r8   wrapperzregister_meta.<locals>.wrapperC   sD     $$	4 	4 	4 	4 	4 	2&&&	r:   r4   )rE   rJ   s   ` r8   register_metarK   B   s#         Nr:   type_promotionc                     t          j        |d| i\  }fd|D             }t          | }t          |dt          j        iS )Ntype_promotion_kindc                 0    g | ]}t          |          S r4   )r    ).0r=   result_dtypes     r8   
<listcomp>z$elementwise_meta.<locals>.<listcomp>X   s$    CCC#A|44CCCr:   rL   )utilsr   r&   r   r   DEFAULT)rL   args_rQ   s      @r8   elementwise_metarW   O   sp    
 .	* OA| DCCCdCCCD T"D "	BJ  r:   c                     t           j        t           j        t           j        t           j        t           j        t           j        i}|                    | |           S rD   )torch	complex32halfcfloatfloatcdoubledoubleget)dtypefrom_complexs     r8   toRealValueTyperc   c   s<    eku|L
 E5)))r:   c                 x     t          t           g|R            t          j         k     fd           d S )Nc                      d d  S )Nzoutput with shape z# doesn't match the broadcast shape r4   )broadcasted_shape
self_shapes   r8   <lambda>z)check_inplace_broadcast.<locals>.<lambda>p   s    gZggTegg r:   )tupler%   rY   _check)rg   
args_shaperf   s   ` @r8   check_inplace_broadcastrl   l   sX    /
HZHHHII	LZ'ggggg    r:   Fc	                    	 t           t          j                  r,t          j                                         dk    d            t          t          j                  r,t          j                                        dk    d            t          d  fD                       rVt          j        t          j                              		nAt          j        t          j	                  	fd           npt          j                    t          t          j
                  st          dt                               t          j        t          t                     fd           t          t                    st          dt                               t          j        dk    d	            t          j        f|d
||          S )Nr   c                      dS Nz:linspace only supports 0-dimensional start and end tensorsr4   r4   r:   r8   rh   z(meta_linspace_logspace.<locals>.<lambda>       P r:   c                      dS ro   r4   r4   r:   r8   rh   z(meta_linspace_logspace.<locals>.<lambda>   rp   r:   c              3   @   K   | ]}t          |t                    V  d S rD   )
isinstancecomplex)rP   args     r8   	<genexpr>z)meta_linspace_logspace.<locals>.<genexpr>   s,      
C
C:c7##
C
C
C
C
C
Cr:   c                      d  d S )Nzlinspace(): inferred dtype z& can't be safely cast to passed dtype r4   )default_complex_dtypera   s   r8   rh   z(meta_linspace_logspace.<locals>.<lambda>   s    z6Kzzsxzz r:   zdtype must be torch.dtype, got c                      dt                    j         dt                     j         dt                    j         dS )Nz4received an invalid combination of arguments - got (, ))type__name__)endstartstepss   r8   rh   z(meta_linspace_logspace.<locals>.<lambda>   sY     Hu++H Hs)),H H04U0DH H H r:   zsteps must be IntLike, got c                      dS )Nz$number of steps must be non-negativer4   r4   r:   r8   rh   z(meta_linspace_logspace.<locals>.<lambda>   s    %K r:   metara   layoutdevice
pin_memoryrequires_grad)rs   rY   r   rj   dimanyrS   r   get_default_dtypeis_complex_dtypera   AssertionErrorr|   _check_typer   empty)
r   r~   r   basera   r   r   r   r   rx   s
   ``` `    @r8   meta_linspace_logspacer   t   s    %&& 
IIKK1PP	
 	
 	
 #u|$$ 
GGIINPP	
 	
 	

 
C
CsE/B
C
C
CCC 3 % A#%%!
 !
 =)EEL&u--zzzzz   
 2022eU[)) NLtE{{LLMMM 
5'""	H 	H 	H 	H 	H 	H  
 eW%% JH4;;HHIII	L!KKLLL;	#   r:   c                    t          j        j        t           j        k    fd           t          j        |                                 dk    o                                dk     d            |                     j                  S )Nc                      d j          S )Nz2take(): Expected a long tensor for index, but got ra   indexs   r8   rh   zmeta_take.<locals>.<lambda>   s    RU[RR r:   r   c                      dS )Nz*take(): tried to take from an empty tensorr4   r4   r:   r8   rh   zmeta_take.<locals>.<lambda>   s    < r:   )rY   rj   ra   long_check_indexnumel	new_emptyshape)selfr   s    `r8   	meta_taker      s     
Luz!RRRR  
 
ZZ\\Q55;;==A#56<<   >>%+&&&r:   r   c                J     j         }j         }t          j        ||k    d            t          j                                       dk    o                              dk     fd           t	           j        j                  }                     |          S )Nc                      dS )Nz=linalg.cross: inputs must have the same number of dimensions.r4   r4   r:   r8   rh   zlinalg_cross.<locals>.<lambda>       O r:   r0   c                  f    d  d                                 d                                 S )Nzlinalg.cross: inputs dimension z must have length 3. Got  and size)r   otherr   s   r8   rh   zlinalg_cross.<locals>.<lambda>   sG    :c : :99S>>: :(-

3: : r:   )ndimrY   rj   r   r%   r   r   )r   r   r   x_dy_d	out_shapes   ```   r8   linalg_crossr      s     )C
*C	Ls
OO   
L		#!4

31 4	
 	
 	
 	
 	
 	
   "$*ek::I>>)$$$r:   c                     t          | d           t          | d           t          j        | t          j                  S )Nzlinalg.matrix_expmemory_format)squareCheckInputscheckFloatingOrComplexrY   
empty_likecontiguous_formatr   s    r8   linalg_matrix_expr      s?     d/0004!4555D0GHHHHr:   valuesindicesc                    t          j        | j        | j        | j                  }t          j        | j        | j        t           j                  }|                                 dk    r | j        dk    rt          || j                   ||fS )Nr   ra   r   )	rY   r   r   r   ra   int64r   r   maybe_wrap_dim)r   r   r   r   s       r8   	cummaxminr      su    
 [DKtzJJJFk$*T[LLLGzz||qTY!^^sDI&&&7?r:   c                 l    t          || j                   t          j        | t          j                  S Nr   )r   r   rY   r   r   )r   r   s     r8   logcumsumexpr      s/     3	"""D0GHHHHr:   c                   |j         }t          |          }||z
  }t          t          |                    }d t          |          D             |D ]}	d|	<   fd|D             }
|
t          |          z   }t          |
          }|                                |d |         }|                    fdd           |||d          z   }|                    |          }dgt          |j        |d                    z   }|                    |          }|	                    d          }||d<   t          |          }t          t          |                    D ]}|||                  ||dz   <   | 
                    |t          j        	           d
 t          |          D             }d}|dz
  }|dk    r=||                     d          z  |||         <   ||||                  z  }|dz  }|dk    =t          ||          D ]&}|                     d||z
  z             |||         <   '|                     |||                                            | S )Nc                     g | ]}d S Fr4   rP   rV   s     r8   rR   z_exec_fft.<locals>.<listcomp>   s    555A%555r:   Tc                 $    g | ]}|         
|S r4   r4   )rP   dis_transformed_dims     r8   rR   z_exec_fft.<locals>.<listcomp>  s$    @@@!*<Q*?@A@@@r:   c                     |          S rD   r4   )r=   self_stridess    r8   rh   z_exec_fft.<locals>.<lambda>	  s    <? r:   keyreverser   r   r3   r   c                     g | ]}d S r   r4   r   s     r8   rR   z_exec_fft.<locals>.<listcomp>  s    ***1***r:   )r   lenlistrangestridesortpermuter   reshaper   resize_rY   r   as_strided_storage_offset)outr   	out_sizesr   forwardr   signal_ndim
batch_dimsdim_permuter   left	batch_endtmpinputbatched_sizes
batch_sizebatched_out_sizesiout_stridesbatch_numelr   r   s                       @@r8   	_exec_fftr      s   9Dc((K#J uT{{##K55t555 % % $1 A@@@{@@@Dc"KD		I;;==L
jyj
!CHH****DH999IJJ//KLL%%E D4JKK 8999MMM-((EAJ!M!]++3s88__ 5 5#,SV#4!a%  KK!1HKIII +*eDkk***KKQA
q&&&1CJJqMM&AKN#yQ00	Q q&& :t$$ G G&)jja*n1E&F&FKN##OOI{C,>,>,@,@AAAJr:   r   r   exclude_lastc                     t          |          }|                                 t          |          t          |          z
  }t	          |d |         fdd          |d |<   |S )Nc                     |          S rD   r4   )r   r   s    r8   rh   z_sort_dims.<locals>.<lambda>,  s    a r:   Tr   )r   r   r   intsorted)r   r   r   sorted_dimsr~   r   s        @r8   
_sort_dimsr   '  st    s))K;;==L
k

S..
.CDSD8888$  K r:   c                   
 t          j        | j        j                   |s|                                 S t          |           dk    rJt           j        j                                        s'| 	                    | 
                                          S | 
                                }| 	                    |          }t          |           dk    r$t          | |          }t          || |||          S t          |          }| }	 |                                
|                    
fdd           t!          t"          t%          |                    }|t%          |          |z
  d          }	t          ||||	|           |d t%          |          |z
           }|s|S || u r|}| 	                    |          }n||}})Ncpucudar   Tc                     |          S rD   r4   r   stridess    r8   rh   zmeta_fft_c2c.<locals>.<lambda>I  s    wqz r:   r   )rY   rj   ra   
is_complexclonedevice_hintbackendsmklis_availabler   r   r   r   r   r   r   mincufft_max_ndimr   )r   r   normalizationr   r   outputr   working_tensormax_dims	last_dimsr   s             @r8   meta_fft_c2cr  3  s    
L&''' zz||4E!!%.*<*I*I*K*K!~~diikk***		I^^I&&F4F"" s++y+wOOOO s))KN< ''))11114@@@~s;'7'788K 0 08 ; = =>	&.)YPPPP!"?C$4$4x$?"?@ 	MT!!#N^^I..FF%3VNF!<r:   c                     t          |           t          k    s+t          |           dk    r| d         dk    r| d         dk    rdS dS )N   r   r3   FT)r   r   r   s    r8   use_optimized_cufft_pathr  ]  sD    
3xx.  SXX]]s1v{{s1vQR{{utr:   c                 $   t          j        | j        j                   t	          |                                           }t	          |          }|d         }||         dz  dz   }t	          |          }|||<   |r|||<   t          |           dk    st          |           dk    r|                     |t          j	        | j                            }	| }
t          |           dk    r%t          |          rt          |	|
||d           nt          |          dk    r|n|}t          |	|
||gd           t          |          dk    r.|                     |t          j	        | j                            }
|d d         }|r|
|	}
}	|
                                |                    fd	d
           t          t           t          |                    }|t          |          |z
  d          }t          |	|
||d           |d t          |          |z
           }||sB|	                    |          ||         k    r#|
                    |t           j                   |
}	|	S t           j        j                                        rTt-          | |d          }|                     |t          j	        | j                            }	t          |	| ||d          S |                     |t          j	        | j                            S )Nr   r  r3   r   xpur   Tr   c                     |          S rD   r4   r   s    r8   rh   zmeta_fft_r2c.<locals>.<lambda>  s    '!* r:   r   r   )r   )rY   rj   ra   is_floating_pointr   r   r   r   rS   r   r  r   r   r   r   r   r   r   r   r   r   r   r   )r   r   r   onesidedinput_sizesr   last_dimlast_dim_halfsizeonesided_sizesr   r  target_sizesr   r  r  r   s                  @r8   meta_fft_r2cr  d  sR    
L-...tyy{{##K[!!I2wH#H-2Q6+&&N0N8 0/	(4F""k$&7&75&@&@ U>tzJJ   
 
 t&&+CC+H+H&fnidKKKKK ),CA99>LfnlXJPTUUUU3xx!||!%U%Ftz%R%R "0 " "
 crc(K I)7(//11  ,,,,d !    ~s;/?/?@@'K(8(88(C(E(EF	NNIt    **GC,<,<x,G*GH  I  	({{8$$	((;;;&&y@W&XXX'			(	(	*	* 
 s>>>U>tzJJ   
 
 y+tLLLL ~~U>tzJJ  
 
 	
r:   )	generatorc                H    t          |t          j        | g                    S rD   )r!   rY   Size)nr  r   s      r8   meta_randpermr    s    S%*aS//222r:   ra   r   r   r   c                4    t          j        | ||||          S Nr  rY   r   )r  ra   r   r   r   s        r8   meta_randperm_defaultr    s(     ;	vf   r:   c                v     dt          j         k     fd           t          j        |||||          S )Nr   c                      d d  S Nz:random_ expects 'from' to be less than 'to', but got from=z >= to=r4   highlows   r8   rh   zmeta_randint.<locals>.<lambda>      _S__Y]__ r:   r  rY   rj   r   )r!  r   ra   r   r   r   r"  s   `     @r8   meta_randintr%    s[     C	Ls
_____   ;E&J   r:   c                r     t          j         k     fd           t          j        |||||          S )Nc                      d d  S r  r4   r   s   r8   rh   z"meta_randint_low.<locals>.<lambda>  r#  r:   r  r$  )r"  r!  r   ra   r   r   r   s   ``     r8   meta_randint_lowr(    sV     
Ls
_____   ;E&J   r:   c                4    t          j        | ||||          S r  r  )r   ra   r   r   r   s        r8   meta_rand_defaultr*    s(     ;E&J   r:   c                 f    t          j                                         dk    o j        d         dk     fd           t          j         j        t           j        k     fd           t          j        dk    fd            j        d d         }                     g|dR           S )Nr3   r   r  c                      d j          S )Nz>_philox_key_split: key must have shape (*batch, 2), got shape r   r   s   r8   rh   z'meta_philox_key_split.<locals>.<lambda>  s    \QTQZ\\ r:   c                      d j          S )Nz3_philox_key_split: key must have dtype uint64, got r   r.  s   r8   rh   z'meta_philox_key_split.<locals>.<lambda>  s    QciQQ r:   r   c                      d  S )Nz4_philox_key_split: num_splits must be positive, got r4   )
num_splitss   r8   rh   z'meta_philox_key_split.<locals>.<lambda>  s    SzSS r:   )rY   rj   r   r   ra   uint64r   )r   r1  batch_sizess   `` r8   meta_philox_key_splitr4    s    	L		Q-39R=A-\\\\   
L	U\!QQQQ   
LQSSSS   )CRC.K==*6{6A66777r:   c                      t          j                                         dk    o j        d         dk     fd           t          j         j        t           j        k     fd           t          j                   S )Nr3   r   r  c                      d j          S )Nz@_philox_key_fold_in: key must have shape (*batch, 2), got shape r-  r.  s   r8   rh   z)meta_philox_key_fold_in.<locals>.<lambda>  s    ^SVS\^^ r:   c                      d j          S )Nz5_philox_key_fold_in: key must have dtype uint64, got r   r.  s   r8   rh   z)meta_philox_key_fold_in.<locals>.<lambda>  s    S	SS r:   )rY   rj   r   r   ra   r2  r   )r   datas   ` r8   meta_philox_key_fold_inr9     s    	L		Q-39R=A-^^^^   
L	U\!SSSS   C   r:   c           	          t          j        j        j         fd           t          j        j        t           j        k     fd           t          j        j        j        k     fd           t          j                                        dk    oj        d         dk     fd                                           dk    rt          j                                                                        dz   k     fd           j        d                                          t          j        t          d	 t          j                  D                        fd
           d S d S )Nc                        dj          S )Nz,: self must be a floating point tensor, got r   )op_namer   s   r8   rh   z1_check_philox_distribution_args.<locals>.<lambda>  s    7TT
TT r:   c                       d j          S )Nz": key must have dtype uint64, got r   r   r<  s   r8   rh   z1_check_philox_distribution_args.<locals>.<lambda>  s    7IIciII r:   c                  ,     dj          d j          S )Nz/: self and key must be on the same device, got r   r   r   r<  r   s   r8   rh   z1_check_philox_distribution_args.<locals>.<lambda>  s3     2 2;2 2%(Z2 2 r:   r3   r   r  c                       d j          S )Nz5: key must have shape (2,) or (*batch, 2), got shape r-  r>  s   r8   rh   z1_check_philox_distribution_args.<locals>.<lambda>  s    XXSYXX r:   c                  ,     d j          dj          S )Nz?: batched key must have ndim == output ndim + 1, got key shape z with output shape r-  rA  s   r8   rh   z1_check_philox_distribution_args.<locals>.<lambda>&  s9     L L!$L L?CzL L r:   c              3   4   K   | ]\  }}|d k    p||k    V  dS r3   Nr4   )rP   kssss      r8   rv   z2_check_philox_distribution_args.<locals>.<genexpr>-  s3      MMBa#28MMMMMMr:   c                  <     dt                      dj         S )Nz: key batch shape z( is not broadcastable with output shape r   r   )	key_batchr<  r   s   r8   rh   z1_check_philox_distribution_args.<locals>.<lambda>.  s<     G Gd9oo G G:>*G G r:   )
rY   rj   ra   r  r2  r   r   r   allzip)r<  r   r   rJ  s   ```@r8   _check_philox_distribution_argsrM    s   	L
$TTTTT   
L	U\!IIIII   
Lsz!	
 	
 	
 	
 	
 	
   
L		Q-39R=A-	
 	
 	
 	
 	
   wwyy1}}GGIIa'     	
 	
 	
 Il

l+	MM#i2L2LMMMMM     	
 	
 	
 	
 	
 }r:                 ?c                 (    t          d| |           | S )N_philox_normal_rM  )r   r   meanstds       r8   meta_philox_normal_rU  5  s    #$5tSAAAKr:   c                 (    t          d| |           | S )N_philox_uniform_rR  )r   r   r"  r!  s       r8   meta_philox_uniform_rX  ;  s    #$6cBBBKr:   r   lastdimc                    t          j        | j        j                   t	          |           dk    r t          |                                           }|||d         <   |                     |t          | j                            }t          |          r2t          ||                     t           j                  ||d          S t          |          dk    rt          | |d d         dd          }n |                     t           j                  }t          ||||d         gd          S t           j        j                                        r| }t          |          dk    r'|d d         }t          | ||d          }|dd          }t          |                                          }|||d         <   |                     |t          | j                            }	t          |	|||d          S t          |                                           }|||d         <   |                     |t          | j                            S )	Nr   r   r   r   Fr   r3   r   )rY   rj   ra   r   r   r   r   r   rc   r  r   r   r   r   r  r   r   r   )
r   r   r   rY  r   r   tempr   c2c_dimsr   s
             r8   meta_fft_c2rr]  A  s    
L&'''4F""%%	$	#b'	1L1LMM#C(( 	P

)@
AA    3xx!||#D#crc(AuEEEzz0GzHHVT9s2wiOOOO			(	(	*	* Ls88a<<3B3xH xNNNEbcc(C&&	$	#b'nnYodj.I.InJJeYUCCCC %%	$	#b'~~itz/J/J~KKKr:   c                 0   ddl m}  ||           s't          j        |           dk    rt	          d          t          |t                    rH|                    | |          }t          j        	                    || 
                                           | S )Nr   free_unbacked_symbolsr3   zQmore than one element of the written-to tensor refers to a single memory location)%torch.fx.experimental.symbolic_shapesr`  rY   _debug_has_internal_overlapRuntimeErrorrs   r   to_refsexpandr   )r   srcnon_blockingr`  intermediates        r8   
meta_copy_rj  p  s     LKKKKK "!$''
,1,Md,S,SWX,X,X_
 
 	
 #v 6vvdL11 	<555Kr:   c                 <   t          |                                           }t          |                                           }||                                 k    rdn||         ||         z  }|                    |d           |                    ||           ||fS r2   )r   r   r   r   insert)tensorr   result_sizesresult_strides
new_strides        r8   inferUnsqueezeGeometryrq    s    &&L&--//**NVZZ\\))|C/@>RUCV/VJQ#z***''r:   c                     t          ||                                 dz             }t          | |          \  }}|                     ||           | S r2   )r   r   rq  r   )r   r   g_sizes	g_stridess       r8   meta_unsqueeze_ru    sL    
dhhjj1n
-
-C/c::GYWi(((Kr:   r   weight_metabias_activation_opt	out_dtypec                 v   t          | j                  }|g|                    d          |                    d          k    r;t          d|                    d           d|                    d                     |                    d          |                     d          dz  k    r>t          d|                    d           d|                     d          dz             |                    d          |d<   t	          | j                  dk    r%t          d	t	          | j                   d
          d|                     d          f}|?| j        t          j        k    r|t          j        k    st          d| j         d|           | 	                    |||| j        n|          }|S )Nr   z%output size mismatch: weight.size(0)= != bias.size(0)=r3   r   r  zweight.size(1)=z != input.size(-1)/2=z0we can only handle the squashed input case, got D inputzKout_dtype is only supported for i8i8->i32 linear operator, got input.dtype=, out_dtype=r   )
r   r   r   r   r   ra   rY   int8int32new_empty_strided)	r   rv  rw  rx  ry  rz  output_sizestransposed_stridesr   s	            r8   meta_sparse_structured_linearr    s    $$L;;q>>TYYq\\)) gAggY]YbYbcdYeYegg   {{1~~B!+++Wfkk!nnWW5::b>>TUCUWW
 
 	
 {{1~~L 5;1Xs5;?O?OXXX
 
 	
 UZZ]]+uz))i5;.F.F  C^c^i  C  C  xA  C  C   $$&.ekkI %  F Mr:   mat1	mat1_metamat2c                 d   t          | j                  dk    r%t          dt          | j                   d          t          |j                  dk    r%t          dt          |j                   d          t          |j                  dk    r%t          dt          |j                   d          |                     d          |                    d          dz  k    r>t          d|                     d           d	|                    d          dz             |                     d          |                    d          g}|?|j        t
          j        k    r|t
          j        k    st          d
|j         d|           |                    |||j        n|          }|S )Nr  mat1 must be 2D, got Dmat1_meta must be 2D, got mat2 must be 2D, got r3   r   mat1.size(1)= != mat2.size(0)/2=Jout_dtype is only supported for i8i8->i32 linear operator, got mat2.dtype=r~  r   	r   r   r   r   ra   rY   r  r  r   )r  r  r  rz  r  r   s         r8   meta_sparse_structured_mmr    s    4:!GS__GGGHHH
9?q  Q#io:N:NQQQRRR
4:!GS__GGGHHHyy||tyy||a'''ODIIaLLOOTYYq\\A=MOO
 
 	
 IIaLL$))A,,/L
ej((Y%+-E-E  A]a]g  A  Au~  A  A   ^^%-djj9   F
 Mr:   r3   )alphabetarz  c                   t          | j                  dk    r%t          dt          | j                   d          t          |j                  dk    r%t          dt          |j                   d          t          |j                  dk    r%t          dt          |j                   d          t          |j                  dk    r%t          dt          |j                   d          |                     d	          |                    d	          k    r;t          d
|                     d	           d|                    d	                     |                    d          |                    d	          dz  k    r>t          d|                    d           d|                    d	          dz             |                    d	          |                    d          g}|?|j        t
          j        k    r|t
          j        k    st          d|j         d|           |                    |||j        n|          }|S )Nr3   zKonly input broadcasted to columns of mat1 * mat2 product is supported, got r}  r  r  r  r  r  r   zUonly input broadcasted to columns of mat1 * mat2 product is supported, input.size(0)=z != mat1.size(0)=r  r  r  r~  r   r  )	r   r  r  r  r  r  rz  r  r   s	            r8   meta_sparse_structured_addmmr    sd    5;1sZ]^c^iZjZjsss
 
 	
 4:!GS__GGGHHH
9?q  Q#io:N:NQQQRRR
4:!GS__GGGHHHzz!}}		!$$L"ZZ]]L L=AYYq\\L L
 
 	
 yy||tyy||a'''ODIIaLLOOTYYq\\A=MOO
 
 	
 IIaLL$))A,,/L
ej((Y%+-E-E  A]a]g  A  Au~  A  A   ^^%-djj9   F
 Mr:   compressed_Adense_Br  transpose_resultalg_idsplit_ksplit_k_modec	           	         |j         t          j        t          j        t          j        t          j        t          j        hvrt          d|j                    | j         |j         k    rt          d| j          d|j                    t          |j	                  dk    r%t          dt          |j	                   d          | j         t          j        t          j        fv }	|	r#|
                                rt          d          |                    d          }
|                     d	          }|A||                    d	          k    r(t          d
| d|                    d	                     |U|	r0|t          j        t          j        t          j        t          j        hv s#t          d| j          d|j          d| d          |r|
|fn||
f}|                    ||          S )NzA_cslt_sparse_mm only supports fp16, bf16, int8, and fp8e4m3, got z%inputs must have the same dtype, got r   r  z-_cslt_sparse_mm only supports 2d inputs, got r  z.dense input must be transposed for 8bit dtypesr3   r   zbias size mismatch: m=r|  zout_dtype is not supported for z x z -> z matmul!r   )ra   rY   float32float16bfloat16r  float8_e4m3fnr   r   r   is_contiguousr   r  r   )r  r  rx  r  rz  r  r  r  r  is_8bit_input_typer  moutput_shapes                r8   meta__cslt_sparse_mmr    s%    }
   _PWP]__
 
 	
 W]**\L4F\\W]\\
 
 	
 7=QQC<N<NQQQ
 
 	
 &+
E<O/PP S  "" 	S !QRRRQA!A		! KKKTYYq\\KK   	#	  !o,2Doooo\eooo   .9Aq66Aq6L\;;;r:   T)include_selfr   sourcereducer  c                B    t          j        | t           j                  S r   rY   r   r   r   r   r   r  r  r  s         r8   meta_index_reducer  [  s     D0GHHHHr:   c                    | S rD   r4   r  s         r8   meta_index_reduce_r  h  s	     Kr:   c                     t          |                                           }|                                 dk    r|                                ||<   |                     |          S Nr   )r   r   r   r   r   )r   r   r   result_sizes       r8   meta_index_selectr  v  sN     tyy{{##KxxzzA~~ ;;==C>>+&&&r:   )lengthsr   offsetsaxisunsafeinitialr8  r  r  r  r  c                     |t          d           fd}| ||j                  S |,|j        d d         |j        d         dz
  fz   }	 ||	          S t          d          )Nz?segment_reduce(): indices based reduction is not supported yet.c                 x    t          j        | j        dz   d          z   j        dt           j                  S )Nr3   r   ra   r   r   )rY   r   r   ra   r   )lengths_shaper  r8  s    r8   segment_reduce_lengths_tensorz:meta_segment_reduce.<locals>.segment_reduce_lengths_tensor  sA    {DJtaxzz22*1	
 
 
 	
r:   r   r3   z<segment_reduce(): Either lengths or offsets must be defined.)NotImplementedErrorr   rc  )
r8  r  r  r   r  r  r  r  r  r  s
   `    `    r8   meta_segment_reducer    s     !M
 
 	

 
 
 
 
 
 ,,W];;; crc*gmB.?!.C-EE,,];;;
U
V
VVr:   c                 ,    |                      d          S Nr4   r   r   s    r8   meta_maxr         >>"r:   c                     t          j        | j        |f          }t          | ||          }|                     |          |                     |t
          j                  fS Nr   rS   reduction_dimsr   _compute_reduction_shaper   rY   r   r   r   keepdimr  s       r8   meta_max_dimr    W    

tzC6
2
2C+D#w??L|$$|5:66 r:   c                 ,    |                      d          S r  r  r   s    r8   meta_minr    r  r:   c                     t          j        | j        |f          }t          | ||          }|                     |          |                     |t
          j                  fS r  r  r  s       r8   meta_min_dimr    r  r:   c                     |                                  rt          | j                  }nt          | t          j                  \  }}t          j        | |          S NrN   r   )r   r   ra   r   r   INT_TO_FLOATrY   r   )r   rQ   rV   s      r8   
meta_angler    s^     
/
;;, ? L
 
 
< D5555r:   c                     t          j        ||                                 | j                   |                    t          j        |                     S rD   )rY   _resize_output_r   r   copy_angle)r   r   s     r8   meta_angle_outr    s=    	#tyy{{DK88899U[&&'''r:   c                     d S rD   r4   )vals    r8   assert_asyncr        
Fr:   c                     d S rD   r4   )r  
assert_msgs     r8   assert_async_metar    r  r:   c                     d S rD   r4   )ss    r8   
print_metar    r  r:   ra   r   r   r   r   c                 .    t          j        dd          S )Nr4   r   r@  r  r  s        r8   make_dep_tokenr    s     ;r&))))r:   c                     ddl m} t          | t          t          f          rt          d           || ||           d S )Nr   )constrain_range'Constraining SymFloat or Symbool is nyir   max)ra  r  rs   r
   r	   
ValueError)r   r   r  r  s       r8   sym_constrain_ranger    s[     FEEEEE$7+,, DBCCCODcs++++++r:   c                 @    t                               | ||           |S Nr  )r-   r  r   r   r  	dep_tokens       r8   functional_sym_constrain_ranger    s#    Ts444r:   c                 X   ddl m} ||t          j        | dk               d S t	          | t
          t          f          rt          d          t          |           t          u r6|t          j        | |k               |t          j        | |k               d S  || ||           d S )Nr   )_constrain_range_for_sizer  r  )
ra  r  rY   rj   rs   r
   r	   r  r|   r   )r   r   r  r  s       r8   sym_constrain_range_for_sizer     s     POOOOO
{s{TQY$7+,, DBCCCDzzS?L%%%?L%%%d555555r:   c                 @    t                               | ||           |S r  )r-   r  r  s       r8   'functional_sym_constrain_range_for_sizer    s#    %%d%===r:   c                     |S rD   r4   )r  r  r  s      r8   functional_assert_async_metar    s    r:   f_namec                                                       dk     r&t           d                                             t          j                             d                               d          k     fd           d S )Nr  z8: The input tensor must have at least 2 dimensions, got r   c                  f      d                     d           d                     d           dS )Nz5: A must be batches of square matrices, but they are r   by r   	 matricesr   )r  r   s   r8   rh   z#squareCheckInputs.<locals>.<lambda>(  sK    6 D D		"D D+/99R==D D D r:   )r   r   rY   rj   r   )r   r  s   ``r8   r   r      s    xxzzA~~[[txxzz[[
 
 	
 
L		"2&	D 	D 	D 	D 	D    r:   Anamec                     t          j         j        j        k     fd           t          j         j        j        k     fd           t          j                            d                              d          k    fd           t          j                            d                               d          k     fd           d S )Nc                  *    dj          d j          dS )Nz:Expected b and A to be on the same device, but found b on z
 and A on 	 instead.r@  r  r   s   r8   rh   z(linearSolveCheckInputs.<locals>.<lambda>3  s0    :{: :&'h: : : r:   c                  *    dj          d j          dS )Nz=Expected b and A to have the same dtype, but found b of type z and A of type r  r   r  s   r8   rh   z(linearSolveCheckInputs.<locals>.<lambda>;  s0    =z= =*+'= = = r:   r   r  c                  b    d                      d           d                      d           dS )Nz3A must be batches of square matrices, but they are r  r  r   r   r   r  s   r8   rh   z(linearSolveCheckInputs.<locals>.<lambda>C  sF    BFF2JJB B,-FF2JJB B B r:   c                      d d                      d           d                      d           d                     d           d                     d           
S )NzIncompatible matrix sizes for z: each A matrix is r   r  z but each b matrix is r  r   )r  r  r   s   r8   rh   z(linearSolveCheckInputs.<locals>.<lambda>K  s    HT H HH H)*H H%)YYr]]H H8<		"H H r:   )rY   rj   r   ra   r   )r   r  r  s   ```r8   linearSolveCheckInputsr  0  s   	Lqx	
 	
 	
 	
 	
   
L
ag	
 	
 	
 	
 	
   
L	r

affRjj 	
 	
 	
 	
   
L	r

diimm#	
 	
 	
 	
 	
 	
    r:   tallow_low_precision_dtypesc                 ,   | j         t          j        |                                 p|                                 fd           |sIt          j        t          j        t          j        t          j        t          j        fv fd           d S d S )Nc                       d  S )Nz<: Expected a floating point or complex tensor as input. Got r4   ra   r  s   r8   rh   z(checkFloatingOrComplex.<locals>.<lambda>\  s    6^^W\^^ r:   c                       d  S )Nz*: Low precision dtypes not supported. Got r4   r  s   r8   rh   z(checkFloatingOrComplex.<locals>.<lambda>a  s    vPPPP r:   )	ra   rY   rj   r  r   r]   r_   r\   r^   )r  r  r  ra   s    ` @r8   r   r   T  s    
 GE	L	/^^^^^   & 
ek5<u}MMPPPPP	
 	
 	
 	
 	

 
r:   arg_namec                 h    t          j        |                                 dk    fd           d S )Nr  c                       d  dS )Nz: The input tensor z! must have at least 2 dimensions.r4   )r  r  s   r8   rh   zcheckIsMatrix.<locals>.<lambda>i  s    6YYhYYY r:   )rY   rj   r   )r  r  r  s    ``r8   checkIsMatrixr  f  s@    	L	1YYYYY    r:   Br   c                 4    t                      t                     t          j        r,                     d                              d          k    n+                     d                              d          k     fd           d S )Nr  r   c                       drdnd d                      d           d                      d           d                     d           d                     d           d	S )
Nz2: Incompatible shapes of A and B for the equation zAX = BzXA = Bz (r  r=   r   r   r{   r   )r  r  r  r   s   r8   rh   z#checkInputsSolver.<locals>.<lambda>r  s     J J-xxXJ JJ JffRjjJ J/0vvbzzJ J<=FF2JJJ J J r:   )r   r  rY   rj   r   )r  r  r   r  s   ````r8   checkInputsSolverr  m  s    a   !V	L$(Fr

affRjj  affRjjAFF2JJ.F	
 	
 	
 	
 	
 	
 	
    r:   resultfn_nameresult_namec                 `     t          j        j        j        k     fd           d S )Nc            	      8      d d dj          dj          	S )Nz: Expected z5 and input tensors to be on the same device, but got z on z and input on r@  )r  r   r  r  s   r8   rh   z!checkSameDevice.<locals>.<lambda>  sV     L L; L LL L &L L=B\L L r:   )rY   rj   r   )r  r  r   r  s   ````r8   checkSameDevicer  z  sU     
L%	
 	
 	
 	
 	
 	
 	
    r:   UPLOc                                                        }t          j        t                     dk    o|dk    p|dk     fd           d S )Nr3   ULc                      d  S )Nz1Expected UPLO argument to be 'L' or 'U', but got r4   )r   s   r8   rh   zcheckUplo.<locals>.<lambda>  s    JDJJ r:   )upperrY   rj   r   )r   UPLO_uppercases   ` r8   	checkUplor'    s[    ZZ\\N	LD		QKNc1J^s5JJJJJ    r:   eigenvalueseigenvectorsr#  	compute_vc                    t          | d           t          |           t          | j                  }|r;|                     |          }|                    |t          |d                     n|                     dg          }|                                 |                     |t          | j	                            }||fS )Nzlinalg.eighF	row_majorr   r   )
r   r'  r   r   r   r   r   poprc   ra   )r  r   r*  r   vecsvalss         r8   meta__linalg_eighr1    s     a'''dOOOMME  {{5!! ;EU S S STTTT{{A3	IIKKK;;uOAG$<$<;==D:r:   c                     t          | d           t          j        | j                  r| j        nt          j        | j                  }|                     | j        d d         |          S )Nzlinalg.eigvalsr   r   )r   rS   r   ra   r   r   r   )r   complex_dtypes     r8   meta__linalg_eigvalsr4    sj     e-... !%+..	<.u{;; 
 ??5;ss+=?AAAr:   c                    t          | d           t          j        | j                  r| j        nt          j        | j                  }|                     | j        d d         |          }|                     | j        |          }t          |           dk    }|                    | j        t          | j        |                     ||fS )Nz
linalg.eigr   r   r   r,  )
r   rS   r   ra   r   r   r   r   r   r   )r   r3  r   vectorsis_cudas        r8   meta_linalg_eigr8    s     e\*** !%+..	<.u{;; 
 __U["-]_CCFooeko??G%  F*G0PPP   7?r:   rg  c                 t    | j                             t          j                                      dd          S )Nr   r  r   )mTr   rY   r   	transpose)rg  s    r8   cloneBatchedColumnMajorr<    s+    6<<e&=<>>HHRPPPr:   r%  c                      t          |           S rD   )r<  )r   r  r%  s      r8   _cholesky_solve_helperr>    s     #4(((r:   c                      t          j         j        dk     fd           t          j        j        dk    fd           t           d          \  }}t	          |||          S )Nr  c                      d j          dS )Nz-b should have at least 2 dimensions, but has  dimensions insteadr   r   s   r8   rh   z cholesky_solve.<locals>.<lambda>  s    ^	^^^ r:   c                      d j          dS )Nz-u should have at least 2 dimensions, but has rA  rB  r	  s   r8   rh   z cholesky_solve.<locals>.<lambda>  s    [[[[ r:   cholesky_solve)rY   rj   r   !_linalg_broadcast_batch_dims_namer>  )r   r  r%  self_broadcastedA_broadcasteds   ``   r8   rD  rD    s     
L	Q^^^^   
L	![[[[   'Ha!' '#m ""2M5IIIr:   c                     |                                  dk    r t          j        | t          j                  S t	          | d           t          |           S )Nr   r   cholesky)r   rY   r   legacy_contiguous_formatr   r<  r   r%  s     r8   rI  rI    sN     zz||qE4RSSSSdJ'''"4(((r:   c                 @    t          | d           t          |           S )Ncholesky_inverse)r   r<  rK  s     r8   rM  rM    s#     d.///"4(((r:   check_errorsc                 D   t          | d           t          | d           | j        }t          |          }t	          |d          }|                     |          }|                    ||           |                     |d|dz
           t          j                  }||fS )Nzlinalg.choleskyFr   r  r   )	r   r   r   r   r   r   r   rY   r  )r  r%  rN  A_shaper   	L_stridesr#  infoss           r8   linalg_cholesky_exrS    s    a*+++1/000gGw<<D ,GU;;I	GAMM'9%%% KKD1H-U[KAAEe8Or:   tauc                     t          j         j        dk    d            t          j                             d                               d          k    d            t          j                             d                              d          k    d            t          j         j        j        z
  dk     fd            j        dk    r: j        d d         }j        d d         t          j        |k    fd	           t          j        j         j        k     fd
           t          d d           t          j         j        t           j        d           j         j	                  S )Nr  c                      dS )NzHtorch.linalg.householder_product: input must have at least 2 dimensions.r4   r4   r:   r8   rh   z,linalg_householder_product.<locals>.<lambda>      Z r:   r  r   c                      dS )Nzbtorch.linalg.householder_product: input.shape[-2] must be greater than or equal to input.shape[-1]r4   r4   r:   r8   rh   z,linalg_householder_product.<locals>.<lambda>  s    t r:   c                      dS )Nz`torch.linalg.householder_product: input.shape[-1] must be greater than or equal to tau.shape[-1]r4   r4   r:   r8   rh   z,linalg_householder_product.<locals>.<lambda>  s    r r:   r3   c                  (    dj          d j          S )Nzptorch.linalg.householder_product: Expected tau to have one dimension less than input, but got tau.ndim equal to  and input.ndim is equal to rB  r   rT  s   r8   rh   z,linalg_householder_product.<locals>.<lambda>  0    \),\ \OTz\ \ r:   c                      d  S )Nzltorch.linalg.householder_product: Expected batch dimensions of tau to be equal to input.shape[:-2], but got r4   actual_batch_tau_shapes   r8   rh   z,linalg_householder_product.<locals>.<lambda>      O6LO O r:   c                  (    dj          d j          S )Nz,torch.linalg.householder_product: tau dtype z does not match input dtype r   r\  s   r8   rh   z,linalg_householder_product.<locals>.<lambda>&  s*    939 9 9+0;9 9 r:   z torch.linalg.householder_productrT  Fr,  r   r   ra   r   )
rY   rj   r   r   r   ra   r  empty_stridedr   r   )r   rT  expected_batch_tau_shaper`  s   `` @r8   linalg_householder_productrf     s   
 
L
aZZ   
L

2%**R..(tt   
L

2#((2,,&rr  
 
L
SX"	
 	
 	
 	
 	
   zA~~#(;ss#3 !$3B3"&>>   	
 	
 	
 
L	U[ 	
 	
 	
 	
 	
   6UEJJJ[*5;%HHHk|	   r:   c                 <   t          | d           t          | dd           |                     | j                  }|                    | j        t          | j        d                     |                     | j        d d         t          j                  }||fS )Nzlinalg.inv_exF)r  r,  r  r   r   r   r   r   r   r   rY   r  )r  rN  r#  rR  s       r8   linalg_inv_ex_metari  6  s    a)))1o%PPPP	AGAMM!'6qw%PPPQQQKKEKK88Ee8Or:   LDpivotsinfo)	hermitianrN  rm  c                z   t          | d           t          | d           t          j        | j        t          | j        d          | j        | j                  }|                     | j        d d         t          j	                  }|                     | j        d d         t          j	                  }|||fS )Nztorch.linalg.ldl_factor_exFr,  rc  r   r   r  )
r   r   rY   rd  r   r   ra   r   r   r   )r   rm  rN  rj  rk  rl  s         r8   linalg_ldl_factor_ex_metaro  B  s     d89994!=>>>		Z*4:GGGj{	
 
 
B ^^DJssO59^==F>>$*SbS/>;;Dvtr:   )rm  c                @    t           d           t           d           t           d           t          j        j        dk    fd            j        d d         }t          j        |j        k    fd           t          j        t          j        j	                  fd           t          j         j	        j	        k     fd           t                     \  }}t          j        |t          |d	          j	        j        
          S )Nztorch.linalg.ldl_solver  c                      d j          dS )NzMtorch.linalg.ldl_solve: Expected B to have at least 2 dimensions, but it has rA  rB  )r  s   r8   rh   z'linalg_ldl_solve_meta.<locals>.<lambda>e  !    6&6 6 6 r:   r   c                      d j          dS )Nzjtorch.linalg.ldl_solve: Expected LD.shape[:-1] and pivots.shape to be the same, but got pivots with shape  insteadr-  rk  s   r8   rh   z'linalg_ldl_solve_meta.<locals>.<lambda>m  %    @)/@ @ @ r:   c                      d j          S )Nz<torch.linalg.ldl_solve: Expected pivots to be integers. Got r   ru  s   r8   rh   z'linalg_ldl_solve_meta.<locals>.<lambda>t  s    ]v|]] r:   c                  (    dj          d j          S )Nz!torch.linalg.ldl_solve: LD dtype z does not match b dtype r   )r  rj  s   r8   rh   z'linalg_ldl_solve_meta.<locals>.<lambda>x  s    _BH__VWV]__ r:   Fr,  rc  )r   r   r  rY   rj   r   r   rS   is_integer_dtypera   _linalg_broadcast_batch_dimsrd  r   r   )rj  rk  r  rm  expected_pivots_shapeB_broadcast_sizerV   s   ```    r8   linalg_ldl_solve_metar}  W  sc    b2333278881b":;;;	L	!	
 	
 	
 	
   HSbSM	L-	
 	
 	
 	
   
Lv|,,]]]]   
L
AG_____   7q"==a*+;uMMMgx	   r:   Pr"  )pivotr  c                    t          j         j        dk     fd           t           j                  }|d         }|d         }t          ||          }||d<   |r                     |          }n                     dg          }||d<                        |          }||d<   ||d<                        |          }|||fS )Nr  c                      d j          dS )Nz@linalg.lu: Expected tensor with 2 or more dimensions. Got size: rt  r-  r	  s   r8   rh   z linalg_lu_meta.<locals>.<lambda>  s    dSTSZddd r:   r  r   r   )rY   rj   r   r   r   r   r   )	r  r  sizesr  r  kr~  r#  r"  s	   `        r8   linalg_lu_metar    s     
L	!dddd  
 MMEb	Ab	A1AE"I KKKKE"I	EAE"IE"I	EAa7Nr:   LU)r  rN  c                    t          j         j        dk     fd           t           j                  }|d         }|d         }t          j        |t          |d           j         j                  }|	                                 t          ||          |d<                        |t           j                  }|	                                                      |t           j                  }|||fS )	Nr  c                      d j          dS )NzFtorch.lu_factor: Expected tensor with 2 or more dimensions. Got size: rt  r-  r	  s   r8   rh   z*linalg_lu_factor_ex_meta.<locals>.<lambda>  s    jYZY`jjj r:   r  r   Fr,  rc  r   )rY   rj   r   r   r   rd  r   ra   r   r.  r   r   r   )	r  r  rN  r  r  r  r  rk  rl  s	   `        r8   linalg_lu_factor_ex_metar    s     
L	!jjjj  
 MMEb	Ab	A		*5EBBBgx	
 
 
B 
IIKKK1E"I[[ei[00F 
IIKKK;;uEI;..Dvtr:   )r   adjointr  c                    t           d           t          j         j        j        k     fd           t          j        j        t          j        k    d            t           d           t           |d           t          j                             d                              d          k    d            t          j         j        d d         j        k    fd           t                     \  }}t          j
        |t          ||           j        j        	          }|                                d
k    r*|s(|                                r|                                }|S )Nztorch.linalg.lu_solvec                  *    dj          d j          dS )NzPlinalg.lu_solve: Expected LU and B to have the same dtype, but found LU of type  and B of type rt  r   )r  r  s   r8   rh   z&linalg_lu_solve_meta.<locals>.<lambda>  s6    O$&HO O=>WO O O r:   c                      dS )NzElinalg.lu_solve: pivots should be a Tensor of scalar type torch.int32r4   r4   r:   r8   rh   z&linalg_lu_solve_meta.<locals>.<lambda>  s    W r:   zlinalg.lu_solver   c                      dS )NzYlinalg.lu_solve: Number of pivots per batch should be same as the dimension of the matrixr4   r4   r:   r8   rh   z&linalg_lu_solve_meta.<locals>.<lambda>  s    k r:   c                      d j          dS )Nzclinalg.lu_solve: Expected LU.shape[:-1] and pivots.shape to be the same, but got pivots with shape rt  r-  ru  s   r8   rh   z&linalg_lu_solve_meta.<locals>.<lambda>  rv  r:   r,  rc  r   )r   rY   rj   ra   r   r   r  r   r   rz  rd  r   r   r   r   conj)r  rk  r  r   r  r|  rV   r  s   ```     r8   linalg_lu_solve_metar    s    26777	L
AG	
 	
 	
 	
 	
   
L	!WW   b1222b!T#4555	L
v{{2&kk   
L
"%	
 	
 	
 	
   7q"==a *+;4xPPPgx	  F ||~~4 	#[[]]FMr:   unpack_dataunpack_pivotsc                 D    t          j         j        dk     fd           |r)t          j        |j        t           j        k    d            t           j                  }|d         }|d         }t          ||          }||d<   |r                     |          }n                     dg          }|r:||d<                        |          }	||d<   ||d<                        |          }
n,                     dg          }	                     dg          }
||	|
fS )Nr  c                      d j          dS )NzFtorch.lu_unpack: Expected tensor with 2 or more dimensions. Got size: rt  r-  )r  s   r8   rh   z lu_unpack_meta.<locals>.<lambda>  s    kY[Yakkk r:   c                      dS )Nztorch.lu_unpack: LU_pivots is expected to be a contiguous tensor of torch.int32 dtype.
Note: this function is intended to be used with the output produced by torch.linalg.lu_factorr4   r4   r:   r8   rh   z lu_unpack_meta.<locals>.<lambda>  s    p r:   r  r   r   )	rY   rj   r   ra   r  r   r   r   r   )r  rk  r  r  r  r  r  r  r~  r#  r"  s   `          r8   lu_unpack_metar    s?    
L
1kkkk    
LEK' 	
 	
 	
 NNEb	Ab	AAq		AE"I LLLL! b	LLb	b	LLLL!LL!a7Nr:   modec                 ~      dk    rd}d}n. dk    rd}d}n# dk    rd}d}nt          j        d fd           ||fS )NreducedTcompleteFrc                      d  dS )Nzqr received unrecognized mode 'z=' but expected one of 'reduced' (default), 'r', or 'complete'r4   )r  s   r8   rh   z _parse_qr_mode.<locals>.<lambda>5  s"    O$ O O O r:   rY   rj   )r  	compute_qr  s   `  r8   _parse_qr_moder  (  s~    y							   	
 	
 	
 gr:   QRr  c                 T   t          | d           t          | d           t          |          \  }}| j        d         }| j        d         }t	          ||          }|rXt          | j                  }|r|n||d<   |                     |          }|                    |t          |d                     n|                     dg          }t          | j                  }	|s|s|n||	d<   |                     |	          }
|
                    |	t          |	d                     ||
fS )Nz	linalg.qrr  r   Fr,  r   )	r  r   r  r   r   r   r   r   r   )r  r  r  reduced_moder  r  r  Q_shaper  R_shaper  s              r8   linalg_qr_metar  =  s+    ![!!!1k***,T22I|	A	AAq		A qw--'.aaQKK  	g:7eTTTUUUUKK 17mmG#;9;!!!GBK	GAMM'6w%PPPQQQa4Kr:   sign	logabsdetc                    t          | d           t          | dd           | j        }|                     |d d                   }|                     |d d         t	          | j                            }t          j        |t          |d          | j        | j	                  }|                     |d d         t          j
                  }||||fS )Nzlinalg.slogdetFr  r   rc  r   )r   r   r   r   rc   ra   rY   rd  r   r   r  )r  r   r  r  r  rk  s         r8   _linalg_slogdetr  Y  s     a)***1.666GE;;uSbSz""DE#2#Joag.F.FGGI		*5%88gx	
 
 
B [[ss5;[77FB&&r:   full_matrices
compute_uvdriverc                    t          | d           t          | d           t          | j        d d                   }| j        d         }| j        d         }t	          j        ||          }|r|||r|n|gz   }|                     |          }	|	                    |t          |d                     ||r|n||gz   }
|                     |
          }t          |           dk    }|                    |
t          |
|                     n,|                     dg          }	|                     dg          }|                     ||gz   t          | j                            }|	||fS )	Nz
linalg.svdr  r   Fr,  r   r   r   )r  r   r   r   rY   r   r   r   r   r   rc   ra   )r  r  r  r  r   r  r  r  U_shaper"  V_shapeVr7  Ss                 r8   _linalg_svd_metar  m  sp    !\"""1l+++agcrcl##J	A	AaA #<111==KK  	g:7eTTTUUU] 91==KK  
 a..F*	g:7gVVVWWWW KKKK 	
J!$OAG,D,DEEAa7Nr:   arg1arg2c                 V   | j         d d         }|j         d d         }t          ||          }t          |          }||                     d          |                     d          gz  }t          |          }||                    d          |                    d          gz  }||fS )Nr  r   )r   r%   r   r   )r  r  arg1_batch_sizesarg2_batch_sizesexpand_batch_portionarg1_expand_sizearg2_expand_sizes          r8   rz  rz    s    
 z#2#z#2#,-=?OPP0112		"660112		"66---r:   c                     |rt          | ||           t          | |          \  }}|| j        k    r| n|                     |          }||j        k    r|n|                    |          }||fS rD   )r  rz  r   rf  )r  r  r  r  r  arg1_broadcastedarg2_broadcasteds          r8   rE  rE    s      1tT4000)EdD)Q)Q&& !DJ..DKK@P4Q4Q  !DJ..DKK@P4Q4Q  ---r:   r   c                 v    | j         d d         }|j        dk    p| j        dz
  |j        k    o
|j         |k    }|S )Nr   r3   )r   r   )r   r   expected_batched_rhs_shapevector_cases       r8   linalg_solve_is_vector_rhsr    sK    !&SbS!1*/ 
Q%*$R8R)R  r:   )r   rN  r  r  rk  rl  c                    t           d           t          j         j        j        k     fd           t	                     }|r                    d          n}	t           |	|d           t          |	           \  }
}t          j        |p| d            |r
|
d d         n|
}t          j        |t          ||           j        j
                  } j        }t          j        |t          |d           j         j
                  }                     |d d         t          j                  }                     |d d         t          j                  }||||f}||||f}t          d	 |D                       rjt          ||          D ]Y\  }}t!          ||j                   |                    |j        |                                           t'          ||d
           Z|S )Nzlinalg.solvec                  *    d j          dj          dS )NzKlinalg.solve: Expected A and B to have the same dtype, but found A of type r  rt  r   )r  r  s   r8   rh   z"_linalg_solve_ex.<locals>.<lambda>  s0    9w9 9'(w9 9 9 r:   r   c                      dS )Nzlinalg.solve: Vector broadcasting of the left hand side is not supported for left=False. In this case linalg.solve is equivalent to B / A.squeeze(-1)r4   r4   r:   r8   rh   z"_linalg_solve_ex.<locals>.<lambda>  s    K r:   rc  Fr   r  c              3      K   | ]}|d uV  	d S rD   r4   rP   r=   s     r8   rv   z#_linalg_solve_ex.<locals>.<genexpr>  s&      
&
&Q1D=
&
&
&
&
&
&r:   )	copy_fromcopy_toexact_dtype)r   rY   rj   ra   r  	unsqueezer  rz  rd  r   r   r   r   r  rK  rL  r!   r   r   r#   )r  r  r   rN  r  r  rk  rl  r  B_B_broad_shaperV   result_shaperesult_r   LU_pivots_info_r   resr  os   ``                    r8   _linalg_solve_exr    s5    1n---	L	17	
 	
 	
 	
 	
   -Q22K'	.RQBaT>2223B::M1	LK	
 	
   *5G="%%-L!*<TBBgx	  G GE

*5%88gx	  C kk%*EKk88GKKcrc
%+K66E2vt
$CC%
(C

&
&#
&
&
&&& FSMM 	F 	FDAqa)))MM!'188::...QuEEEEEJr:   )r   unitriangularr   r  r   c                D   ||                      dg          }t          |t                    st          dt	          |                     t          | ||d           t          || d           \  }}|                    dd                                          o|	                                }|rt          ||j                  }nYt          ||j                  rD|                    |                    dd          j                   |                    dd           |S )Nr   zout must be TensorLike, got zlinalg.solve_triangularr  r   )r   rs   r   r   r|   r  rE  r;  r  is_conjr!   r   r"   r   
transpose_)	r  r  r%  r   r  r   r  A_avoid_copy_As	            r8   linalg_solve_triangular_metar    s    {kk1#c:&& IGDIIGGHHHaD";<<<.q!T::FB<<B''5577HBJJLLL #RX..  RX.. 	#KKR,,2333NN2r"""Jr:   XM)r  r;  c                     t          j         j        dk     fd           t          j        j        dk    fd           t           d           j        t           j        k    rvt                     \  }}t          j        |t          |d           j	         j
                  }t          j        |t          |d          j	        j
                  }nkj        t           j        k    sj        t           j        k    r+t          j                   }                     dg          }nt          j        dd	            ||fS )
Nr  c                      d j          dS )NzMtorch.triangular_solve: Expected b to have at least 2 dimensions, but it has rA  rB  r   s   r8   rh   z'triangular_solve_meta.<locals>.<lambda>!  s!    9)9 9 9 r:   c                      d j          dS )NzMtorch.triangular_solve: Expected A to have at least 2 dimensions, but it has rA  rB  r	  s   r8   rh   z'triangular_solve_meta.<locals>.<lambda>(  rr  r:   triangular_solveFr,  rc  r   c                      dS )Nz+triangular_solve: Got an unexpected layout.r4   r4   r:   r8   rh   z'triangular_solve_meta.<locals>.<lambda>B  s    $Q r:   )rY   rj   r   r  r   stridedrz  rd  r   ra   r   
sparse_csr
sparse_bsrr   r   )	r   r  r%  r;  r  self_broadcast_sizeA_broadcast_sizesolutioncloned_coefficients	   ``       r8   triangular_solve_metar    s    
L	Q	
 	
 	
 	
   
L	!	
 	
 	
 	
   4$6777x5=  0LTST0U0U--&$./BeTTT*;	
 
 
 #0!./?5QQQ'8	
 
 
 
U%	%	%U5E)E)E#D))!^^QC00UQQRRR'''r:   c                 ~   t          | d           t          | d           |                     | j        d d                   }|                     | j                  }|                    | j        t          | j        d                     |                     | j        d d         t          j                  }|||fS )Nz
linalg.detr  Fr,  r   r   rh  )r  detr  rk  s       r8   _linalg_det_metar  G  s    a&&&1l+++
++agcrcl
#
#C	
QW		BNN1775QQQRRR[["U[[99FF?r:   c                     t          j         j        dk    d            t          j        j        dk    d            |rdndt          j        j                 j        d         k    fd           t          j        j                  j        d         k    fd           t          j        j        d          j        d         k    d            t          j         j        j        z
  d	k     fd
           t          j         j        j        k     fd            j        dk    re j        d d         }j        d d         t          j        |k    fd           j        d d         t          j        |k    fd           t          j        j         j        k     fd           t          j        j         j        k     fd           t          d d           t          d d           t          j        j        t          j        d          j        j                  S )Nr  c                      dS )Nz3torch.ormqr: input must have at least 2 dimensions.r4   r4   r:   r8   rh   zormqr.<locals>.<lambda>_      !V r:   c                      dS )Nz3torch.ormqr: other must have at least 2 dimensions.r4   r4   r:   r8   rh   zormqr.<locals>.<lambda>b  r  r:   r  r   c                      d  dS )Ntorch.ormqr: other.shape[z0] must be greater than or equal to tau.shape[-1]r4   left_size_conditions   r8   rh   zormqr.<locals>.<lambda>h  s    q,?qqq r:   c                      d  dS )Nr  z"] must be equal to input.shape[-2]r4   r  s   r8   rh   zormqr.<locals>.<lambda>l  s    c,?ccc r:   c                      dS )NzHtorch.ormqr: tau.shape[-1] must be less than or equal to input.shape[-1]r4   r4   r:   r8   rh   zormqr.<locals>.<lambda>q  rW  r:   r3   c                  (    dj          d j          S )Nz[torch.ormqr: Expected tau to have one dimension less than input, but got tau.ndim equal to r[  rB  r\  s   r8   rh   zormqr.<locals>.<lambda>v  r]  r:   c                  (    dj          d j          S )Nzhtorch.ormqr: Expected other to have the same number of dimensions as input, but got other.ndim equal to r[  rB  r   r   s   r8   rh   zormqr.<locals>.<lambda>}  s1    `+0:` `SXS]` ` r:   c                      d  S )NzWtorch.ormqr: Expected batch dimensions of tau to be equal to input.shape[:-2], but got r4   r_  s   r8   rh   zormqr.<locals>.<lambda>  ra  r:   c                      d  S )NzYtorch.ormqr: Expected batch dimensions of other to be equal to input.shape[:-2], but got r4   )actual_batch_other_shapes   r8   rh   zormqr.<locals>.<lambda>  s    Q6NQ Q r:   c                  (    d j          dj          S )NzPtorch.ormqr: Expected input and tau to have the same dtype, but input has dtype z and tau has dtype r   r\  s   r8   rh   zormqr.<locals>.<lambda>  s0    O#(;O OCF9O O r:   c                  (    d j          dj          S )NzRtorch.ormqr: Expected input and other to have the same dtype, but input has dtype z and other has dtype r   r  s   r8   rh   zormqr.<locals>.<lambda>  s0    S#(;S SEJ[S S r:   ztorch.ormqrrT  r   Fr,  rc  )	rY   rj   r   r   ra   r  rd  r   r   )	r   rT  r   r   r;  expected_batch_shaper  r`  r  s	   ```   @@@r8   ormqrr  U  s    
L
aVV   
L
aVV   !%,"""	L'(CIbM9qqqq   
L'(EKO;cccc  
 
L	"R(ZZ  
 
L
SX"	
 	
 	
 	
 	
   
L
ej 	
 	
 	
 	
 	
   zA~~${3B3/!$3B3"&::   	
 	
 	
 $);ss#3 $(<<   	
 	
 	
 
L	U[ 	
 	
 	
 	
 	
   
Lu{"	
 	
 	
 	
 	
   M3u555M5%999[*5;%HHHk|	   r:   c                    t          j        t                    dz  k    fd            j        }|dz   k    }|}| }|r/t	          d|          D ]}|o                     |          dk    }n-t	          |          D ]}|o                     |          dk    }t          j        |p| fd           d S )Nr  c                  4    dd z   dt                     S )Nzpadding size is expected to be r  z, but got: r   )r   paddings   r8   rh   z,_padding_check_valid_input.<locals>.<lambda>  s"    T!c'TTc'llTT r:   r3   r   c                  0    d dz    d dz    dj          S )N	Expected r3   zD or r  zcD (batch mode) tensor with possibly 0 batch size and other non-zero dimensions for input, but got: r-  )r   r   s   r8   rh   z,_padding_check_valid_input.<locals>.<lambda>  sC    Oa O OcAg O OAFO O r:   )rY   rj   r   r   r   r   )r   r  r   	input_dimis_batch_modevalid_batch_modevalid_non_batch_moder   s   ```     r8   _padding_check_valid_inputr    s!   	LGCTTTTT  
 
I#'*M$,, Oq)$$ 	G 	GA/FEJJqMMQ4F	G y!! 	O 	OA#7#NEJJqMMQ<N   
L00	
 	
 	
 	
 	
    r:   c                   	
 d}dd} j         dk    r                     d          }dz  |dz  }t           |d           |\  	
                     |          }                               	z   
z   |r%t          j        	k     o
k      	
fd           t          j        dk    fd            j         dk    r                     |f          S                      ||f          S )Nr   r3   r0   r   c                  *    d d d  dj          S NzcArgument #4: Padding size should be less than the corresponding input dimension, but got: padding (rz   ) at dimension 
 of input r-  dim_wr   pad_lpad_rs   r8   rh   z_pad1d_common.<locals>.<lambda>  M    c%*c c.3c cDIc cUZU`c c r:   c                      d  d S )Nz
input (W: z%) is too small. Calculated output W: r4   )input_woutput_ws   r8   rh   z_pad1d_common.<locals>.<lambda>  s    UWUU8UU r:   r  )r   r   r  rY   rj   r   )r   r  is_reflection	dim_planenbatchnplaner  r  r  r  r  s   `     @@@@@r8   _pad1d_commonr     sI   IEFzQA
Q	ug15555LE5ZZ	""FjjG&H 
GO/      	
 	
 	
 
LAUUUUU  
 zQ12229:::r:   c                 &    t          | |d          S NTr  )r   r   r  s     r8   meta_reflection_pad1dr%         t<<<<r:   c                 ~     t          j         j        t           j        k     fd           t	           |d          S )Nc                  >    d j                                          dS )Nz)"replication_pad1d" not implemented for ''ra   __str__r   s   r8   rh   z(meta_replication_pad1d.<locals>.<lambda>  !    Xek>Q>Q>S>SXXX r:   Fr#  )rY   rj   ra   boolr   r$  s   ` r8   meta_replication_pad1dr/    H     
Luz!XXXX   u====r:   c                    d|s't          j        t          |          dk    d            j        dk    rdz  |\                                }|z   z   |r%t          j        |k     o|k     fd           t          j                                       k     fd                               j                  S )Nr3   r  c                      dS )Nz padding size is expected to be 2r4   r4   r:   r8   rh   z(_pad1d_backward_common.<locals>.<lambda>	  s    0R r:   r0   c                  *    d d d  dj          S r  r-  r  s   r8   rh   z(_pad1d_backward_common.<locals>.<lambda>  r  r:   c                  :    d d                                 S Nz(grad_output width unexpected. Expected: , Got: r   r  grad_outputr  s   r8   rh   z(_pad1d_backward_common.<locals>.<lambda>  '    e8eeKL\L\]bLcLcee r:   )rY   rj   r   r   r   r   r   )	r8  r   r  r  r  r  r  r  r  s	   ``   @@@@r8   _pad1d_backward_commonr:    s   E TS\\Q&(R(RSSSzQ
LE5jjG&H 
GO/      	
 	
 	
 
LK$$U+++eeeeee  
 ??5;'''r:   
grad_inputc                 (    t          | ||d          S r"  r:  r8  r   r  s      r8   meta_reflection_pad1d_backwardr?  $  s     "+ugTRRRRr:   c                 (    t          | ||d          S )NFr#  r=  r>  s      r8   meta_replication_pad1d_backwardrA  *  s     "+ugUSSSSr:   c                   	
 ddd}d}t           |d            j        }|dk    r$                     d          }dz  dz  |dz  }|\                       |          }                               	                               
	z   z   
z   z   |rJt          j        
k     o
k      fd           t          j        	k     o	k      fd           t          j        dk    pdk    	
fd            j        d	k    r                     |f          S                      ||f          S )
Nr  r3   r   r      c                  *    d d d  dj          S r  r-  r  s   r8   rh   z_pad2d_common.<locals>.<lambda>J  r  r:   c                  *    d d d  dj          S NzcArgument #6: Padding size should be less than the corresponding input dimension, but got: padding (rz   r  r  r-  dim_hr   pad_bpad_ts   r8   rh   z_pad2d_common.<locals>.<lambda>Q  r  r:   c                       d  d d d S )Nz
input (H:  W: z%) is too small. Calculated output H: r4   )input_hr  output_hr  s   r8   rh   z_pad2d_common.<locals>.<lambda>Y  s?    = = =g = =$,= =2:= = r:   r0   r  r   r   rY   rj   r   )r   r  r  
dim_slicesr  r   r  rH  r  rM  r  rN  r  rI  r  r  rJ  s   `      @@@@@@@@@@r8   _pad2d_commonrQ  0  s   EEJFug15555:DqyyA

a
!(E5%ZZ
##FjjGjjG&H&H 
GO/      	
 	
 	
 	GO/      	
 	
 	
 
LA&Q	
 	
 	
 	
 	
 	
 	
   zQ(;<<<(CDDDr:   c                 &    t          | |d          S r"  )rQ  r$  s     r8   meta_reflection_pad2drS  e  r&  r:   c                 ~     t          j         j        t           j        k     fd           t	           |d          S )Nc                  >    d j                                          dS )Nz)"replication_pad2d" not implemented for 'r)  r*  r,  s   r8   rh   z(meta_replication_pad2d.<locals>.<lambda>p  r-  r:   Fr#  )rY   rj   ra   r.  rQ  r$  s   ` r8   meta_replication_pad2drV  k  r0  r:   c                 Z    t          j        |          }t          j        |          }||fS rD   rY   r   )grad_wsaved_vsaved_gsaved_normsr   grad_vgrad_gs          r8   meta_weight_norm_backwardr_  u  s.     g&&Fg&&F6>r:   c                     ddd}|j         }|                                dk    rdz  dz  |dz  }|\  }}}}|         }	|         }
|	|z   |z   |
|z   |z   t          j                                       k     fd           t          j                                       k     fd           |                    |j                   S )Nr  r3   r   rC  c                  :    d d                                 S r5  r   r7  s   r8   rh   z%meta_pad2d_backward.<locals>.<lambda>  r9  r:   c                  :    d d                                 S Nz)grad_output height unexpected. Expected: r6  r   rH  r8  rN  s   r8   rh   z%meta_pad2d_backward.<locals>.<lambda>  '    fHff[M]M]^cMdMdff r:   )r   r   rY   rj   r   r   )r8  r   r  r  rg   r  r  rJ  rI  rM  r  rH  r  rN  r  s   `          @@@@r8   meta_pad2d_backwardrf  ~  s    EEIJxxzzQ

Q	!(E5%GG&H&H	LK$$U+++eeeeee   
LK$$U+++ffffff   >>$*%%%r:   c          	      R   	
 d	ddd}t           |d            j        dk    }|r)                     d          }	dz  	dz  dz  |dz  }|\                       |          }                               
                                                    	          
z   z   z   z   z   z   |rot          j        k     ok     	 fd           t          j        k     ok      fd           t          j        
k     o
k      fd	           t          j        dk    pdk    pdk    
fd
           |r                     ||f          S                      |f          S )Nr0   r  r3   r   r      c                  *    d d d  dj          S r  r-  r  s   r8   rh   z_pad3d_common.<locals>.<lambda>  r  r:   c                  *    d d d  dj          S rF  r-  rG  s   r8   rh   z_pad3d_common.<locals>.<lambda>  r  r:   c                  *    d d d  dj          S )NzcArgument #8: Padding size should be less than the corresponding input dimension, but got: padding (rz   r  r  r-  )dim_dr   pad_bkpad_fs   r8   rh   z_pad3d_common.<locals>.<lambda>  sM    d%*d d.4d dEJd dV[Vad d r:   c                  ,    d  d d d d d S )Nz
input (D:  H: rL  z%) is too small. Calculated output D: r4   )input_drM  r  output_drN  r  s   r8   rh   z_pad3d_common.<locals>.<lambda>  se    K K Kg K K7 K K$,K K2:K K@HK K r:   rO  )r   r  r  r  
batch_moder  r  rl  rH  r  rq  rM  r  rr  rN  r  rI  rm  rn  r  r  rJ  s   `      @@@@@@@@@@@@@@@r8   _pad3d_commonrt    s   EEEIug15555qJ A


Q	07-E5%vZZ	""FjjGjjGjjG'H&H&H 
GO/      	
 	
 	
 	GO/      	
 	
 	
 	GO0 0      	
 	
 	
 
LA7Q7(a-	
 	
 	
 	
 	
 	
 	
 	
 	
    G(HMNNN(HEFFFr:   c                 &    t          | |d          S r"  )rt  r$  s     r8   meta_reflection_pad3drv    r&  r:   c                 ~     t          j         j        t           j        k     fd           t	           |d          S )Nc                  >    d j                                          dS )Nz)"replication_pad3d" not implemented for 'r)  r*  r,  s   r8   rh   z(meta_replication_pad3d.<locals>.<lambda>  r-  r:   Fr#  )rY   rj   ra   r.  rt  r$  s   ` r8   meta_replication_pad3dry    r0  r:   c                 Z    t          j        t          |          dk    d            |j        dk    rt	          d|j                    j        |j        k    rt	          d j         d|j                   ddd|j        d	k    rdz  dz  dz  |\  }}}}}}|                              }	|                              }
|                              }|	|z   |z   |
|z   |z   ||z   |z   t          j                                       k     fd
           t          j                                       k     fd           t          j                                       k     fd           |                    |j                  S )N   c                      dS )Nz padding size is expected to be 6r4   r4   r:   r8   rh   z%meta_pad3d_backward.<locals>.<lambda>  s    ,N r:   r0   zinput.ndim must be > 3, got z,grad_output.ndim must equal input.ndim, got  != r  r3   rh  c                  :    d d                                 S r5  r   r7  s   r8   rh   z%meta_pad3d_backward.<locals>.<lambda>	  r9  r:   c                  :    d d                                 S rc  r   rd  s   r8   rh   z%meta_pad3d_backward.<locals>.<lambda>	  re  r:   c                  :    d d                                 S )Nz(grad_output depth unexpected. Expected: r6  r   )rl  r8  rr  s   r8   rh   z%meta_pad3d_backward.<locals>.<lambda>!	  r9  r:   )rY   rj   r   r   r   r   r   r   )r8  r   r  r  r  rJ  rI  rn  rm  rq  rM  r  rl  rH  r  rr  rN  r  s   `           @@@@@@r8   meta_pad3d_backwardr    s     
LW"$N$NOOOzQHEJHHIII5:%%];;K]]QVQ[]]
 
 	
 EEEzQ


07-E5%vjjGjjGjjG'H&H&H	LK$$U+++eeeeee   
LK$$U+++ffffff   
LK$$U+++eeeeee  
 ??5;'''r:   r  pc                 j   t          j        |                                 d            |                     d          }|dk    r4|                     dg                              t           j                  S |                     ||dz
  z  dz  f                              t           j                  S )Nc                      dS )Nz(_pdist_forward requires contiguous inputr4   r4   r:   r8   rh   z%meta__pdist_forward.<locals>.<lambda>+	  s    &P r:   r   r3   r   r  )rY   rj   r  r   r   rd  rJ  )r   r  r  s      r8   meta__pdist_forwardr  '	  s     
LPP   			!AAvv~~qc""%%E4R%SSS~~qAE{a/122558 6 
 
 	
r:   gradpdistc                     t          j        |                                d            t          j        |                                d            t          j        |t           j                  S )Nc                      dS )Nz._pdist_backward requires self to be contiguousr4   r4   r:   r8   rh   z&meta__pdist_backward.<locals>.<lambda>:	  s    &V r:   c                      dS )Nz/_pdist_backward requires pdist to be contiguousr4   r4   r:   r8   rh   z&meta__pdist_backward.<locals>.<lambda>=	  s    'X r:   r   )rY   rj   r  r   rJ  )r  r   r  r  s       r8   meta__pdist_backwardr  6	  ss     
LVV   
LXX   D0NOOOOr:   )r  r  c          
      J    ddl m}m}                     d          }                    d          }                    d          }	 |t	          j         | j        |||	f                              r                     |||	f           t	          j        	                                dk    d            t	          j        	                                dk    d            t          j        s7t	          j         j        j        cxk    o
j        k    nc  fd           j        }
j        |
d         |
d         t	          j        d         k    od         k    fd	                                                                           S )
Nr   )guard_or_truesym_eqr3   r  r0   c                      dS Nzbatch1 must be a 3D tensorr4   r4   r:   r8   rh   zmeta_baddbmm.<locals>.<lambda>L	      ,H r:   c                      dS Nzbatch2 must be a 3D tensorr4   r4   r:   r8   rh   zmeta_baddbmm.<locals>.<lambda>M	  r  r:   c                  8    dj          d j          dj          S )Nz+Input dtypes must be the same, got: input: z
, batch1: z
, batch2: r   )batch1batch2r   s   r8   rh   zmeta_baddbmm.<locals>.<lambda>Q	  s*    ~$*~~X^Xd~~pvp|~~ r:   c            	      :    d d d d          d d          d	S Nz@Expected size for first two dimensions of batch2 tensor to be: [rz   z] but got: [r   r3   ].r4   batch2_sizesbscontraction_sizes   r8   rh   zmeta_baddbmm.<locals>.<lambda>Y	  sY    [[ [&[ [4@O[ [GSTU[ [ [ r:   )ra  r  r  r   rY   sym_notr   rf  rj   r   
exp_config&skip_dtype_check_in_meta_registrationsra   r   )r   r  r  r  r  r  r  dim1dim2dim3batch1_sizesr  r  r  s   ```        @@@r8   meta_baddbmmr  B	  s    LKKKKKKK;;q>>D;;q>>D;;q>>D}U]66$*tT46H#I#IJJKK /{{D$-..	L"$H$HIII	L"$H$HIII< 
J&,6666&,6666~~~~~~	
 	
 	
 <L<L	aB#A	LQ2E,q/5E"E	
 	
 	
 	
 	
 	
   >>$))++&&&r:   c                B    t          j        | t           j                  S r   r  r   r  s     r8   meta_bernoullir  a	  s     D0GHHHHr:         ?c                     | S rD   r4   r   r  r  s      r8   meta_bernoulli_r  h	      Kr:   c                 B    t          j        | t           j                  S r   r  r  s      r8   meta_bernoulli_pr  m	  s     D0GHHHHr:   c                 *    t          j        |           S rD   rX  r  s     r8   meta_poissonr  s	       D!!!r:   c                     t          j        |
|                                 k     d            t          j        | t           j                  }t          j        |           |fS )Nc                      dS )NzJError in fused_moving_avg_obs_fake_quant_cpu: ch_axis must be < self.dim()r4   r4   r:   r8   rh   z6meta__fused_moving_avg_obs_fq_helper.<locals>.<lambda>	      \ r:   r   )rY   rj   r   r   r.  )r   observer_onfake_quant_onrunning_minrunning_maxscale
zero_pointaveraging_const	quant_min	quant_maxch_axisper_row_fake_quantsymmetric_quantmasks                 r8   $meta__fused_moving_avg_obs_fq_helperr  y	  s]      
L$((**\\   D
333DT""D))r:   c                    t          j        |                                 dk    d            t          j        |                                dk    d            | j        \  |j        \  t          j        k    fd           |Nt          j        || j        k    p.|t           j        k    o| j        t           j        t           j        fv d            || j        n|}|                     f|          S )Nr  c                      dS )Nza must be 2Dr4   r4   r:   r8   rh   zmeta_mm.<locals>.<lambda>	      ~ r:   c                      dS )Nzb must be 2Dr4   r4   r:   r8   rh   zmeta_mm.<locals>.<lambda>	  r  r:   c            	      "    d d  d d d	S )Nz/a and b must have same reduction dim, but got [rz   z] X [r  r4   )M1M2Nr~  s   r8   rh   zmeta_mm.<locals>.<lambda>	  s-    [![[r[[PR[[VW[[[ r:   c                      dS )NzFout_dtype must be the same as input dtype or fp32 for fp16/bf16 inputsr4   r4   r:   r8   rh   zmeta_mm.<locals>.<lambda>	  s    \ r:   r   )	rY   rj   r   r   ra   r  r  r  r   )r6   r7   rz  rQ   r  r  r  r~  s       @@@@r8   meta_mmr  	  s    
LA55666	LA55666GEArGEB	L
b[[[[[[[     U]* ?Gu~>>\\	
 	
 	
 (/177YL;;1v\;222r:   c                      |r.t           fdt           j                  D                       S t          j         j                  S )Nc              3   >   K   | ]}|vrj         |         nd V  dS rE  r-  )rP   r   dimsr   s     r8   rv   z+_compute_reduction_shape.<locals>.<genexpr>	  s5      UUqatmmTZ]]UUUUUUr:   )ri   r   r   rS   compute_reduction_output_shaper   )r   r  r  s   `` r8   r  r  	  sR     VUUUUUE$)DTDTUUUUUU/
DAAAr:   strc                     t          | t          j        j                  r| j        j        S t          | d          r1t          | j        d          r| j        j        dk    r| j        j        S dS )Nr   r|   r   r   )rs   rY   _subclasses
FakeTensorfake_devicer|   hasattrr   )rm  s    r8   r   r   	  sn    &%+677 	!&&!!FM6** M&((}!!vr:   input_tensorr   r  dilationis_transposedgroupsoutput_paddingc                    dt           dt           dt           dt           dt           dt           fd}dt           dt           dt           dt           dt           dt           dt           fd	}	|j        d
d          }
| j        d
d          |r||j        d         z  }n@|j        d         }t          j        |j        d         |z  | j        d         k    d            | j        d         |gt	          |t
                    r|gt                    z  }n,t          |          dk    r|d         gt                    z  }t	          |t
                    r|gt                    z  }n,t          |          dk    r|d         gt                    z  }t	          |t
                    r|gt                    z  }n,t          |          dk    r|d         gt                    z  }d }|rXt	          |t
                    r|gt                    z  }n/t          |          dk    r|d         gt                    z  }n|}t          t                              D ]}|rH                     |	|         ||         ||         |
|         ||         ||                              L                     ||         ||         ||         |
|         ||                              ddl	m
} ddlm} t	          | |          r| j        n| j        }|j        dk    ot          j        j        d u }|s.t          j         |d d
d          D              fd           S )Nlnr  r   r  r  r@   c                 6    | d|z  z   ||dz
  z  z
  dz
  |z  dz   S )a  
        Formula to apply to calculate the length of some dimension of the output

        See: https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html

        Args:
            ln: length of the dimension
            p: padding in that dim
            d: dilation in that dim
            k: kernel size in that dim
            s: stride in that dim
        Returns:
            The output length
        r  r3   r4   )r  r  r   r  r  s        r8   _formulaz+calc_conv_nd_return_shape.<locals>._formula	  s.     QU
Q!a%[(1,2Q66r:   rE   c                 <    | dz
  |z  d|z  z
  ||dz
  z  z   |z   dz   S )a  
        Formula to apply to calculate the length of some dimension of the output
        if transposed convolution is used.
        See: https://pytorch.org/docs/stable/generated/torch.nn.ConvTranspose2d.html

        Args:
            ln: length of the dimension
            p: padding in that dim
            d: dilation in that dim
            k: kernel size in that dim
            s: stride in that dim
            op: output padding in that dim

        Returns:
            The output length
        r3   r  r4   )r  r  r   r  r  rE   s         r8   _formula_transposedz6calc_conv_nd_return_shape.<locals>._formula_transposed	  s2    " Q!|a!e#a1q5k1B6::r:   r  r3   r   c                      dS )NzInvalid channel dimensionsr4   r4   r:   r8   rh   z+calc_conv_nd_return_shape.<locals>.<lambda>	  s    0 r:   )r  )sym_orr   c                     g | ]}|d k    	S r   r4   r  s     r8   rR   z-calc_conv_nd_return_shape.<locals>.<listcomp>?
  s    222qQU222r:   c                  @    dt                      ddd           dS )NzGiven input size per channel: z&. Calculated output size per channel: r  z. Output size is too small)r   )r  	ret_shapes   r8   rh   z+calc_conv_nd_return_shape.<locals>.<lambda>@
  s6     (T$ZZ ( (3<QRR=( ( ( r:   )r   r   rY   rj   rs   r   r   r   appendtorch._subclasses.fake_tensorr  ra  r  r  r   r|   versionhip)r  rv  r   r  r  r  r  r  r  r  kernel_sizeout_channelsoutput_padding_listr   r  r  r   is_cudnnr  r  s                     @@r8   calc_conv_nd_return_shaper  	  s   7S 7S 7S 7S 7S 7S 7 7 7 7"; ; ; ; ; ; ;QT ; ; ; ;& ,qrr"Kabb!D 
Q/|ALOf$(:1(==00	
 	
 	

 #A&5I&'"" )CII%	V		)s4yy('7## +)c$ii'	W		1:,T*(G$$ -:D		)	X!		QK=3t99,,0 1ng.. 	1#1"2SYY">  A%%#1!#4"5D		"A"03t99   	##GAJ QKN1I'*     a'!*hqk;q>6RS9UU    988888<<<<<< lJ//	!     {f$B):d)BH 
F22IabbM2223( ( ( ( (	
 	
 	
 r:   c                 \    t           j                            |           t           j        k    S rD   rY   _prims_commonr   channels_lasttens    r8   is_channels_lastr  H
  s!    44S99U=PPPr:   running_meanrunning_vartrainingexponential_average_factorepsilonc                 v     j         }||j         n|j         }	||j         n|j         }
 fd}                     |                               |                      }|r+                     |	          }                     |
          }n*                     d          }                     d          }|||fS )Nc                      t                     rt          j        S                      t          j                  rt          j        S t          j        S r   )r  rY   r  r  r   )r  s   r8   pick_memory_formatz2meta_miopen_batch_norm.<locals>.pick_memory_format_
  sJ    L)) 	'&&%%E4K%LL 	+**&&r:   r   r   )r   r   rd  )r  rv  rx  r  r  r  r  r  r   save_mean_shapesave_var_shaper  r   	save_meansave_vars   `              r8   meta_miopen_batch_normr  L
  s     "I -9,Dl((&,O*5*A[&&v|N' ' ' ' ' 
 
 
+
+
.
.=O=O=Q=Q
.
R
RC 0 **?;;	)).99 **400	))$//	8##r:   c	           
          t          | |||||||r|nd           }	ddlm}
 d}d} |
|                     |          dk              rd|	|<   |                     |	          }|S )Nr   guard_or_falser3   )r  ra  r  r   r   )r  rv  rx  r   r  r  r  r  r  	shape_outr  input_channels_dimoutput_channels_dimr   s                 r8   	meta_convr  r
  s     *'1T	 	I EDDDDD~l''(:;;q@AA +)*	%& 
 
 
+
+CJr:   mkldnnc
           
          t          | ||||d|g           }
|                     |
          }t          j        }|                                 dk    rt          j        }|                    |          }|S )NFrh  r   )r  r   rY   r  r   channels_last_3drd  )r  rv  rx  r  r   r  r  attrscalars	algorithmr  r   out_memory_formats                r8   meta_mkldnn_convolution_defaultr  
  s{     .&&'8UFB
 
	 $$Y//!/"" % 6ff#4f55
r:   c                 f    |                      g | j        d d         |j        d         R           S Nr   r   r   r   )r  rv  rx  r  r  r  s         r8   meta_linear_pointwise_defaultr  
  s8     %%&Q(:3B3(?&Qa&Q&QRRRr:   r   c                 f    |                      g | j        d d         |j        d         R           S r  r  )r  packed_weightorig_weightrx  r   s        r8   meta_mkl_linearr  
  s>    ))@,$SbS)@;+<Q+?@@  r:   onednnc           
         t          | ||||	d|
d           }|| j        }|t          j        t          j        t          j        t          j        t          j        fvrt          d|           | 	                    ||          }t          |          dvr t          dt          |           d          t          j        t          j        t          j        dt          |                   }|                    |          }|S )NFOoutput_dtype must be one of float32, bfloat16, uint8, int8, float8_e4m3fn, got r   )r0   rC  rh  z3Expect output to be 3d/4d/5d for conv1d/2d/3d, got r   r   )r  ra   rY   r  r  uint8r  r  r   r   r   r   r  r  rd  )r=   x_scalex_zpww_scalew_zprx  r   r  r  r  output_scaleoutput_zero_pointoutput_dtyper  r  r  r  r   formats                       r8   meta_qconv_pointwiser)  
  s   , .	
 	
	 7LMNKJ 
 
 
 !pbnpp   kk)<k88y>>** Wc)nnWWW   &"%
 
 i..	
 ff6f**
r:   c                 8    |dk    rt          d| d          |S )Nsumz#binary_op_name must be 'sum', got 'r)  r   )r=   r   r!  r"  r#  r$  accumrx  r   r  r  r  r%  r&  r'  accum_scaleaccum_zero_pointbinary_op_namer  unary_op_nameunary_op_argsunary_op_algorithms                         r8   meta_qconv2d_pointwise_binaryr4    s6    4 U"" GnGGG   r:   c                    t          | j                  }|j        d         |d<   |	t          j        t          j        t          j        t          j        t          j        fvrt          d|	           | 	                    ||	          }|S )Nr3   r   zOoutput_dtype must be one of float32, bfloat16, int8, uint8, float8_e4m3fn, got r   )
r   r   rY   r  r  r  r  r  r   r   )r=   r   r!  r"  r#  r$  rx  r%  r&  r'  post_op_namepost_op_argspost_op_algorithmr  r   s                  r8   meta_qlinear_pointwiser9  #  s    " AG}}71:RMNJK 
 
 
 !pbnpp   kk,lk;;
r:   c                 &   |dk    r|S t          | j                  }|j        d         |d<   |
t          j        t          j        t          j        t          j        t          j        fvrt          d|
           | 	                    ||
          }|S )Nr+  r3   r   r  r   )
r   r   rY   r  r  r  r  r  r   r   )r=   r   r!  r"  r#  r$  x_2rx  r%  r&  r'  x2_scalex2_zpr0  r  r1  r2  r3  r  r   s                       r8   meta_qlinear_pointwise_binaryr>  D  s    , U""JAG}}71:RMNKJ 
 
 
 !pbnpp   kk,lk;;
r:   c                 x    t          | j                  }|j        d         |d<   |                     |          }|S )Nr3   r   )r   r   r   )r=   r"  rx  r  r   s        r8   meta_linear_dynamic_fp16r@  l  s7     AG}}71:Rkk,''
r:   	quantizedr4   r   r3   c                 4   t          | |||||          \  }}}|                                 dk    r|                     d          nd}	t          j        }
|                                 dk    r|||g}n|	|||g}t          j        || j        | j        |
          S NrC  r3   r0   r  )#max_pool2d_checks_and_compute_shaper   r   rY   r  r   ra   r   r   r  r   r  r  	ceil_modenInputPlaneoutputHeightoutputWidthr  r   r   s               r8   meta_quantized_max_pool2drL  }  s     0;9
 
		
 $)99;;!#3#3B+99;;!{;DDK{CD{+<'	
 
 
 	
r:   c                     t          j                                         dk     fd           t          j                                        dk    fd           t          j         j        t           j        t           j        t           j        fv  fd           t          j        j        t           j        k    fd           t          j        j        t           j        k    fd           t          j        j         j        k    fd            	                     
                    d          
                    d           j        	          S )
Nr  c                  4    d                                   dS )Nzx must be a 2D tensor, got r  r   r=   s   r8   rh   z/meta_int4mm_packed_weight_cpu.<locals>.<lambda>      +S+S+S+S r:   c                  4    d                                   dS )Nzw must be a 2D tensor, got r  r   r"  s   r8   rh   z/meta_int4mm_packed_weight_cpu.<locals>.<lambda>  rP  r:   c                      d j          S Nz#expected x to be f32/f16/bf16, got r   rO  s   r8   rh   z/meta_int4mm_packed_weight_cpu.<locals>.<lambda>  s    C!'CC r:   c                      d j          S Nzexpected w to be uint8, got r   rR  s   r8   rh   z/meta_int4mm_packed_weight_cpu.<locals>.<lambda>  s    ,T17,T,T r:   c                      d j          S )Nz q_group_size must be int64, got r   )q_group_sizes   r8   rh   z/meta_int4mm_packed_weight_cpu.<locals>.<lambda>  s    K|7IKK r:   c                      d j          S )Nz5q_scale_and_zeros must have the same dtype as x, got r   )q_scale_and_zeross   r8   rh   z/meta_int4mm_packed_weight_cpu.<locals>.<lambda>  s    eL]Lcee r:   r   r   )rY   rj   r   ra   r  r  r  r  r   r   r   r=   r"  rX  rZ  s   ````r8   meta_int4mm_packed_weight_cpur\    sI   QUUWW\#S#S#S#STTTQUUWW\#S#S#S#STTTGu}enEECCCC	
 	
 	
 	Gu{"$T$T$T$T	
 	
 	
 	%+-KKKK	
 	
 	
 	#qw.eeee	
 	
 	
 {{166!99affQiiqw{???r:   c                      t          j                                         k    o j                 k     fd           d S )Nc                  n    d  d d dd                                  d dj                  z   S )NzExpected a tensor of dimension z and tensor.size[z] == rz   zbut got : dimension z] = r   r   )r   dim_sizer   rm  s   r8   rh   z check_dim_size.<locals>.<lambda>  sU    _#____W[___
f
f
f
f
ffl[cNd
f
fg r:   )rY   rj   r   r   )rm  r   r`  r   s   ````r8   check_dim_sizera    so    	L

>X 6$ >	g 	g 	g 	g 	g 	g 	g    r:   r  r  ra   c                 *    t          j        |           S rD   rX  )r   r  r  ra   s       r8   meta_quantize_per_tensorrc    s     E"""r:   c                     d } |d|          \  }}	t          j        t          |          dv d            t          j         j        t           j        t           j        t           j        t           j        fv fd           t          |          dk    r||	}}
n3t          |          dk    r|d         |d         }}
n |d|          \  }
} |d	|          \  }}t          j        |d u p|dk    d
                                             dk    r 	                    d          nd} 	                    d          } 	                    d          } 	                    d          }t          ||||
d|          }t          ||	||d|          }t          j                   }t           ||	|
|||dd||||||                                            dk    r|||g}n||||g}t          j        | j         j        |          S )Nc                      t          j        t          |          dv  fd           |d         }t          |          dk    r|n|d         }||fS )Nr3   r  c                      d  dS )Nzavg_pool2d: 4 must either be a single int, or a tuple of two intsr4   r  s   r8   rh   z1meta_avg_pool2d.<locals>.unpack.<locals>.<lambda>      ]4]]] r:   r   r3   rY   rj   r   r  r  HWs   `   r8   unpackzmeta_avg_pool2d.<locals>.unpack  `    HH]]]]	
 	
 	
 FSQAACF!tr:   r  r   r3   r  c                      dS NzOavg_pool2d: stride must either be omitted, a single int, or a tuple of two intsr4   r4   r:   r8   rh   z!meta_avg_pool2d.<locals>.<lambda>      a r:   c                  >    d j                                          dS )Nz""avg_pool2d" not implemented for 'r)  r*  r,  s   r8   rh   z!meta_avg_pool2d.<locals>.<lambda>  !    Qu{7J7J7L7LQQQ r:   r   r3   r   r  c                      dS Nzdivisor must be not zeror4   r4   r:   r8   rh   z!meta_avg_pool2d.<locals>.<lambda>      * r:   rC  rE  r  r   r0   r  )rY   rj   r   ra   r  uint16uint32r2  r   r   pooling_output_shaperS   r   pool2d_shape_checkr   r   )r   r  r   r  rH  count_include_paddivisor_overridero  kHkWdHdWpadHpadWr  rI  inputHeight
inputWidthrJ  rK  r   r   s   `                     r8   meta_avg_pool2dr    sR      VM;//FB	LFy aa   
LEKu|U\RRQQQQ   6{{aRB	V		F1IB&))B	7++JD$	LD 9$4$9**  
  %yy{{a//UZZ^^^QF**R..K**R..KBJ'Rr1iPPL&z2tRINNK/66M



		  $ yy{{a\;7\;?;k|#	   r:   c                     t          | ||||||dd|	|
||||           |                                 }|	}t          |||dz
  |           t          |||dz
  |           t          |||dz
  |           d S )Nr3   r0   r  )r~  r   ra  )r   
gradOutputr  r  r  r  r  r  r  rI  r  r  rJ  rK  
mem_formatr   nOutputPlanes                    r8   avg_pool2d_backward_shape_checkr    s    " 



		  $ 99;;DL:tTAX|<<<:tTAX|<<<:tTAX{;;;;;r:   c                 j   t          j        t          |          dk    pt          |          dk    d            |d         }t          |          dk    r|n|d         }	t          j        t          |          dk    p%t          |          dk    pt          |          dk    d            t          |          dk    r|n|d         }
t          |          dk    r|	nt          |          dk    r|
n|d         }t          j        t          |          dk    pt          |          dk    d            |d         }t          |          dk    r|n|d         }t          j        |d u p|dk    d            |j        }|                                dk    r|d	         nd}|d
         }|d         }|d         }t          ||||
d|          }t          ||	||d|          }t          j        |          }t          || |||	|
|||||||||           t          j	        ||j
        |j        |          S )Nr3   r  c                      dS )NzKavg_pool2d: kernel_size must either be a single int, or a tuple of two intsr4   r4   r:   r8   rh   z*meta_avg_pool2d_backward.<locals>.<lambda>L  s    ] r:   r   c                      dS rs  r4   r4   r:   r8   rh   z*meta_avg_pool2d_backward.<locals>.<lambda>R  rt  r:   c                      dS )NzGavg_pool2d: padding must either be a single int, or a tuple of two intsr4   r4   r:   r8   rh   z*meta_avg_pool2d_backward.<locals>.<lambda>X  s    Y r:   c                      dS rx  r4   r4   r:   r8   rh   z*meta_avg_pool2d_backward.<locals>.<lambda>_  ry  r:   rC  rE  rz  r  r   r  )rY   rj   r   r   r   r}  rS   r   r  r   ra   r   )gradOutput_r   r  r   r  rH  r  r  r  r  r  r  r  r  
input_sizer  rI  r  r  rJ  rK  r  s                         r8   meta_avg_pool2d_backwardr  >  sz    
LKA6[!1!1Q!6]]   
QB;1$$+a.B	LFq@CKK1,@Fq0@aa   6{{aVAYB6{{a3v;;!+;+;RRB	LG.S\\Q.YY   1:Dw<<1$$44'!*D	LD 9$4$9**  
 J$yy{{a//Z^^QFR.KR.KBJ'Rr1iPPL&z2tRINNK,U33J#



  $ ;k| 	   r:   c                     t          j        t          |          dv d            |d         }t          |          dk    r|n|d         }t          |          dk    r|n|d         }	t          j        | pt          |          dv d            t          j         j        t           j        t           j        t           j        t           j        fv fd           |s|n|d         }
|s|nt          |          dk    r|
n|d         }|s|	nt          |          dk    r|
n|d         }t          j        t          |          dv d            |d         }t          |          dk    r|n|d         }t          |          dk    r|n|d         }t          j         j        d	v d
            t          j        | p|dk    d             	                    d          } 	                    d          } 	                    d          } 	                    d          } 	                    d          }t          ||||
d|          }t          ||||d|          }t          ||	||d|          }t           ||||	|
|||||ddd||||||dd            j        dk    r                     ||||f          S                      |||||f          S )Nr3   r0   c                      dS NzFavg_pool3d: kernel_size must be a single int, or a tuple of three intsr4   r4   r:   r8   rh   z!meta_avg_pool3d.<locals>.<lambda>      X r:   r   r3   r  c                      dS NzJavg_pool3d: stride must be omitted, a single int, or a tuple of three intsr4   r4   r:   r8   rh   z!meta_avg_pool3d.<locals>.<lambda>  r  r:   c                  >    d j                                          dS )Nz""avg_pool3d" not implemented for 'r)  r*  r,  s   r8   rh   z!meta_avg_pool3d.<locals>.<lambda>  rv  r:   c                      dS NzBavg_pool3d: padding must be a single int, or a tuple of three intsr4   r4   r:   r8   rh   z!meta_avg_pool3d.<locals>.<lambda>      T r:   rC  rh  c                      dS Nz9non-empty 4D or 5D (batch mode) tensor expected for inputr4   r4   r:   r8   rh   z!meta_avg_pool3d.<locals>.<lambda>      K r:   c                      dS rx  r4   r4   r:   r8   rh   z!meta_avg_pool3d.<locals>.<lambda>  ry  r:   rE  rz  r  r   zavg_pool3d()T)check_input_sizerC  )rY   rj   r   ra   r  r{  r|  r2  r   r   r}  pool3d_shape_checkr   )r   r  r   r  rH  r  r  kTr  r  dTr  r  padTr  r  r  nslicesitimeiheightiwidthotimeoheightowidths   `                       r8   meta_avg_pool3dr    s2    
LKF"XX   
QB;1$$+a.B;1$$+a.B	L
+c&kkV+\\   
LEKu|U\RRQQQQ   	(vayB	Fc&kkQ&6&6F1IB	Fc&kkQ&6&6F1IB	LGTT   1:Dw<<1$$44'!*Dw<<1$$44'!*D	L
fKK  
 
L5 0A 5**  
 ZZ]]FjjnnGJJrNNEjjnnGZZ^^F D"aCCE"7Bb!YGGG!&"dB9EEF





			-   2 zQ@AAAHIIIr:   c                    t          j        t          |          dv d            |d         }t          |          dk    r|n|d         }	t          |          dk    r|n|d         }
t          j        | pt          |          dv d            |s|n|d         }|s|	nt          |          dk    r|n|d         }|s|
nt          |          dk    r|n|d         }t          j        t          |          dv d            |d         }t          |          dk    r|n|d         }t          |          dk    r|n|d         }t          j        |j        dv d	            t          j        | p|dk    d
            |                    d          }|                    d          }|                    d          }|                    d          }t          ||||d|          }t          ||	||d|          }t          ||
||d|          }t          || |||	|
||||||||||||d           |                    |j                  S )Nr  c                      dS r  r4   r4   r:   r8   rh   z*meta_avg_pool3d_backward.<locals>.<lambda>  r  r:   r   r3   r  c                      dS r  r4   r4   r:   r8   rh   z*meta_avg_pool3d_backward.<locals>.<lambda>  r  r:   c                      dS r  r4   r4   r:   r8   rh   z*meta_avg_pool3d_backward.<locals>.<lambda>  r  r:   r  c                      dS r  r4   r4   r:   r8   rh   z*meta_avg_pool3d_backward.<locals>.<lambda>  r  r:   c                      dS rx  r4   r4   r:   r8   rh   z*meta_avg_pool3d_backward.<locals>.<lambda>  ry  r:   rE  rz  r  r   zavg_pool3d_backward())	rY   rj   r   r   r   r}  avg_pool3d_backward_shape_checkr   r   )r8  r   r  r   r  rH  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  otime_for_shape_checkoheight_for_shape_checkowidth_for_shape_checks                           r8   meta_avg_pool3d_backwardr    s    
LKF"XX   
QB;1$$+a.B;1$$+a.B	L
+c&kkV+\\   	(vayB	Fc&kkQ&6&6F1IB	Fc&kkQ&6&6F1IB	LGTT   1:Dw<<1$$44'!*Dw<<1$$44'!*D	L
fKK  
 
L5 0A 5**  
 jjnnGJJrNNEjjnnGZZ^^F0D"aSS27Bb!YWW1&"dB9UU#





'  , ??5;'''r:   c                     t          j         j        dk    p
 j        dk     fd            j        d d         t	          |          z   }t          j                   }t          j        | j         j	        |          S )Nr0   rC  c                      d j          S )Nz"Expected 3D or 4D tensor, but got r-  r   s   r8   rh   z*meta_adaptive_avg_pool2d.<locals>.<lambda>5      ATZAA r:   r  r  )
rY   rj   r   r   ri   rS   r   r   ra   r   )r   output_sizer  r   s   `   r8   meta_adaptive_avg_pool2dr  1  s    	L	Q($)q.AAAA   :crc?U;%7%77L/55M ;j{#	   r:   c                      t          j         j        dk    p
 j        dk     fd                                 j        d d         t          |          z             S )NrC  rh  c                      d j          S )Nz"Expected 4D or 5D tensor, but got r-  r   s   r8   rh   z*meta_adaptive_avg_pool3d.<locals>.<lambda>G  r  r:   rz  )rY   rj   r   r   r   ri   )r   r  s   ` r8   meta_adaptive_avg_pool3dr  C  sb    	L	Q($)q.AAAA   >>$*SbS/E+,>,>>???r:   c                      j         }t          d|          D ]2t          j                                       dk     fd           3t          j        |dk    p|dk    fd           t          j        j         j        k     fd           t          j        }t                    rt          j        }	                    j
                                      |          S )	Nr3   r   c                       d j          d dS )Nz{adaptive_avg_pool2d_backward(): Expected grad_output to have non-zero                       size for non-batch dimensions,  with dimension  being emptyr-  )grad_outr   s   r8   rh   z4meta__adaptive_avg_pool2d_backward.<locals>.<lambda>R  s1     f6>nf fVWf f f r:   r0   rC  c                      d j          S )NzBadaptive_avg_pool2d_backward(): Expected 3D or 4D tensor, but got r-  r   s   r8   rh   z4meta__adaptive_avg_pool2d_backward.<locals>.<lambda>W  s    aUYU_aa r:   c                  (    dj          d j          S Nexpected dtype z! for `grad_output` but got dtype r   )r  r   s   r8   rh   z4meta__adaptive_avg_pool2d_backward.<locals>.<lambda>[  s    _$*__x~__ r:   r   )r   r   rY   rj   r   ra   r   r  r  r   r   rd  )r  r   r   r   r   s   ``  @r8   "meta__adaptive_avg_pool2d_backwardr  L  s   =D1d^^ 
 
MM!q f f f f f	
 	
 	
 	

 
L	TQYaaaa   
L
hn$_____   +M ,+>>$*%%((}(EEEr:   c                 b    t          | d           t          j        |t          j                  S )Nadaptive_avg_pool3d_backwardr   )!_adaptive_pool_empty_output_checkrY   r   rJ  r8  r   s     r8   "meta__adaptive_avg_pool3d_backwardr  c  s.     &k3QRRRD0NOOOOr:   r8  c                       j         }t          d|          D ]3t          j                                       dk     fd           4d S )Nr3   r   c                  $      dj          d dS )Nzc(): Expected grad_output to have non-zero size for non-batch dimensions, but grad_output has sizes r  r  r-  )r  r8  r   s   r8   rh   z3_adaptive_pool_empty_output_check.<locals>.<lambda>o  s>     ` `-8->` `PQ` ` ` r:   )r   r   rY   rj   r   )r8  r  r   r   s   `` @r8   r  r  j  s{    D1d^^ 
 
Q!#     	
 	
 	
 	

 
r:   c                 8     j         }t          j        |dv  fd           t          d|          D ]2t          j                                       dk     fd           3t          j        t          |          dk    d            d}d}d} j         dk    r                     d          }|dz  }                     |dz
            }|\  }} j         d	k    r?|||f}                     |          }	                     |t          j        
          }
|	|
fS ||||f}t          j	                   }                     |          
                    |          }	                     |t          j        
          
                    |          }
|	|
fS )Nr0   rC  c                      d j          S )Nz:adaptive_max_pool2d(): Expected 3D or 4D tensor, but got: r-  r,  s   r8   rh   z*meta_adaptive_max_pool2d.<locals>.<lambda>|      ZU[ZZ r:   r3   r   c                       dj          d  dS )Nzjadaptive_max_pool2d(): Expected input to have non-zero size for non-batch dimensions, but input has sizes r  r  r-  r   r   s   r8   rh   z*meta_adaptive_max_pool2d.<locals>.<lambda>  4    T',{T TDET T T r:   r  c                      dS )NzCadaptive_max_pool2d(): internal error: output_size.size() must be 2r4   r4   r:   r8   rh   z*meta_adaptive_max_pool2d.<locals>.<lambda>      U r:   rC  r0   r   r   )r   rY   rj   r   r   r   r   r   rS   r   rd  )r   r  r   dimHsizeBsizeDosizeHosizeWr   r   r   r   r   s   `           @r8   meta_adaptive_max_pool2dr  v  s    :D	LZZZZ   1d^^ 
 
JJqMMA    	
 	
 	
 	
 
LKAUU  
 DEEzQ

1	JJtax  E NFFzQFF+	ooi((//)5;/??G|E662	3E::ooi((++-+HH//)5;/??BB' C 
 
 G|r:   c                 :     j         }t          j        |dv  fd           t           d           t          j        j         j        k     fd           t          j                  }                    j                  	                    |          S )Nr  c                      d j          S )NzKadaptive_max_pooling2d_backward(): Expected 3D or 4D grad_output, but got: r-  r8  s   r8   rh   z3meta_adaptive_max_pool2d_backward.<locals>.<lambda>  s    q^i^oqq r:   adaptive_max_pool2d_backwardc                  (    dj          d j          S r  r   )r8  r   s   r8   rh   z3meta_adaptive_max_pool2d_backward.<locals>.<lambda>  s    c%+ccP[Pacc r:   r   )
r   rY   rj   r  ra   rS   r   r   r   rd  )r8  r   r   r   r   s   ``   r8   !meta_adaptive_max_pool2d_backwardr    s     D	Lqqqq  
 &k3QRRR	L{((ccccc  
 /66M??5;''***GGGr:   c                 :     j         }t          j        |dv  fd           t          d|          D ]2t          j                                       dk     fd           3t          j        t          |          dk    d            d}d}d}|dk    r                     d          }|dz  }                     |          }|\  }}}|d	k    r||||f}	n|||||f}	                     |	          }
                     |	t          j        
          }|
|fS )Nr  c                      d j          S )Nz:adaptive_max_pool3d(): Expected 4D or 5D tensor, but got: r-  r,  s   r8   rh   z*meta_adaptive_max_pool3d.<locals>.<lambda>  r  r:   r3   r   c                       dj          d  dS )Nzjadaptive_max_pool3d(): Expected input to have non-zero size for non-batch dimensions, but input has sizes r  r  r-  r  s   r8   rh   z*meta_adaptive_max_pool3d.<locals>.<lambda>  r  r:   r0   c                      dS )NzCadaptive_max_pool3d(): internal error: output_size.size() must be 3r4   r4   r:   r8   rh   z*meta_adaptive_max_pool3d.<locals>.<lambda>  r  r:   rh  rC  r   )r   rY   rj   r   r   r   r   r   )r   r  r   dimDr  r  osizeTr  r  r   r   r   r   s   `           @r8   meta_adaptive_max_pool3dr    sg    :D	LZZZZ   1d^^ 
 
JJqMMA    	
 	
 	
 	
 
LKAUU  
 DEEqyy

1	JJtE(FFFqyyFFF3		E666:	
//)
$
$Cooiu{o;;G<r:   c                 V    t          | d           |                    |j                  S )Nadaptive_max_pool3d_backward)r  r   r   )r8  r   r   s      r8   !meta_adaptive_max_pool3d_backwardr    s)     &k3QRRR??5;'''r:   c                 N    |t          d          |                     |          S )Nz:cannot repeat_interleave a meta tensor without output_size)rc  r   )repeatsr  s     r8   meta_repeat_interleave_Tensorr    s+    WXXX[)))r:   c                 ^   | j         j        st          d| j                    |j         j        st          d|j                    t          |                     t          | j                             |                    t          |j                             t          j                  }|S )Nz!real must be floating point, got z!imag must be floating point, got rL   )ra   r  r   rW   rd  r   r   rT   )realimagr  s      r8   meta_complexr    s     :' OMMMNNN:' OMMMNNN+DJ7788+DJ77886>  F
 Mr:   )
fill_valuer  c                   t          |           dv r5|                     ||                                 ft          j                  S t          j        ||                                 fd|ft          j        | j                  S )N)r   mpsr	  r   r3   ra   r   )r   r   r   rY   r   rd  r   )r   r   r  s      r8   nonzero_staticr    sx     4111~~tTXXZZ0
~CCC"488::I*;	
 
 
 	
r:   c                    t          j        t          j        d            t          j        |                                 |                                 fd|                                 ft           j        | j                  S )Nc                      dS )NaY  The register_meta function for torch.nonzero() raises unimplemented by default, as a correct data-independent implementation does not exist. This implementation returns a fake value, assuming all elements of the tensor are non-zero. To enable this registration, please set 'torch.fx.experimental._config.meta_nonzero_assume_all_nonzero' to True.r4   r4   r:   r8   rh   znonzero.<locals>.<lambda>  s
     S r:   r3   r  )	rY   _check_not_implementedr  meta_nonzero_assume_all_nonzerord  r   r   r   r   r   s    r8   nonzeror    sy     
 2	S 	S   	txxzz"	
DJJLLj{	   r:   c           
      ,	    t          j        t                    d            g }t                    D ]d\  Ft          j        j        t           j        t           j        t           j        t           j        fv d            j        t           j        t           j        fv rǉ                                }t          |          t          j
        j        z    j        k     fd           t          j                  D ]dt          j
        j                  j        z            k     fd           |                    |                    d                     e8|                               O|                               f|t          j        t                     j        k     fd           dd lm} t%           |j                   t                     j        k     r-                    d            t                     j        k     -d}d}D ]|dk    rd}|dk    rd	} nd
}|sg }g }t                    D ]1\  *|                               |                               2t                    D ]1\  *|                               |                               2                     |           |g g g t                    D ]_\  }	Dr!                     j        |	                    *                     j        |	                    Kt%          j                  `fd}
                     z   z             }ddlm}  |                                 dk              r|S  |
           }t3          j        |          \  }}t%          |          t%          t          t          |                              k    r}t3          j        |j        |          }t3          j        |          }t3          j        |t3          j        |                    }|                    |                                |          }|S )Nc                      dS )Nz#at least one index must be providedr4   r4   r:   r8   rh   z#meta_index_Tensor.<locals>.<lambda>'  s    (M r:   c                      dS )Nz?tensors used as indices must be long, int, byte or bool tensorsr4   r4   r:   r8   rh   z#meta_index_Tensor.<locals>.<lambda>/  s    Y r:   c                      d j          S )N)too many indices for tensor of dimension rB  r   s   r8   rh   z#meta_index_Tensor.<locals>.<lambda>6  s    S	SS r:   c            	      :    dj          d  dj          dz    S )NzThe shape of the mask 
 at index z0 does not match the shape of the indexed tensor r-  )r   r   jr  r   s   r8   rh   z#meta_index_Tensor.<locals>.<lambda>;  sP     !h !h !hPQ !h !hJN*!h !h`ade`e!h !h r:   r3   c                  :    dj          dt                      dS )Nr  z (got r{   )r   r   )r   r   s   r8   rh   z#meta_index_Tensor.<locals>.<lambda>F  s$    \DI\\SQX\\\\\ r:   r   Fr  Tc                    z   z   }t          |                                           }dgt                    z  |t                    t          | j                  t                    z
  <   |                     ||          S )zI
        This follows restride_src in TensorAdvancedIndexing.cpp
        r   )r   r   r   r   
as_strided)r   r   r   after_shapebefore_shapereplacement_shapes      r8   _restride_srcz(meta_index_Tensor.<locals>._restride_src  s     00;>t{{}}%%KL#PSQ
 Q
 K
L!!C
OOc+6F6F$FFG ug...r:   r  ) rY   rj   r.  	enumeratera   r   r   r  r  r   r   r   r   r   r  selecttorch._refsre  r   r&   r   r   ra  r  r   rS   3compute_elementwise_output_logical_to_physical_perm
apply_permr   invert_permr  r   )r   r   r  r  refsstatehas_contiguous_subspacer  transposed_indicesr   r  r   r  restrided_selfpermrV   
perm_shaperp  r  r  r   r   r  r  r  s   ``                @@@@@@@r8   meta_index_Tensorr%  %  s   	Lg M MNNN #%Fg&& ! !5L
EIuz5:NNYY   {uz5:666--//KK"
Ndi/SSSS   uz** 8 8A&A$*QU*;;h h h h h h h h  
 MM'..A"6"677778 e$$$$MM%    G	LG	!\\\\\  
 (4('233G
g,,
"
"t g,,
"
" E# ' 'A:: aZZ}  ! #'
 # %!'** 	1 	1HAu A"))%000!'** 	1 	1HAu}A"))%000||D!!$ !LK#%(( 2 2
U=  5""4:c?3333##DJsO4444 $U[ 1 1
/ 
/ 
/ 
/ 
/ 
/ 
/ ..(99KG
H
HCDDDDDD~djjlla'(( 

 #]4((NGWWGD! DzzT%D		**++++%ci66
6zBB
%j%2CD2I2IJJ
nnSXXZZ44Jr:   c                 |   d }d }d }d } |||          }|
d         r;|                      |                                                              |          }|
d         r;|                      |                                                              |          }|
d         r|                      |          }|||fS )Nc                    t          |           }t          |          }|t          j        k    s|t          j        k    rt          j        S |t          j        k    s|t          j        k    rt          j        S t          j        S rD   )r   rY   r  r  r   )t1t2fmt1fmt2s       r8   _conv_memory_formatz6meta_convolution_backward.<locals>._conv_memory_format  sm    $R(($R((5&&&$%2E*E*E&&5)))TU5K-K-K))&&r:   r   r   r3   r  )r   r   rd  )grad_output_input_weight_bias_sizes_optr   r  r  
transposedr  r  output_maskbackend_grad_inputbackend_grad_weightbackend_grad_biasr,  r   s                   r8   meta_convolution_backwardr6    s      ' ' ' ('88M1~ 
)33FKKMMBBEE' F 
 
 1~ 
*44W\\^^DDGG' H 
 
 1~ C(22>BB 35FGGr:   c                                        d          }                     d          }|                     ||f          } t          j                                        dk    d            t          j                                        dk    d            t          j                             d                               d          k    fd           t          j                             d                               d          k    fd           t          j        |                      d          |k    o|                      d          |k    d	            |                     |                                            S )
Nr3   r  r0   c                      dS r  r4   r4   r:   r8   rh   zmeta_addbmm.<locals>.<lambda>  r  r:   c                      dS r  r4   r4   r:   r8   rh   zmeta_addbmm.<locals>.<lambda>  r  r:   r   c                  `    d                      d           d                     d           S )Nz8batch1 and batch2 must have same number of batches, got r   r   r   r  r  s   r8   rh   zmeta_addbmm.<locals>.<lambda>  s3    p6;;WX>>pp`f`k`klm`n`npp r:   c            
          d                      d           d                      d           d                     d           d                     d           d	S )Nz#Incompatible matrix sizes for bmm (r3   r=   r  r   r{   r   r;  s   r8   rh   zmeta_addbmm.<locals>.<lambda>  sl    6&++a.. 6 66;;q>> 6 6;;q>>6 6$*KKNN6 6 6 r:   c                      dS )Nz.self tensor does not match matmul output shaper4   r4   r:   r8   rh   zmeta_addbmm.<locals>.<lambda>  s    @ r:   )r   rf  rY   rj   r   r   )r   r  r  r  r  r  r  s    ``    r8   meta_addbmmr>    sg    ;;q>>D;;q>>D;;d|$$D	L"$H$HIII	L"$H$HIII	LA&++a..(ppppp   
LA&++a..(	
 	
 	
 	
 	
   
L		!51!5@@   >>$))++&&&r:   c                 P    |                      |                                           S rD   r   r   )r   r!  kwargss      r8   meta_randint_likerB    s    >>$))++&&&r:   )
grad_scale	found_infc       	         t    | |||||fD ]-t          j        t          t                    fd           .d S )Nc                  (    dt                      S Nz'exponent must be a tensor list but got r|   ls   r8   rh   z#meta__fused_adam_.<locals>.<lambda>      Gd1ggGG r:   rY   rj   rs   r   )r   gradsexp_avgsexp_avg_sqsmax_exp_avg_sqsstate_stepslrbeta1beta2weight_decayepsamsgradmaximizerC  rD  rJ  s                  @r8   meta__fused_adam_rY    s]    & E8[/;O 
 
q$GGGG	
 	
 	
 	

 
r:   c       	             | |||||fD ]-t          j        t          t                    fd           .d } ||            ||           ||           ||           ||          fS )Nc                  (    dt                      S rG  rH  rI  s   r8   rh   z"meta__fused_adam.<locals>.<lambda>+  rK  r:   c                     d | D             S )Nc                 6    g | ]}t          j        |          S r4   rX  )rP   r  s     r8   rR   z=meta__fused_adam.<locals>.empty_like_list.<locals>.<listcomp>/  s#    999 ##999r:   r4   )tensor_lists    r8   empty_like_listz)meta__fused_adam.<locals>.empty_like_list.  s    99[9999r:   rL  )r   rM  rN  rO  rP  rQ  rR  rS  rT  rU  rV  rW  rX  rC  rD  r_  rJ  s                   @r8   meta__fused_adamr`    s    & E8[/;O 
 
q$GGGG	
 	
 	
 	

: : : 	!!$$(( r:   c                     t          j                                         dk    d            t          j                                        dk    d            t          j         j        t           j        t           j        fv  fd           t          j        j        t           j        u fd           t          j                             d                              d          k     fd                                                     d                              d          ft           j        	          S )
Nr  c                      dS )Nza must be a 2D tensorr4   r4   r:   r8   rh   zmeta__int_mm.<locals>.<lambda>=      '> r:   c                      dS )Nzb must be a 2D tensorr4   r4   r:   r8   rh   zmeta__int_mm.<locals>.<lambda>>  rc  r:   c                      d j          S )Nz'expected self to be int8 or uint8, got r   )r6   s   r8   rh   zmeta__int_mm.<locals>.<lambda>A  s    C!'CC r:   c                      d j          S )Nzexpected mat2 to be int8, got r   )r7   s   r8   rh   zmeta__int_mm.<locals>.<lambda>E  s    ::: r:   r3   r   c            
          d                      d           d                      d           d                     d           d                     d           d	S )Nz'Incompatible matrix sizes for _int_mm (r   r=   r3   r   r{   r   r5   s   r8   rh   zmeta__int_mm.<locals>.<lambda>I  sl    ,affQii , ,!&&)) , ,66!99, , vvayy, , , r:   r   )	rY   rj   r   ra   r  r  r   r   r  r5   s   ``r8   meta__int_mmrh  :  s)    
LA>>???	LA>>???	L	EJ,,CCCC   
L	5:::::   
L	q		QVVAYY	
 	
 	
 	
 	
   ;;q		166!99-U[;AAAr:   c                 j    t          j                                         dk    d            t          j         j        t           j        u  fd                                d          }                     d          dz  }                     |dz  ||dz  z  d|dz  ft           j        	          S )
Nr  c                      dS Nzw must be a 2D tensorr4   r4   r:   r8   rh   z2meta__convert_weight_to_int4pack.<locals>.<lambda>S  rc  r:   c                      d j          S rV  r   rR  s   r8   rh   z2meta__convert_weight_to_int4pack.<locals>.<lambda>V      8qw88 r:   r   r3             r   )rY   rj   r   ra   r  r   r   r  r"  inner_k_tilesr  r  s   `   r8    meta__convert_weight_to_int4packrs  Q  s    	LA>>???	L	5;8888   	
q		A	q		AA;;F-"$%Q		
 k    r:   c                 N    t          j                                         dk    d            t          j         j        t           j        u  fd                                d          }                     d          }                     ||dz  ft           j                  S )Nr  c                      dS rk  r4   r4   r:   r8   rh   z:meta__convert_weight_to_int4pack_for_cpu.<locals>.<lambda>g  rc  r:   c                      d j          S Nzexpected w to be int32, got r   rR  s   r8   rh   z:meta__convert_weight_to_int4pack_for_cpu.<locals>.<lambda>j  rm  r:   r   r3   r   )rY   rj   r   ra   r  r   r   r  rq  s   `   r8   (meta__convert_weight_to_int4pack_for_cpurx  e  s    	LA>>???	L	5;8888   	
q		A	q		A;;	
AFk    r:   c                     t          j                                         dk    d            j        j        dk    rdndt          j                                        k    fd           t          j         j        t           j        t           j        t           j        fv  fd           t          j        j        t           j	        u fd           j        j        dk    r
                    d          n
                    d          d	z  }                      
                    d          | j        
          S )Nr  c                      dS Nzx must be a 2D tensorr4   r4   r:   r8   rh   z*meta__weight_int4pack_mm.<locals>.<lambda>v  rc  r:   r	  rC  c                      d  dS )Nzw must be a zD tensorr4   )expected_dims   r8   rh   z*meta__weight_int4pack_mm.<locals>.<lambda>x  s    2W2W2W2W r:   c                      d j          S rT  r   rO  s   r8   rh   z*meta__weight_int4pack_mm.<locals>.<lambda>{      ?ag?? r:   c                      d j          S rw  r   rR  s   r8   rh   z*meta__weight_int4pack_mm.<locals>.<lambda>  rm  r:   r   rn  r   )rY   rj   r   r  r|   ra   r  r  r  r  r   r   )r=   r"  rX  rZ  dim_nr}  s   ``   @r8   meta__weight_int4pack_mmr  t  s#   	LA>>???*e3311L	LL(*W*W*W*WXXX	L	EM5=%.AA????   
L	5;8888   +u44AFF1III!&&))a-E;;qvvayy%qw;777r:   c                     t          j                                         dk    d            t          j                                        dk    d            t          j         j        t           j        t           j        t           j        fv  fd           t          j        j        t           j        u fd                                 	                    d          	                    d           j                  S )Nr  c                      dS r{  r4   r4   r:   r8   rh   z2meta__weight_int4pack_mm_for_cpu.<locals>.<lambda>  rc  r:   c                      dS rk  r4   r4   r:   r8   rh   z2meta__weight_int4pack_mm_for_cpu.<locals>.<lambda>  rc  r:   c                      d j          S rT  r   rO  s   r8   rh   z2meta__weight_int4pack_mm_for_cpu.<locals>.<lambda>  r  r:   c                      d j          S rV  r   rR  s   r8   rh   z2meta__weight_int4pack_mm_for_cpu.<locals>.<lambda>  rm  r:   r   r   )
rY   rj   r   ra   r  r  r  r  r   r   r[  s   ``  r8    meta__weight_int4pack_mm_for_cpur        	LA>>???	LA>>???	L	EM5=%.AA????   
L	5;8888   ;;qvvayy!&&))17;;;;r:   c                     t          j                                         dk    d            t          j                                        dk    d            t          j         j        t           j        t           j        t           j        fv  fd           t          j        j        t           j        u fd                                 	                    d          	                    d           j                  S )Nr  c                      dS r{  r4   r4   r:   r8   rh   z;_weight_int4pack_mm_with_scales_and_zeros.<locals>.<lambda>  rc  r:   c                      dS rk  r4   r4   r:   r8   rh   z;_weight_int4pack_mm_with_scales_and_zeros.<locals>.<lambda>  rc  r:   c                      d j          S rT  r   rO  s   r8   rh   z;_weight_int4pack_mm_with_scales_and_zeros.<locals>.<lambda>  r  r:   c                      d j          S rw  r   rR  s   r8   rh   z;_weight_int4pack_mm_with_scales_and_zeros.<locals>.<lambda>  rm  r:   r   r   )
rY   rj   r   ra   r  r  r  r  r   r   )r=   r"  rX  qScaleqZeross   ``   r8   )_weight_int4pack_mm_with_scales_and_zerosr    r  r:   r6   r7   c                     | |z   dz
  |z  |z  S r2   r4   r5   s     r8   kai_roundupr    s    UQY1!!r:   c                 0  	
 | dk    r||k    r+d}d}d}dddd fdfd} ||||||          S |dz  d	k    rH||z  d	k    rAd}d}d}dddd		fd
}	
fdd 
	fd	fd |||||||          S d S d S d S )NrC  rn  ro  r  c                 H    t          ||z  d          }t          | |          S )NrC  r  )r  krsrkr_sr_roundedup4s       r8   kai_k_roundedupz3get_kai_packed_weight_size.<locals>.kai_k_roundedup  s)     $/rBw#:#: "1&6777r:   c                 v     | ||          }|dz  dk    rt          d|           ||dz  z   z   z   z  S )Nr  r   zk_internal must be even, got r,  )	r  nrr  r  
k_internalr  kai_num_bytes_biaskai_num_bytes_multiplier_rhskai_num_bytes_sum_rhss	        r8   9kai_get_rhs_packed_stride_rhs_pack_nxk_qsi4cxp_qsu4cxs1s0z]get_kai_packed_weight_size.<locals>.kai_get_rhs_packed_stride_rhs_pack_nxk_qsi4cxp_qsu4cxs1s0  sl     -_QB77
Nq((()U)U)UVVV1_23+, )) r:   c                 L    t          | |          |z  }| ||||          z  S rD   r  )r  r  r  r  r  num_rowsr  s         r8   7kai_get_rhs_packed_size_rhs_pack_nxk_qsi4cxp_qsu4cxs1s0z[get_kai_packed_weight_size.<locals>.kai_get_rhs_packed_size_rhs_pack_nxk_qsi4cxp_qsu4cxs1s0  sA     'q"--3 OO2r2 r:   rp  r   c                    ||z  dk    rt          d| d| d          |	z  dk    rt          d| d	 d          |z  dk    rt          d| d d          t          | |          |z  }| |||||          z  S Nr   bl (z) must be divisible by kr (r{   znr (z+) must be divisible by kai_nr_multiple_of (+) must be divisible by kai_bl_multiple_of (r   r  )
r  r  r  r  r  blr  kai_bl_multiple_of;kai_get_rhs_packed_stride_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0kai_nr_multiple_ofs
          r8   9kai_get_rhs_packed_size_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0z]get_kai_packed_weight_size.<locals>.kai_get_rhs_packed_size_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0  s     G>>()T)T)Tr)T)T)TUUU++11(crccN`ccc   ++11(crccN`ccc   'q"--3 QQ2r2r r:   c                    ||z  dk    rt          d| d| d          |
z  dk    rt          d| d
 d          |z  dk    rt          d| d d           	            } | |          } ||          }|||z  z   z   z  S r  r,  )r  r  r  r  r  num_bytes_multiplier_rhsnum_blocks_per_rownum_bytes_per_blockr  #kai_get_bf16_datatype_size_in_bytesr  kai_num_blocks_per_rowr  kai_num_bytes_per_blockr  s           r8   r  z_get_kai_packed_weight_size.<locals>.kai_get_rhs_packed_stride_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0  s    G>>()T)T)Tr)T)T)TUUU++11(crccN`ccc   ++11(crccN`ccc  
 ,O+N+P+P(%;%;Ar%B%B"&=&=0' '# (+==+,() r:   c                      dS )Nr  r4   r4   r:   r8   r  zGget_kai_packed_weight_size.<locals>.kai_get_bf16_datatype_size_in_bytes  s    qr:   c                 h    |z  dk    rt          d| d d          t          | |          |z  S )Nr   r  r  r{   r  )r  r  r  s     r8   r  z:get_kai_packed_weight_size.<locals>.kai_num_blocks_per_row  sT    ++11(crccN`ccc   #1b))R//r:   c                 R    | z  dk    rt          d|  d d          | dz  |z   S )Nr   r  r  r{   r  r,  )r  r  r  s     r8   r  z;get_kai_packed_weight_size.<locals>.kai_num_bytes_per_block  sO    ++11(crccN`ccc   a#;;;r:   r4   )n_bitsr  K	groupsizekai_nrkai_krkai_srr  r  r  r  r  r  r  r  r  r  r  r  r  s            @@@@@@@@@@@r8   get_kai_packed_weight_sizer    s   {{>>FFF$%!+,(!"8 8 8       
 
 
 
 
 KJ1fff   ^q  Q]a%7%7FFF$%!!"!"!#      .          :  0 0 0 0 0< < < < < ML1fffi  { {^ ! %7%7r:   c                 r    t          j         j        t           j        u  fd           t           j        j                                        r||k    r|j        t           j        k    s-||k     rg|dz  dk    r^||z  dk    rU|j        t           j        k    r@t          d|||          } 
                    t          |          t           j                  S                                  |                                z   }|||                                z  } 
                    |t           j                  S )Nc                      d j          S rV  r   )weightss   r8   rh   z2meta__dyn_quant_pack_4bit_weight.<locals>.<lambda>0  s    >w}>> r:   rp  r   rC  r   )rY   rj   ra   r  r   kleidiair   r]   r  r  r   r   r   )r  scales_zerosrx  
block_sizein_featuresout_featurespacked_weight_sizes   `      r8    meta__dyn_quant_pack_4bit_weightr  *  s6    
L$>>>>   ~++-- M	{	"	"|'9U['H'H$$R1$$j(A--"en44 8|[*
 
   %7!8!8 LLL <+=+=+?+??djjll*/u{CCCr:   c                 T    t          j                                         dk    d            t          j         j        t           j        k    p j        t           j        k    ok     fd                                d          }                     || j                  S )Nr  c                      dS )Nzinput must be a 2D tensorr4   r4   r:   r8   rh   z-meta__dyn_quant_matmul_4bit.<locals>.<lambda>M  s    )D r:   c                  $    dj          d  d S )NzPexpected input to be f32 or bf16 (bf16 requires block_size == in_features), got z with block_size=z and in_features=r   )r  r  inps   r8   rh   z-meta__dyn_quant_matmul_4bit.<locals>.<lambda>Q  s=    Z9Z Z/9Z ZLWZ Z r:   r   r   )rY   rj   r   ra   r  r  r   r   )r  packed_weightsr  r  r  r  s   ` ``  r8   meta__dyn_quant_matmul_4bitr  E  s     
La!D!DEEE	L	em	# 	GI'EJ+,E	
 	
 	
 	
 	
 	
   	A==L	=:::r:   c                     t          j                                         dk    d            t          j         j        t           j        t           j        t           j        fv  fd           t          j                                        dk    d            t          j        j        t           j        u fd                                 	                    d          	                    d           j                  S )Nr  c                      dS r{  r4   r4   r:   r8   rh   z*meta__weight_int8pack_mm.<locals>.<lambda>\  rc  r:   c                      d j          S rT  r   rO  s   r8   rh   z*meta__weight_int8pack_mm.<locals>.<lambda>_  r  r:   c                      dS rk  r4   r4   r:   r8   rh   z*meta__weight_int8pack_mm.<locals>.<lambda>a  rc  r:   c                      d j          S )Nzexpected w to be int8, got r   rR  s   r8   rh   z*meta__weight_int8pack_mm.<locals>.<lambda>d  s    7ag77 r:   r   r   )
rY   rj   r   ra   r  r  r  r  r   r   )r=   r"  q_scaless   `` r8   meta__weight_int8pack_mmr  Z  s    	LA>>???	L	EM5=%.AA????   
LA>>???	L	5:7777   ;;qvvayy!&&))17;;;;r:   c                     t          j                                         dk     fd           t          j                                        dk    fd           t          j                             d                              d          k     fd           t          j        t	          j         j                   fd           t          j        t	          j        j                  fd           t          j        |dk    d	            t          j        d
v fd                                d          }                    d          } j        d d         }j        d d         }t          t          j	        ||                    }|
                    ||g                                |          S )Nr  c                  4    d                                   dS )Nz1cdist only supports at least 2D tensors, X1 got: r  r   x1s   r8   rh   z$meta_cdist_forward.<locals>.<lambda>m      OBFFHHOOO r:   c                  4    d                                   dS )Nz1cdist only supports at least 2D tensors, X2 got: r  r   x2s   r8   rh   z$meta_cdist_forward.<locals>.<lambda>q  r  r:   r   c                  `    d                      d           d                     d           S )Nz4X1 and X2 must have the same number of columns. X1: r   z X2: r   )r  r  s   r8   rh   z$meta_cdist_forward.<locals>.<lambda>u  s2    frwwr{{ffY[Y`Y`acYdYdff r:   c                      d j          S )Nz3cdist only supports floating-point dtypes, X1 got: r   r  s   r8   rh   z$meta_cdist_forward.<locals>.<lambda>y      PbhPP r:   c                      d j          S )Nz3cdist only supports floating-point dtypes, X2 got: r   r  s   r8   rh   z$meta_cdist_forward.<locals>.<lambda>}  r  r:   r   c                      dS )Nz)cdist only supports non-negative p valuesr4   r4   r:   r8   rh   z$meta_cdist_forward.<locals>.<lambda>  s    !L r:   )Nr   r3   r  c                      d  S )Nz(possible modes: None, 0, 1, 2, but was: r4   )compute_modes   r8   rh   z$meta_cdist_forward.<locals>.<lambda>  s    I<II r:   r  )rY   rj   r   r   rS   is_float_dtypera   r   r   broadcast_shapesextendr   )	r  r  r  r  r1r2batch_tensor1batch_tensor2r  s	   `` `     r8   meta_cdist_forwardr  i  s   	L
AOOOO   
L
AOOOO   
L
rwwr{{"fffff   
LRX&&PPPP   
LRX&&PPPP   
LaLLMMM	L'IIII   
B	BHSbSMMHSbSMM.}mLLMMLR!!!<<%%%r:   c                 @   |j         d         }|j         d         }|j         d         }|j         d d         }|j         d d         }	t          t          j        ||	                    }
|
                                }|                    ||g           t          j        |
          }|dk    s|dk    s|dk    s|dk    rt          j        |          S |t          |j                   k    r|	                    |          }t          j
        |t          j                  S )Nr   r  r   r   )r   r   rY   r  copyr  mathprod
zeros_likerf  r   r   )r  r  r  r  cdistc1r  r  r  r  r  tensor1_expand_sizebatch_products                r8   meta_cdist_backwardr    s
    
"B	"B	"BHSbSMMHSbSMM 6}m T TUU.3355Bx(((I233M	Qww"''R1WW(:(:###d28nn,,YY*++Be.EFFFFr:   c	                 r    t          j        j        t           j        t           j        fv fd           t          j        j        t           j        t           j        fv fd           t          j        t          j         j                   fd                               d          }	|rt          j        |	dk    d            |	dz  }	                     |	                     d                    }
t          j        |t          k    d            t          j        j
        dk    fd           t          j                                                                        k    fd	           fd
d fd}t                    dk    r                                        d                    }                                                              }|t          k    r+                    |	                     d                    }n                    d          }n | |
|          }|t          t          fv s|s)                                        d                    }n                    d          }                    |	          }j        d         }|t          k    rC|rt          j        |dk    d            |dz  }                    | j        d                   }n'                    |                                          }|
|||fS )Nc                      d j          S )Nz(expected indices to be long or int, got r   )r   s   r8   rh   z$meta_embedding_bag.<locals>.<lambda>      J7=JJ r:   c                      d j          S )Nz(expected offsets to be long or int, got r   )r  s   r8   rh   z$meta_embedding_bag.<locals>.<lambda>  r  r:   c                      d j          S )Nz/expected weight to be floating point type, got r   )rv  s   r8   rh   z$meta_embedding_bag.<locals>.<lambda>  s    P&,PP r:   r   r3   c                      dS Nz1include_last_offset: numBags should be at least 1r4   r4   r:   r8   rh   z$meta_embedding_bag.<locals>.<lambda>  s    G r:   c                      dS )Nz@embedding_bag: per_sample_weights only supported with mode='sum'r4   r4   r:   r8   rh   z$meta_embedding_bag.<locals>.<lambda>  s    V r:   c                      d j          dS )Nz1expected per_sample_weights to be 1D tensor, got r  rB  )per_sample_weightss   r8   rh   z$meta_embedding_bag.<locals>.<lambda>  s    bHZH_bbb r:   c                  ^    d                                  d                                   dS )Nz%expected per_sample_weights.numel() (z$ to be the same as indices.numel() (r{   r   )r   r  s   r8   rh   z$meta_embedding_bag.<locals>.<lambda>  sF    I8J8P8P8R8R I I6=mmooI I I r:   c                 P     | ||          o|                     d          dk    S Nr   r3   r   )rg  r  r   padding_idxis_fast_path_index_selects       r8   is_fast_path_index_select_scalez;meta_embedding_bag.<locals>.is_fast_path_index_select_scale  s0    %%c6;??XELLQROOWXDX	
r:   c                     | j         t          j        k    s| j         t          j        k    o7|                     d          dk    o|                    d          dk    o|dk     S Nr3   r   )ra   rY   r]   r[   r   )rg  r   r  s      r8   r  z5meta_embedding_bag.<locals>.is_fast_path_index_select  s_    Y%+%@ej)@  

1" a  A%  a		
r:   c                 >    | | |||          S  | ||          S rD   r4   )rg  r  r   r  r  r	  s       r8   is_fast_pathz(meta_embedding_bag.<locals>.is_fast_path  s5    223v{SSS,,S&+FFFr:   r   c                      dS r  r4   r4   r:   r8   rh   z$meta_embedding_bag.<locals>.<lambda>  s    O r:   )rY   rj   ra   r   r   rS   r  r   r   MODE_SUMr   r   r   MODE_MAX	MODE_MEANr   )rv  r   r  scale_grad_by_freqr  sparser  include_last_offsetr  num_bagsr   r  
offset2bagbag_sizemax_indicesfast_path_sumnumBagsr  r	  s   ```   `          @@r8   meta_embedding_bagr    s    
L%*ei00JJJJ   
L%*ei00JJJJ   
LV\**PPPP  
 ||AH MGG	
 	
 	
 	AhA77F%HVV	
 	
 	
 	#q(bbbb	
 	
 	
 	$$&&'--//9    	
 	
 	

 
 
 
 


 
 
G G G G G G 7u$$&&w||A77
$$W\\^^448!++Hfkk!nnEEKK!++A..KK$V-?UUIx(((( **7<<??;;JJ **1--J$$X..-"8" qLOO   1!++GV\!_EEKK!++HMMOO<<K:x44r:   c                     t          | ||g|R  \  }}}}t          |          dk    r'|                    |                                          }||||fS )Nr   )r  r   r   r   )rv  r   r  rU   r   r  r  r  s           r8   meta_embedding_bag_forward_onlyr    sm    0B1#'1 1 1-FJ+ 7u$$$$W\\^^44:x44r:   c                 r    |r|S | j         j        s| j         j        r| j         S |rt          j        S | j         S rD   )ra   r  r   rY   r   )r   ra   promote_int_to_longs      r8   _get_reduction_dtyper     sH     {$ (> {	 z;r:   r   c                    t          | |d          }t          j        | j        |          }t	          | ||          }|                     ||          S )NT)r  r   )r   rS   r  r   r  r   )r   r  r  ra   r'  r  s         r8   meta_nansumr"    sS     (u$OOOLT22D+E4AAL??<|?<<<r:   c           	          t          j        | j        t          t	          |                                                               }|                     |          S rD   )rS   r  r   ri   r   r   r   )r   r  s     r8   meta_medianr$  (  sG    7U5--.. L ??<(((r:   c                    t          |           dk    rt          j        d           t          j        | j        |f          }t          | ||          }|                     |          |                     |t          j                  fS )Nr   zmedian CUDA with indices outputr   )	r   rS   alert_not_deterministicr  r   r  r   rY   r   )r   r   r  r  s       r8   meta_median_mode_dimr'  0  s~     5V##%&GHHH

u{SF
3
3C+E3@@L%%EJ77 r:   c                     | S rD   r4   r   s    r8   meta_logical_not_r)  F  r  r:   c                    t          j        t                    |                                 k    d            t	                    D ]"\  t          j        dk    fd           #t                    |                                 z
  }d|z  t          | j                  z   fdt          t                              D             }|                     |          S )Nc                      dS )NzZNumber of dimensions of repeat dims can not be smaller than number of dimensions of tensorr4   r4   r:   r8   rh   zmeta_repeat.<locals>.<lambda>O  s    l r:   r   c                      d d  S )Nz"Repeats cannot be negative, found r  r4   )r   reps   r8   rh   zmeta_repeat.<locals>.<lambda>T  s    KKKKK r:   rB  c                 2    g | ]}|         |         z  S r4   r4   )rP   r   padded_sizer  s     r8   rR   zmeta_repeat.<locals>.<listcomp>[  s&    LLL1;q>GAJ.LLLr:   )	rY   rj   r   r   r  ri   r   r   r   )r   r  num_new_dimensionstarget_sizer   r/  r-  s    `  @@@r8   meta_repeatr2  K  s    	LG

"ll   G$$ 
 
31HKKKKK	
 	
 	
 	
 W

2++eDJ.?.??KLLLLLc'll8K8KLLLK>>+&&&r:   c                     | S rD   r4   r   s    r8   
meta_zero_r4  _  r  r:   c                 n    t          |t          j                  rt          | j        |j                   | S rD   )rs   rY   r   rl   r   r   r   s     r8   meta_binop_inplacer7  d  s1     %&& 9
EK888Kr:   c                    d }d }d } ||           r ||          rt          d           ||           r ||          st          d          t          |t          j                  rt	          | j        |j                   | S )a*  
    Some checks for inplace ops.
    Checks for promotion rules for some dtypes.
    int.add/sub_(float) and bool.add/sub_(others) are rejected.
    Promoting in these in-place operations would require reallocating
    and copying over elements, hence not allowed.
    Checks for alpha param.
    c                     t          | t                    rt          j        | j                  S t          | t
                    S rD   )rs   r   rS   ry  ra   r   ru   s    r8   is_integericz.meta_binop_inplace_alpha.<locals>.is_integeric  s7    c:&& 	,)#)444c7+++r:   c                     t          | t                    rt          j        | j                  S t          | t
                    S rD   )rs   r   rS   r  ra   r   r:  s    r8   
is_floaticz,meta_binop_inplace_alpha.<locals>.is_floatic  s7    c:&& 	.'	222c9---r:   c                     t          | t                    rt          j        | j                  S t          | t
                    S rD   )rs   r   rS   is_boolean_dtypera   r   r:  s    r8   is_booleanicz.meta_binop_inplace_alpha.<locals>.is_booleanic  s7    c:&& 	-)#)444c8,,,r:   z]Promotion of int.add/sub_(float) in in-place ops are not possible due to element size change.z_Promotion of book.add/sub_(others) in in-place ops are not possible due to element size change.)rc  rs   rY   r   rl   r   )r   r   r  r;  r=  r@  s         r8   meta_binop_inplace_alpharA  u  s    $, , ,. . .- - - |D 
jj// 
k
 
 	

 |D 
,,u"5"5 
m
 
 	
 %&& 9
EK888Kr:   c                 :    t          | |t          j                  S Nr  rW   r   rT   r   r   r  s      r8   meta_binop_alpharF    s&     e$C$K   r:   c                 8    t          | t          j                  S rC  rD  )r   rA  s     r8   
meta_roundrH    s"    <D   r:   c                 \    t          j        t          j        j                   fd           t          t           j                  r2t          j        t          j        j                   fd           d S t          j        t          t                     fd           d S )Nc                        dj          S )Nz7: Expected input tensor to have an integral dtype. Got r   )r  r   s   r8   rh   z#shift_dtype_check.<locals>.<lambda>  s    7__SWS]__ r:   c                        dj          S )Nz6: Expected shift value to have an integral dtype. Got r   r  r  s   r8   rh   z#shift_dtype_check.<locals>.<lambda>  s    waaVYV_aa r:   c                        d S )Nz): Expected shift value to be an int. Got r4   rL  s   r8   rh   z#shift_dtype_check.<locals>.<lambda>  s    wNNNN r:   )rY   rj   rS   ry  ra   rs   r   r   )r  r   r  s   ```r8   shift_dtype_checkrN    s    	Ltz**_____   #u|$$ 	
"39--aaaaa	
 	
 	
 	
 	

 	sG$$NNNNN	
 	
 	
 	
 	
r:   c                 \    t          d| |           t          | |t          j                  S )Nrshiftr  rN  rW   r   rT   r6  s     r8   meta_rshiftsrR    6    he,,,e$C$K   r:   c                 \    t          d| |           t          | |t          j                  S )Nlshiftr  rQ  r6  s     r8   meta_lshiftsrV    rS  r:   c                 6    |                      | j                  S rD   r  r   s    r8   	meta_zerorX    s    >>$*%%%r:   c                     | S rD   r4   r   r  s     r8   
meta_fill_r[    r  r:   c                 *    t          j        |           S rD   rX  rZ  s     r8   	meta_fillr]        D!!!r:   c                     | S rD   r4   r   s    r8   
meta_relu_r`    r  r:   c                 :    t          | |t          j                  S rC  rD  rE  s      r8   meta__add_relurb    s&     e$C$K   r:         ?UUUUUU?c                 *    t          j        |           S rD   rX  r   noiselowerr%  r  r  s         r8   meta_rrelu_with_noiseri    s    
 D!!!r:   c                 R    t          j        |           t          j        |          fS rD   rX  rf  s         r8    meta_rrelu_with_noise_functionalrk    s%     D!!5#3E#:#:::r:   c                     | S rD   r4   )r   rh  r%  r  r  s        r8   meta_rrelu_with_noise_rm  	  s	     Kr:   c                 *    t          j        |           S rD   rX  r   r   r   
accumulates       r8   meta_index_putrq    r^  r:   c                 :    t          | j        |j                   | S rD   rl   r   )r   r  values      r8   meta_masked_fill_ru    s    DJ
333Kr:   c                     |                      |                                                               t          j        |                     }|S r   )r   r   rd  rS   r   )r   r  r  masked_scales       r8   meta__masked_scalerx    sF    >>$))++..111$77 2  L r:   c                      t          j        |j        t           j        t           j        fv d            t          j         j        j        k     fd            S )Nc                      dS )NzMask must be bool or uint8r4   r4   r:   r8   rh   z&meta_masked_scatter_.<locals>.<lambda>&  s    9U r:   c                  (    d j          dj          S )NzEmasked_scatter: expected self and source to have same dtypes but got r   r   )r   r  s   r8   rh   z&meta_masked_scatter_.<locals>.<lambda>*  s(     :*: :+1<: : r:   )rY   rj   ra   r.  r  )r   r  r  s   ` `r8   meta_masked_scatter_r|  #  ss    	L
uz5;//1U1U   
L
fl"	: 	: 	: 	: 	:  
 Kr:   c                     t          | |          \  } }t          j        | t          j                  }t	          |||          S r   )r&   rY   r   r   r|  )r   r  r  r   s       r8   meta_masked_scatterr~  0  sA     "$--JD$d%2IJJJFf555r:   c                 ,    |                      |          S rD   r  )r   r  r  s      r8   meta_masked_scatter_backwardr  8  s    >>%   r:   c                     | S rD   r4   ro  s       r8   meta_index_put_r  =  r  r:   c           	      D    ddl m}m} t          j                                         dk    d            t          j                                        dk    d            t          j         j        j        k     fd                                            }                                |d         |d         |d         }d         }	||	ft          j         | |d                    |d                             fd	           |r j        t          j        k    p j        t          j	        k    o|t          j
        k    }
t          j        | j        k    p|
d
                                                              |          }n                              }|scat          j                                        dk    d            t          j         |                                          fd           |S )Nr   )sym_andr  r0   c                      dS r  r4   r4   r:   r8   rh   z)common_meta_baddbmm_bmm.<locals>.<lambda>E  r  r:   c                      dS r  r4   r4   r:   r8   rh   z)common_meta_baddbmm_bmm.<locals>.<lambda>F  r  r:   c                  (    d j          dj          S )Nzexpected scalar type z but found r   r;  s   r8   rh   z)common_meta_baddbmm_bmm.<locals>.<lambda>I  s    OOOOO r:   r  r3   c            	      :    d d d d          d d          d	S r  r4   r  s   r8   rh   z)common_meta_baddbmm_bmm.<locals>.<lambda>W  sU     RSU R RR R+7?R R>J1oR R R r:   c                      dS )Nzfout_dtype only supported for torch.float32 output with float16/bfloat16 inputs or same as input dtypesr4   r4   r:   r8   rh   z)common_meta_baddbmm_bmm.<locals>.<lambda>`  s    | r:   c                      dS )Nzself must be a 3D tensorr4   r4   r:   r8   rh   z)common_meta_baddbmm_bmm.<locals>.<lambda>h  s    6P r:   c                  8    d  d                                  S )Nz*Expected an input tensor shape with shape z but got shape: r   )r  self_baddbmms   r8   rh   z)common_meta_baddbmm_bmm.<locals>.<lambda>k  s%    sss^j^o^o^q^qss r:   )ra  r  r  rY   rj   r   ra   r   r  r  r  r   rd  )r  r  is_bmmr  rz  r  r  r  res_rowsres_colssupported_out_dtyper   r  r  r  r  s   `` `        @@@@r8   common_meta_baddbmm_bmmr  B  sb   EEEEEEEE	L"$H$HIII	L"$H$HIII	L$OOOOO  
 ;;==L;;==L	aB#AAHAHx*K	L|A++VVLOEU-V-VWW	R 	R 	R 	R 	R 	R  
  /LEM)KV\U^-K)5=( 	 	%<)<||	
 	
 	
 !!+..11)<< !!+.. 
l.\%%''1,.P.PQQQF<$$&&44sssss	
 	
 	

 Mr:   c                 $    t          | |d          S )NTr  )r   r  s     r8   meta_bmmr  q  s     #4t444r:   c                 (    t          | |d|          S )NT)rz  r  )r   r  rz  s      r8   meta_bmm_dtyper  w  s     #4tyIIIIr:   c                     | |z  }| |z  }|dk    r-t          |dk               t          |dk               k    r|dz  }|S r  )r.  )r=   r>   qr  s       r8   div_rtnr  }  sK    	QA	AA 	Avv4A;;$q1u++--	QHr:   c                     t          | |z   |z   ||dz
  z  z
  dz
  |r|dz
  ndz   |          dz   }|r|dz
  |z  | |z   k    r|dz  }|S r  )r  )	inputSize
kernelSizer  r  r   r  rH  
outputSizes           r8   pooling_output_shape_pad_lrr    s     	 *q.)* 	
 '-vzzA/ 	
 	
 		   Nf$	E(999!OJr:   c           	          t          j        |dk    d            t          j        dk    fd           t          j        dz
  z  dz   dz  k    fd           t          | ||          S )Nr   c                      dS )Nzstride should not be zeror4   r4   r:   r8   rh   z&pooling_output_shape.<locals>.<lambda>  s    &A r:   c                      d  S )Nz'pad must be non-negative, but got pad: r4   pads   r8   rh   z&pooling_output_shape.<locals>.<lambda>  s    #RS#R#R r:   r3   r  c                      d d d  S )NzApad should be at most half of effective kernel size, but got pad=z, kernel_size=z and dilation=r4   )r  r  r  s   r8   rh   z&pooling_output_shape.<locals>.<lambda>  s;    @PS @ @%@ @5=@ @ r:   )rY   rj   r  )r  r  r  r   r  rH  s    `` ` r8   r}  r}    s    	L1AABBB	LRRRRSSS	La8+a/A55	
 	
 	
 	
 	
 	
   ':sC9  r:   c           	      j   	
                                   }	t          j        dk    odk    fd           t          j        dk    odk    fd           t          j        dk    odk    fd                                d          dk    o                     d          dk    }|t          j        k    r8t          j        |dk    o|o                     d          dk     fd	           nXt          j        |dk    r                     d          dk    r|p |dk    o|o                     d          dk     fd
           t          j        dz  k    odz  k    fd           t          j        dk    odk    
	fd           d S )Nr   c                      d  d S )Nz5kernel size should be greater than zero, but got kH: , kW: r4   )r  r  s   r8   rh   z$pool2d_shape_check.<locals>.<lambda>  s    VVVRTVV r:   c                      d  d S )Nz0stride should be greater than zero, but got dH: , dW: r4   )r  r  s   r8   rh   z$pool2d_shape_check.<locals>.<lambda>  s    Q2QQRQQ r:   c                      d  d S )Nz9dilation should be greater than zero, but got dilationH: , dilationW: r4   )	dilationH	dilationWs   r8   rh   z$pool2d_shape_check.<locals>.<lambda>  s    oIoodmoo r:   r3   r  rC  r0   c                  2    d                                   S )NzExpected 4D (batch mode) tensor expected for input with channels_last layout with optional 0 dim batch size for input, but got: r   r,  s   r8   rh   z$pool2d_shape_check.<locals>.<lambda>  s"     RCH::<<R R r:   c                  2    d                                   S )NzYExpected 3D or 4D (batch mode) tensor with optional 0 dim batch size for input, but got: r   r,  s   r8   rh   z$pool2d_shape_check.<locals>.<lambda>  s    ~pupzpzp|p|~~ r:   c                       d d d d  S )NzKpad should be smaller than or equal to half of kernel size, but got padW = z	, padH = z, kW = z, kH = r4   )r  r  r  r  s   r8   rh   z$pool2d_shape_check.<locals>.<lambda>  s>     >> >!%> >.0> >9;> > r:   c                  .    d d  d d d d dS NzGiven input size: (r=   z). Calculated output size: (z). Output size is too smallr4   )r  r  rI  r  rJ  rK  s   r8   rh   z$pool2d_shape_check.<locals>.<lambda>  sZ     #k # #K # #* # #$0# #3?# #BM# # # r:   )r   rY   rj   r   r  )r   r  r  r  r  r  r  r  r  rI  r  r  rJ  rK  r   r   
valid_dimsr  s   ``````````````   @r8   r~  r~    sc   " 99;;DL	L
Q26VVVVV   
L
Q26QQQQQ   
LA')a-ooooo  
 A!#:

1(:J+++AI;*;A!);R R R R	
 	
 	
 	
 	QY<5::a==A-<* A	?j?UZZ]]a-?~~~~	
 	
 	
 
L
a4+B!GtO	> 	> 	> 	> 	> 	> 	>   
Lq.\Q.	# 	# 	# 	# 	# 	# 	# 	# 	#    r:   r  r  r  r  r  r  r  pTpHpW	dilationTr  r  r  r  r  r  r  r  r  c           
      8   	
  j         }t          j        dk    odk    odk    fd           t          j        dk    odk    odk    fd           t          j        dk    odk    odk    fd           t          j        |dv  fd           t          |          D ]@|dk    rdk    rt          j                                       dk     fd           A|r-t          j        k    ok    ok    fd	           t          j        d
z  k    od
z  
k    od
z  	k    	
fd           t          j        dk    odk    odk    fd           d S )Nr   c                      d d  d S )Nz5kernel size should be greater than zero, but got kT: z, kH: r  r4   )r  r  r  s   r8   rh   z$pool3d_shape_check.<locals>.<lambda>  s4    ,, ,, ,'), , r:   c                      d d  d S )Nz0stride should be greater than zero, but got dT: z, dH: r  r4   )r  r  r  s   r8   rh   z$pool3d_shape_check.<locals>.<lambda>  s$    WrWWWWSUWW r:   c                      d d  d S )Nz9dilation should be greater than zero, but got dilationT: z, dilationH: r  r4   )r  r  r  s   r8   rh   z$pool3d_shape_check.<locals>.<lambda>  s;    V#V V2;V VJSV V r:   r  c                        dj          S )Nz/: Expected 4D or 5D tensor for input, but got: r-  )r  r   s   r8   rh   z$pool3d_shape_check.<locals>.<lambda>   s    7XX5;XX r:   rh  c                  J      dj          d                               dS )NzZ: Expected input's non-batch dimensions to have positive length, but input has a shape of z and non-batch dimension z has length zero!)r   r   )r  r   r   s   r8   rh   z$pool3d_shape_check.<locals>.<lambda>)  sG     M M-2[M M,1JJqMMM M M r:   c                  .    d d  d d d d dS )Nzinput image (T: rp  rL  z ) smaller than kernel size (kT:  kH:  kW: r{   r4   )r  r  r  r  r  r  s   r8   rh   z$pool3d_shape_check.<locals>.<lambda>3  s]    <5 < <g < <6 < <$&< <-/< <68< < < r:   r  c                  ,    d d d  d d d S )NzHpad should be smaller than or equal to half of kernel size, but got kT: r  r  z padT: z padW: z padH: r4   )r  r  r  r  r  r  s   r8   rh   z$pool3d_shape_check.<locals>.<lambda>;  sh    KK KK K%'K K02K K;=K KFHK K r:   r3   c                  :    d d d  d d d d d dS r  r4   )r  r  r  r  r  r  r  s   r8   rh   z$pool3d_shape_check.<locals>.<lambda>C  sv    (' ( (E ( (G ( (f ( ((/( (27( (:A( (DJ( ( ( r:   )r   rY   rj   r   r   )r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r   s   `````````````````````  @r8   r  r    s   0 :D	L
Q$26$b1f	
 	
 	
 	
 	
 	
   
L
Q$26$b1f	
 	
 	
 	
 	
 	
   
LA9)a-9IM	
 	
 	
 	
 	
 	
   
LXXXXX  
 4[[ 
 
199aJJqMMA     	
 	
 	
 	
  
RK:GrM:fl        	
 	
 	
 
L
Q"6a26"q&B,	
 	
 	
 	
 	
 	
 	
 	
 	
   
L
3v{3w!|	
 	
 	
 	
 	
 	
 	
 	
 	
 	
    r:   c                    | j         }t          | |||||||	|
||||||||||||           t          |||dz
  |           t          |||dz
  |           t          |||dz
  |           t          |||dz
  |           t          |||dz
  |           t          |||dz
  |           t          |||dz
  |           t          |||dz
  |           d S )NrC  r0   r  r3   r   r  ra  )r   r8  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   s                           r8   max_pool3d_backward_shape_checkr  K  s$   2 :D








+  0 ;dQh888;dQh666;dQh888;dQh7777D$(G4447D$(E2227D$(G4447D$(F33333r:   c                    | j         }t          | ||||||||	|
|ddd|||||||d           t          |||dz
  |           t          |||dz
  |           t          |||dz
  |           t          |||dz
  |           d S )Nr3   TrC  r0   r  r  )r   r8  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   s                       r8   r  r    s    * :D








			-  2 ;dQh888;dQh666;dQh888;dQh77777r:   c                 P   d } |d|          \  }}t          j        t          |          dv d            t          |          dk    r||}
}	n |d|          \  }	}
 |d|          \  }} |d|          \  }}|                     d	          }|                     d
          }|                     d          }t	          j        |           }|t           j        k    r-t          j        |                                 dk    d            nQ|t           j        k    r+t          j        |                                 dv d            nt          j        dd            t          ||||	||          }t          ||||
||          }t          | |||	|
||||||||||           |||fS )Nc                      t          j        t          |          dv  fd           |d         }t          |          dk    r|n|d         }||fS )Nrf  c                      d  dS )Nzmax_pool2d: rh  r4   ri  s   r8   rh   zEmax_pool2d_checks_and_compute_shape.<locals>.unpack.<locals>.<lambda>  rj  r:   r   r3   rk  rl  s   `   r8   ro  z3max_pool2d_checks_and_compute_shape.<locals>.unpack  rp  r:   r  rq  c                      dS )NzOmax_pool2d: stride must either be omitted, a single int, or a tuple of two intsr4   r4   r:   r8   rh   z5max_pool2d_checks_and_compute_shape.<locals>.<lambda>  rt  r:   r   r   r  r  rz  r  r   rC  c                      dS )NzMnon-empty 4D (batch mode) tensor expected for input with channels_last layoutr4   r4   r:   r8   rh   z5max_pool2d_checks_and_compute_shape.<locals>.<lambda>  s    c r:   r  c                      dS )Nz9non-empty 3D or 4D (batch mode) tensor expected for inputr4   r4   r:   r8   rh   z5max_pool2d_checks_and_compute_shape.<locals>.<lambda>      O r:   Fc                      dS )NzAUnsupported memory format. Supports only ChannelsLast, Contiguousr4   r4   r:   r8   rh   z5max_pool2d_checks_and_compute_shape.<locals>.<lambda>  s    W r:   )rY   rj   r   r   rS   r   r  r   r   r}  r~  )r   r  r   r  r  rH  ro  r  r  r  r  r  r  r  r  rI  r  r  r   rJ  rK  s                        r8   rF  rF    s      VM;//FB	LFy aa   6{{aRB&))B	7++JD$!6*h77Iy**R..K**R..KBJ/66M+++IIKK1cc	
 	
 	
 	
 
%1	1	1IIKK6!OO	
 	
 	
 	

 	WW	
 	
 	

 (Rr9iXXL&z2tRIVVK



  $ k11r:   c                 P    t          |||||          \  }t          j        j         j        k     fd           |j        fd}	 |	             |	|           t          j                  }
t          j        j        j        j	        |
          S )Nc                  (    dj          d j          S )NzExpected dtype z  for `gradOutput` but got dtype r   r  s   r8   rh   z7meta_max_pool2d_with_indices_backward.<locals>.<lambda>!  s    a$*aakN_aa r:   c                     t          | dz
             t          | dz
             t          | dz
             d S )Nr0   r  r3   )ra  )r  r  r   rJ  rK  s    r8   _check_dim_sizez>meta_max_pool2d_with_indices_backward.<locals>._check_dim_size'  sQ    q$q,777q$q,777q$q+66666r:   r  )
rF  rY   rj   ra   r   rS   r   r   r   r   )r8  r   r  r   r  r  rH  r   rI  r  r   r  r   rJ  rK  s   ``         @@@@r8   %meta_max_pool2d_with_indices_backwardr    s      	,k67Hi	 		
 
L
k''aaaaa  
 L9D7 7 7 7 7 7 7 7
 OK   OG/55M;
j{#	   r:   c                    t          | |||||          \  }}}|                                 dk    r|                     d          nd}	t          j        |           }
|                                 dk    r|||g}n|	|||g}t          j        || j        | j        |
          t          j        |t
          j	        | j        |
          fS rD  )
rF  r   r   rS   r   rY   r   ra   r   r   rG  s               r8   meta_max_pool2d_with_indicesr  8  s     	,{FGXy	 		
  %yy{{a//UZZ^^^QF/66Myy{{a\;7\;?+<'		
 	
 	
 	+<'		
 	
 	
 r:   c                    
 t          j         j        dv  fd            j        }t          |dz
  |          D ]2
t          j                             
          dk    
 fd           3t          j        t                    dk    d            t          j        t          |          dk    d                                 d	          }                     d
                               d          |dk    r                     d          }nd}t          j         j        j        k    d            t          j        j        dk    fd                               d          }                    d          }                    d          
t          j        ||k    d            t          j        ||k    d            t          j        
dk    
fd           t          j        |d         d         z   dz
  k    fd           t          j        |d         d         z   dz
  k    fd                                            dk    r|||d         |d         g}	n||d         |d         g}	t          j        |	 j         j	                  t          j        |	t           j
         j	                  fS )Nr  c                      d j          S )Nz:fractional_max_pool2d: Expected 3D or 4D tensor, but got: rB  r   s   r8   rh   z,meta_fractional_max_pool2d.<locals>.<lambda>c  s    XTYXX r:   r0   r   c                  :    d                                  d  dS )Nz_fractional_max_pool2d: Expected input to have non-zero  size for non-batch dimensions, but got r  z emptyr   )r   r   s   r8   rh   z,meta_fractional_max_pool2d.<locals>.<lambda>j  s7     ^7;yy{{^ ^TU^ ^ ^ r:   r  c                      dS )NzNfractional_max_pool2d: kernel_size musteither be a single int or tuple of Intsr4   r4   r:   r8   rh   z,meta_fractional_max_pool2d.<locals>.<lambda>q  	     2 r:   c                      dS )NzOfractional_max_pool2d: output_size must either be a single int or tuple of Intsr4   r4   r:   r8   rh   z,meta_fractional_max_pool2d.<locals>.<lambda>v  r  r:   rz  r  r   rC  r3   c                      dS )Nz6Expect _random_samples to have the same dtype as inputr4   r4   r:   r8   rh   z,meta_fractional_max_pool2d.<locals>.<lambda>  s    H r:   c                      d j          S )Nz1Expect _random samples to have 3 dimensions got, rB  )random_sampless   r8   rh   z,meta_fractional_max_pool2d.<locals>.<lambda>  s    YNDWYY r:   c                      dS )Nz=Expect _random_samples.size(0) no less then input batch size.r4   r4   r:   r8   rh   z,meta_fractional_max_pool2d.<locals>.<lambda>  r   r:   c                      dS )Nz<Expect _random_samples.size(1) equals to input channel size.r4   r4   r:   r8   rh   z,meta_fractional_max_pool2d.<locals>.<lambda>      N r:   c                      d  dS )Nz/Expect _random_samples.size(2) equals to 2 got .r4   )r   s   r8   rh   z,meta_fractional_max_pool2d.<locals>.<lambda>  s    !WST!W!W!W r:   c                       dd          d  S )Nz%fractional_max_pool2d: kernel height r   z' is too large relative to input height r4   )input_heightr  s   r8   rh   z,meta_fractional_max_pool2d.<locals>.<lambda>  s    }A}}o{}} r:   c                       dd          d  S )Nz$fractional_max_pool2d: kernel width r3   z& is too large relative to input width r4   )input_widthr  s   r8   rh   z,meta_fractional_max_pool2d.<locals>.<lambda>  s    z{1~zzmxzz r:   r  )rY   rj   r   r   r   r   ra   r   r   r   r   )r   r  r  r  r   input_channelsinput_batchr  cr   r   r  r  s   `` `      @@@r8   meta_fractional_max_pool2dr  _  sQ   	L	VXXXX   9D4!8T"" 
 
IIaLL1^ ^ ^ ^ ^	
 	
 	
 	
 
LKA	2 	2  
 
LKA	2 	2   YYr]]N99R==L))B--Kqyyiill	L
n**HH   
Lq YYYY  
 	AAAAAA	L	[OO   
L	^NN   
LaWWWWXXX	LAQ'!+|;}}}}}   
LAQ'!+{:zzzzz  
 xxzzQ^[^[^LAA? 	*;	
 	
 	

 	+;	
 	
 	
 r:   c                    t          j        t          |          dv d            |d         }t          |          dk    r|n|d         }t          |          dk    r|n|d         }t          j        | pt          |          dv d            |s|n|d         }	|s|nt          |          dk    r|	n|d         }
|s|nt          |          dk    r|	n|d         }t          j        t          |          dv d            |d         }t          |          dk    r|n|d         }t          |          dk    r|n|d         }t          j        t          |          dv d            |d         }t          |          dk    r|n|d         }t          |          dk    r|n|d         }t          j        | j        d	v d
            | j        dk    r|                     d          nd}|                     d          }|                     d          }|                     d          }|                     d          }t          ||||	||          }t          ||||
||          }t          ||||||          }t          | |||||	|
|||||||||||||d           | j        dk    o!t          j        |           t           j	        k    }| j        dk    r||||f}n|||||f}| 
                    |          }| 
                    |t           j                  }|r@|                    t           j	                  }|                    t           j	                  }||fS )Nr  c                      dS NzMmax_pool3d: kernel_size must either be a single int, or a tuple of three intsr4   r4   r:   r8   rh   z.meta_max_pool3d_with_indices.<locals>.<lambda>      _ r:   r   r3   r  c                      dS NzQmax_pool3d: stride must either be omitted, a single int, or a tuple of three intsr4   r4   r:   r8   rh   z.meta_max_pool3d_with_indices.<locals>.<lambda>      c r:   c                      dS NzImax_pool3d: padding must either be a single int, or a tuple of three intsr4   r4   r:   r8   rh   z.meta_max_pool3d_with_indices.<locals>.<lambda>      [ r:   c                      dS NzJmax_pool3d: dilation must be either a single int, or a tuple of three intsr4   r4   r:   r8   rh   z.meta_max_pool3d_with_indices.<locals>.<lambda>  r  r:   r  c                      dS r  r4   r4   r:   r8   rh   z.meta_max_pool3d_with_indices.<locals>.<lambda>  r  r:   rh  rE  rz  r  r   zmax_pool3d_with_indices()rC  r   r   )rY   rj   r   r   r   r}  r  rS   r   r  r   r   rd  )r   r  r   r  r  rH  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r   r   s                                 r8   meta_max_pool3d_with_indicesr    s    
LKF"__   
QB;1$$+a.B;1$$+a.B	L
+c&kkV+cc   	(vayB	Fc&kkQ&6&6F1IB	Fc&kkQ&6&6F1IB	LG[[   
B7||q  gajB7||q  gajB	LH\\   I ]]a//		Xa[I ]]a//		Xa[I	L
fKK  
  %zQUZZ^^^AFjjnnGJJrNNEjjnnGZZ^^F BIyIIE"7BB	9MMG!&"b"iKKF








#+  4 	
aXE7>>%BXX  zQeWf5		WeWf=	
//)
$
$Cooiu{o;;G Cff5#9f::**5+A*BB<r:   c                    t          j        t          |          dv d            |d         }t          |          dk    r|n|d         }	t          |          dk    r|n|d         }
t          j        | pt          |          dv d            |s|n|d         }|s|	nt          |          dk    r|n|d         }|s|
nt          |          dk    r|n|d         }t          j        t          |          dv d            |d         }t          |          dk    r|n|d         }t          |          dk    r|n|d         }t          j        t          |          dv d            |d         }t          |          dk    r|n|d         }t          |          dk    r|n|d         }t          j        |j        d	v d
            |                    d          }|                    d          }|                    d          }|                    d          }|                     d          }|                     d          }|                     d          }t          || ||||	|
|||||||||||||||d           |j        dk    o!t          j        |          t           j        k    }|	                    |j
                  }|r |                    t           j                  }|S )Nr  c                      dS r  r4   r4   r:   r8   rh   z7meta_max_pool3d_with_indices_backward.<locals>.<lambda>&  r  r:   r   r3   r  c                      dS r  r4   r4   r:   r8   rh   z7meta_max_pool3d_with_indices_backward.<locals>.<lambda>.  r  r:   c                      dS r  r4   r4   r:   r8   rh   z7meta_max_pool3d_with_indices_backward.<locals>.<lambda>6  r  r:   c                      dS r  r4   r4   r:   r8   rh   z7meta_max_pool3d_with_indices_backward.<locals>.<lambda>>  r  r:   r  c                      dS r  r4   r4   r:   r8   rh   z7meta_max_pool3d_with_indices_backward.<locals>.<lambda>F  r  r:   rE  rz  r  r   z"max_pool3d_with_indices_backward()rh  r   )rY   rj   r   r   r   r  rS   r   r  r   r   rd  )r8  r   r  r   r  r  rH  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r;  s                                r8   %meta_max_pool3d_with_indices_backwardr    s4    
LKF"__   
QB;1$$+a.B;1$$+a.B	L
+c&kkV+cc   	(vayB	Fc&kkQ&6&6F1IB	Fc&kkQ&6&6F1IB	LG[[   
B7||q  gajB7||q  gajB	LH\\   I ]]a//		Xa[I ]]a//		Xa[I	L
fKK  
 jjnnGJJrNNEjjnnGZZ^^FR  Er""Gb!!F#








,/  8 	
aXE7>>%BXX  --J I]]1G]HH
r:   gridc                 "    t          j         j        j        k     fd           t          j         j        t           j        k    oj        t           j        k     fd           t          j         j        d         j        d         k     fd           t          j        j        d          j        dz
  k     fd           t          d j                  D ]*t          j         j                 dk     fd           +d S )	Nc                  (    dj          d j          S )NzNgrid_sampler(): expected input and grid to be on same device, but input is on z and grid is on r@  r  r   s   r8   rh   z+check_grid_sampler_common.<locals>.<lambda>|  s0    A\A A37;A A r:   c                  (    dj          d j          S )NzTgrid_sampler(): expected input and grid to have torch.strided layout, but input has z and grid has )r   r  s   r8   rh   z+check_grid_sampler_common.<locals>.<lambda>  s0    CC C59[C C r:   r   c                  (    dj          d j          S )NzZgrid_sampler(): expected grid and input to have same batch size, but got input with sizes  and grid with sizes r-  r  s   r8   rh   z+check_grid_sampler_common.<locals>.<lambda>  s0    O %O OBF*O O r:   r   r  c                  .    dj         dz
   d j         S )Nz+grid_sampler(): expected grid to have size r  z, in last dimension, but got grid with sizes )r   r   r  s   r8   rh   z+check_grid_sampler_common.<locals>.<lambda>  s.    ?%*q. ? ?26*? ? r:   c                       dj          d  dS )NzYgrid_sampler(): expected input to have non-empty spatial dimensions, but input has sizes r  r  r-  r  s   r8   rh   z+check_grid_sampler_common.<locals>.<lambda>  r  r:   )rY   rj   r   r   r  r   r   r   )r   r  r   s   ``@r8   check_grid_sampler_commonr  y  st   	L#	
 	
 	
 	
 	
   
L%F$+*F	
 	
 	
 	
 	
   
LA$*Q-'	
 	
 	
 	
 	
   
L
2%*q.(	
 	
 	
 	
 	
   1ej!! 
 
KNQ    	
 	
 	
 	

 
r:   c                       e Zd ZdZdZdZdS )GridSamplerInterpolationr   r3   r  N)r}   
__module____qualname__BILINEARNEARESTBICUBICr4   r:   r8   r  r    s        HGGGGr:   r  interpolation_modec                      t          j         j        dk    o j        j        k     fd           t          j         j        dk    o|t          j        j        k     d            d S )Nrh  c                  (    dj          d j          S )Nzdgrid_sampler(): expected 5D input and grid with same number of dimensions, but got input with sizes r  r-  r  s   r8   rh   z'check_grid_sampler_3d.<locals>.<lambda>  s+    149K1 1$(J1 1 r:   c                      dS )Nz<grid_sampler(): bicubic interpolation only supports 4D inputr4   r4   r:   r8   rh   z'check_grid_sampler_3d.<locals>.<lambda>  r  r:   )rY   rj   r   r  r  rt  )r   r  r  s   `` r8   check_grid_sampler_3dr    s    	L
a3EJ$)3	
 	
 	
 	
 	
   
LJ!O M"&>&F&LL	
 	ON    r:   c                     |d         }|r!t          j        |t           j                  }nd }t          j        |t           j                  }	||	fS Nr   r   )rY   r  r   r   
r8  r   r  r  padding_modealign_cornersr2  input_requires_gradr;  	grad_grids
             r8   grid_sampler_2d_backward_metar    sX     &a. %e5;RSSS


 U5LMMMI	""r:   c                     t          | |           t          | ||           | j        d         }| j        d         }|j        d         }|j        d         }|j        d         }	|                     |||||	f          S )Nr   r3   r  r0   )r  r  r   r   )
r   r  r  r  r  r  Cout_Dout_Hout_Ws
             r8   grid_sampler_3dr%    s{     eT***%'9:::AAAAJqMEJqMEJqME??Aq%6777r:   r  c                     t          ||           t          |||           |d         }|r!t          j        |t          j                  }nd }t          j        |t          j                  }	||	fS r  )r  r  rY   r  rJ  r   r  s
             r8   grid_sampler_3d_backwardr'    s     eT***%'9:::%a. %!?
 
 


 
 U5STTTIy  r:   c                     |                     d          }|st          j        |          }||d<   t          j        | g|R i |S )Nra   )r`   rS   	get_dtyperY   r   )r   r  rU   rA  ra   s        r8   fullr*    sS    JJwE ,
++F7O;t-d---f---r:   c                 n   |t           j        k    rt          j        |d u d            t          j        d|| j        n|||| j        n||          }| j        rN|                    |                                 | 	                                | 
                                           n;|                    |                                 |                                 d           |                    d           |S t          j                            | |||||          }|                    d           |S )Nc                      dS )Nz9memory format option is only supported by strided tensorsr4   r4   r:   r8   rh   zzeros_like.<locals>.<lambda>  r  r:   r   r  Tr  )rY   
sparse_coorj   r   ra   r   	is_sparsesparse_resize_and_clear_r   
sparse_dim	dense_dimr   _coalesced_r-   r   defaultfill_)r   ra   r   r   r   r   r  s          r8   r  r    sA    !!!T!OO	
 	
 	

 k %$**5"(.4;;f!
 
 
 > 	E((		T__..0@0@    ((dhhjj!DDD

/
!
!# "  C IIaLLLJr:   r   c                    |t          j                    }|t          j                    }|t           j        }t          j        | ||||          S r  rY   r   get_default_devicer  r   r   ra   r   r   r   r   s         r8   	meta_onesr9  /  Z     }'))~)++~;E&J   r:   c                    |t          j                    }|t          j                    }|t           j        }t          j        | ||||          S r  r6  r8  s         r8   
meta_zerosr<  E  r:  r:   c                      t          |           S rD   _scatter_meta_output)r   rg  r   r   s       r8   meta_select_scatterr@  [      %%%r:   c                      t          |           S rD   r>  )r   rg  r   r   r~   steps         r8   meta_slice_scatterrD  `  rA  r:   c                     ddl m}  ||           s,t          j        |           dk    r|                                 S t          j        |           S )Nr   r_  r3   )ra  r`  rY   rb  r   rS   clone_preserve_strides)r   r`  s     r8   r?  r?  e  s^    KKKKKK
 ! && 5+LT+R+RVW+W+Wzz||'---r:   dim_post_exprwrap_scalarc           	          |dk    r|st          d| d          d}| }|dz
  }| |k     s| |k    rt          d|  d| d| d          | dk     r| |z  } | S )	Nr   zdim_post_expr=z <= 0 but wrap_scalar is Falser3   zdim z out of bounds (rz   r{   r,  )r   rG  rH  r   r  s        r8   r   r   q  s     	 NNNN   .C
!
C
SyyC#IIFCFFFFFFFGGG
Qww}Jr:   c                 P    |                                  dk    rdn| j        |         S r  r_  )r  r   s     r8   ensure_nonempty_sizerK    s"    111!'#,.r:   c                 p    t                                           d          }t                                          d          }t          j        ||k    d            t	          |          D ]Ck    r;t          j        t                    t                     k     fd           Dd S )Nr3   c                      dS )NzDIndex tensor must have the same number of dimensions as input tensorr4   r4   r:   r8   rh   z$gather_shape_check.<locals>.<lambda>  s    V r:   c                  :    d dj          dj          d  z   S )Nz!Size does not match at dimension z expected index  to be no larger than self  apart from dimension r-  )r   r   r   r   s   r8   rh   z$gather_shape_check.<locals>.<lambda>  s6    \A\\u{\\W
WWRUWWX r:   )r  r   rY   rj   r   rK  )r   r   r   	self_dims
index_dimsr   s   ```  @r8   gather_shape_checkrS    s    DHHJJ""IUYY[[!$$J	LZVV   9  88L$UA..2FtQ2O2OOX X X X X X X   r:   c                 p   ddl m} t          ||                                           } |                                dk              }|sQt          j        j        t
          j        k    pj        t
          j	        k    fd           t          | |           |                     j                  S )Nr   r  c                      d j          S )Nz8gather(): Expected dtype int32/int64 for index, but got r   r   s   r8   rh   zmeta_gather.<locals>.<lambda>  s    \u{\\ r:   )ra  r  r   r   r   rY   rj   ra   r   r   rS  r   r   )r   r   r   sparse_gradr  wrapped_dimis_index_emptys     `    r8   meta_gatherrY    s    DDDDDD dhhjj11K#^EKKMMQ$677N 5K5:%A	)A\\\\	
 	
 	
 	4e444>>%+&&&r:   c                     |r@| dk    rdS | dk    rdS | dk    rdS | dk    rdS | d	k    rd
S t          j        dd            d S | dk    rdS | dk    rdS t          j        dd            d S )Nr+  
REDUCE_ADDr  REDUCE_MULTIPLYrS  REDUCE_MEANamaxREDUCE_MAXIMUMaminREDUCE_MINIMUMFc                      dS )Nz=reduce argument must be either sum, prod, mean, amax or amin.r4   r4   r:   r8   rh   z#get_operator_enum.<locals>.<lambda>  s    S r:   addmultiplyc                      dS )Nz/reduce argument must be either add or multiply.r4   r4   r:   r8   rh   z#get_operator_enum.<locals>.<lambda>  s    $U r:   r  )reduce_use_new_optionss     r8   get_operator_enumrh    s     e<$$ =####SS	
 	
 	
 	e<
""$$UUUVVVr:   c                 *    ddl m}  ||                                dk              r@t          j        |j        t          j        k    p|j        t          j        k     fd           |(t          j        |j        |j        k     fd           d S d S )Nr   )r  c                        dS )Nz((): Expected dtype int32/int64 for indexr4   method_names   r8   rh   z,scatter_gather_dtype_check.<locals>.<lambda>  s    {LLL r:   c                        dS )Nz0(): Expected self.dtype to be equal to src.dtyper4   rk  s   r8   rh   z,scatter_gather_dtype_check.<locals>.<lambda>  s    {TTT r:   )ra  r  r   rY   rj   ra   r   r   )rl  r   r   src_optr  s   `    r8   scatter_gather_dtype_checkro    s    CCCCCC}U[[]]a'(( 
K5:%A	)ALLLL	
 	
 	

 J'-'TTTT	
 	
 	
 	
 	
 r:   c                 "    t          | d          S r2   )r  r   s    r8   ensure_nonempty_dimrq    s    sA;;r:   c                 V    ddl m}  |                                dk              rd S t          j        t                                                     t                                                    k    d            t                                                     }t          |          D ]G}|k    r	t          |          }t           |          }t          j        ||k     fd           Ht          j        t                                                     t                                                    k    d            t          |          D ]A}t          |          }t          |          }	t          j        ||	k    fd           @d S d S )Nr   r  c                      dS )NzCIndex tensor must have the same number of dimensions as self tensorr4   r4   r:   r8   rh   z%scatter_shape_check.<locals>.<lambda>  r  r:   c                  4    dj          dj          d  z   S )NExpected index rO  rP  r-  )r   r   r   s   r8   rh   z%scatter_shape_check.<locals>.<lambda>  s,    ZekZZdjZZ,s,,- r:   c                      dS )NzBIndex tensor must have the same number of dimensions as src tensorr4   r4   r:   r8   rh   z%scatter_shape_check.<locals>.<lambda>  s    X r:   c                  (    d j          dj          S )Nru  z to be no larger than src r-  )r   rn  s   r8   rh   z%scatter_shape_check.<locals>.<lambda>  s    `%+``QXQ^`` r:   )	ra  r  r   rY   rj   rq  r   r   rK  )
r   r   r   rn  r  rQ  r   index_d_sizeself_d_size
src_d_sizes
   ````      r8   scatter_shape_checkr{    s   DDDDDD~ekkmmq()) 	LDHHJJ''+>uyy{{+K+KKUU  
 $DHHJJ//I 9 	
 	
88+E155*433K'- - - - - -	
 	
 	
 	
 

++/B7;;==/Q/QQXX	
 	
 	
 y!! 	 	A/q99L-gq99JL
*`````    
	 	r:   c                     t          ||                                           }t          d| ||           t          | |||           |t	          ||           d S d S )Nscatter)r   r   ro  r{  rh  )r   r   r   rg  rf  rg  rW  s          r8   scatter_meta_implr~    se     dhhjj11Ky$s;;;k5#666'?33333 r:   c                 \    t          | |||d           |                     | j                  S Nrc  r~  r   r   r   r   r   rg  s       r8   meta_scatter_addr    s,    dCU333>>$*%%%r:   c                 ,    t          | |||d           | S r  r~  r  s       r8   meta_scatter_add_r    s    dCU333Kr:   c                     t          |t          j                  r|nd }t          | ||||           |                     | j                  S rD   )rs   rY   r   r~  r   r   r   r   r   src_or_valuer  rg  s         r8   meta_scatterr    sH     %\5<@@
J,,dCdCV444>>$*%%%r:   c                 h    t          |t          j                  r|nd }t          | ||||           | S rD   )rs   rY   r   r~  r  s         r8   meta_scatter_r  #  s9     %\5<@@
J,,dCdCV444Kr:   queryr   rt  	dropout_p	is_causalreturn_debug_maskc           	         |                      d          }|                      d          }|                      d          }	|                      d          }
|                     d          }t          j        |           }t          j        |||	ft          j        | j                  }|rX|
dk    rdnd}t          j        |	|z            }|dk    rd}n|dk    rd}t          j        |||	|f| j        | j                  }n!t          j        d| j        | j                  }t          j	        j
        rt          j                                        st          |           d	k    rCt          j        d
t          j        d          }t          j        d
t          j        d          }nBt          j        dt          j        d          }t          j        d
t          j        d          }||d d |	||||f	S )Nr   r3   r  r0   r  @         r	  r4   r   )r   rY   r   r   r]   r   r  ceilra   r  r  r   r   r   r   r2  )r  r   rt  r  r  r  r  r   	num_headsmax_seqlen_batch_qhead_dimmax_seqlen_batch_k	attention	logsumexpblocksize_cmax_seqlen_k
debug_maskseedoffsets                      r8   (meta__scaled_dot_product_flash_attentionr  1  s    AJ

1IAzz!}}H! ''I	Y 23k|  I  L%]]ccy!3k!ABB$$LL3&&L[$6E+<
 
 


 [%+elKKK
 } DUZ4466 D+e:L:LPU:U:U{2UZ???Ruz&AAA{Ael6BBBRu|FCCC 	
 
r:   	q_descale	k_descale	v_descalec
           	          | j         t          j        k    r|                     t          j                  } t          | ||||||	          S rD   )ra   rY   r  rd  r  r  )
r  r   rt  r  r  r  r  r  r  r  s
             r8   2meta__scaled_dot_product_flash_attention_quantizedr  q  sN     {e)))((3  r:   	res_shape.c                 h    t           j                  k    rt          j                   }nt	          g d fdd          fdD             }fdt          t                              D             }t          j        | j         j	                  
                    |          }|S )N)r   r3   r  r0   c                 8                                     |          S rD   r  )idxr  s    r8   rh   z,alloc_with_matching_layout.<locals>.<lambda>  s    %,,..*= r:   Tr   c                      g | ]
}|         S r4   r4   )rP   r  r  s     r8   rR   z.alloc_with_matching_layout.<locals>.<listcomp>  s    >>>S)C.>>>r:   c                 :    g | ]}                     |          S r4   r   )rP   r   	dim_orders     r8   rR   z.alloc_with_matching_layout.<locals>.<listcomp>  s%    KKK++KKKr:   r  )ri   r   rY   r   r   r   r   r   ra   r   r   )r  r  r  permuted_shapefinal_permuter  s   ``   @r8   alloc_with_matching_layoutr    s     U[Y&&u%%LL====t
 
 
	 ?>>>I>>>KKKKU3y>>5J5JKKKk%+el
 
 

'-
 
  	 Jr:   	attn_biascompute_log_sumexpc	           	         |                      d          }	|                      d          }
|                      d          }|                     d          }|                     d          }|	|
||f}t          | |          }t          j        |	|
|dft          j        | j                  }t          j        dt          j        d          }t          j        dt          j        d          }||d d ||||d f	S )Nr   r3   r  r   r  r4   r   )r   r  rY   r   r]   r   r   )r  r   rt  r  r  r  r  r  r  r  rm  S_QS_KVD_Vr  r  
logsum_expr  r  s                      r8   (meta__scaled_dot_product_cudnn_attentionr    s     	

1A

1A
**Q--C88A;;D
**R..CAsC I
$UI
6
6C	
AsAk|  J ;rF;;;D[5:f===F 	
 
r:   c           	         d\  }}	}
|                                  dk    r|                                 \  }}	}
}nB|                                  dk    r|                                 \  }	}
}d}nt          d          |                    d          }|                    d          }t          | j                  }||d<   t          | t          |                    }t          j        ||	|
ft          j	        | j
                  }t          j        d	t          j        d
          }t          j        d	t          j        d
          }||d d |
|||d f	S )N)r   r   r   rC  r0   r3   zquery must be 3D or 4Dr  r   r  r4   r   )r   r   rc  r   r   r  ri   rY   r   r]   r   r   )r  r   rt  r  r  r  r  r  r  H_Qr  rV   r  r  r   r  r  r  r  s                      r8   5meta__scaled_dot_product_fused_attention_overrideabler    sF    KAsCyy{{a3QQ			jjllS!344488B<<D
**R..C U[!!IIbM
$UE),<,<
=
=C	
Ck|  J ;rF;;;D[5:f===F 	
 
r:   )r  r  grad_input_maskr  	cum_seq_q	cum_seq_kmax_qmax_kphilox_seedphilox_offsetc                    t          j        |          }t          j        |          }t          j        |          }|t          j        |          nd }||||fS rD   rX  )r  r  r   rt  r  r  r   r  r  r  r  r  r  r  r  r  r  grad_qgrad_kr]  grad_attn_biass                        r8   >meta__scaled_dot_product_fused_attention_overrideable_backwardr    s]    * e$$Fc""Fe$$F4=4IU%i000tN66>11r:   c                     t          j        |          }t          j        |          }t          j        |          }|||fS rD   rX  )r  r  r   rt  r   r  r  r  r  r  r  r  r  r  r  r  r  r]  s                     r8   'meta__scaled_dot_product_flash_backwardr    sA    , e$$Fc""Fe$$F66!!r:   	attn_maskc                 *   |                      d          }|                      d          }|                      d          }	t          j        |           }
t          j        ||	|ft          j        | j                                      dd          }|
|fS )Nr   r3   r  r  )r   rY   r   r   r]   r   r;  )r  r   rt  r  r  r  r  r   r  r  r  r  s               r8   0meta__scaled_dot_product_flash_attention_for_cpur  7  s     AJ

1IA ''I	

 k|   i1oo  	 r:   c
                 D   t          j        |                                d|j        |j                  }
t          j        |                                d|j        |j                  }t          j        |                                d|j        |j                  }|
||fS )Nr   r  r3   r0   r  )rY   empty_permutedr   ra   r   )r  r  r   rt  r   r  r  r  r  r  r  r  r]  s                r8   9meta__scaled_dot_product_flash_attention_for_cpu_backwardr  Y  s    & !

k|	  F !

iz	  F !

k|	  F 66!!r:   dropout_mask
enable_gqac	                      d }	 |	           \   |	|          \  }
} |	|          \  }}j         \  }|j         \  }} fd} |            S )Nc                    |                                  dk    r|                     d          dfS |                                  dk    rd}t          |                                  dz
            D ]}|| j        |         z  }|                     ||                     d          |                     d          |                     d                    dfS | d	fS )
Nr0   r   TrC  r3   rz  r  r   F)r   r  r   r   viewr   )r=   r   r   s      r8   	ensure_4dzBmeta__scaled_dot_product_attention_math_for_mps.<locals>.ensure_4d  s    5577a<<;;q>>4''UUWWq[[J15577Q;'' ) )agaj(

66*affRjj!&&**affRjjII4OOe8Or:   c                  2                        f          }                      f          }
r߉	                                dk    r+|                     d          } |                    d          }nt          	j        d d                   t          | j        dd                   z   }t          	j        d d                   t          |j        dd                   z   }|                     |          } |                    |          }| |fS )Nr0   r   rz  r3   rC  )r   r   squeezer   r   r  )r   attnr   
attn_shaper   max_seq_lengthnum_headq_q_sizer  
unsqueezedvalue_head_sizes       r8   sdpa_general_mpszImeta__scaled_dot_product_attention_math_for_mps.<locals>.sdpa_general_mps  s    llJ&/JKK||Z6>JKK 	-yy{{akk!nn||A SbS!122T#)AaC.5I5II	!%+crc"233d4:ac?6K6KK
hhy))yy,,Dyr:   r-  )r  r   rt  r  r  r  r  r  r  r  k_rV   v_r  r   r  r  r  r  r  r  s   `             @@@@@@@r8   /meta__scaled_dot_product_attention_math_for_mpsr    s    	 	 	 Yu%%NB
IcNNEBIeEB&(h#J&!,.H)Aq./             r:   c                 .   |                      dd          } |                     dd          }|                     dd          }|                     d          }|                     d          }	|                     d          }
|                    d          }t          j        ||	|
|| j        | j                  }t          j        j        r&t          j        	                                r	 |r|	nd}n|rt          j        |	dz            dz  nd}t          j        ||
|ft          j        | j                  }|                     dd          }t          j        dt          j        d	          }t          j        dt          j        d	          }||||fS )
Nr3   r  r   r  r   r  rp  r4   r   )r;  r   rY   r   ra   r   r  r  r   r   r  r  r]   r   )r  r   rt  r  r  r  r  r  r  r  r  Kvr  logsumexp_dimr  r  r  s                    r8   ,meta__scaled_dot_product_efficient_attentionr    sv    OOAq!!E
--1

COOAq!!E

1A

1A

2I	BB
+aIrU\
R
R
RC} LUZ4466 L	 06Q2DK	!b&))B..!	
I}%k|  J --1

C ;rF;;;D[5:f===F
D&((r:   c                    |                     d          }|                     d          }|                     d          }|                     d          }|                     d          }|                     d          }t          j        ||||fd|j        |j                  }t          j        ||||fd|j        |j                  }t          j        ||||fd|j        |j                  }d }||
d         r~|                     d          }|dz  dk    r|n
|dz   |dz  z
  }t          |                                           }||d<   t          j        ||j        |j                  }|d	d |f         }||||fS )
Nr   r3   r  r0   r  r  r   ro  .)r   rY   r  ra   r   r   r   )r  r  r   rt  r  r   r  r  r  r  r  r  r  r   r  r  r  
head_dim_vr  r  r  r]  	grad_biaslastDimlastDimAligned	new_sizess                             r8   +meta__scaled_dot_product_efficient_backwardr    s   ( AJ

1IJJqMMEzz!}}HAJHHQKKE!	Yx0k|	  F !	Yx0iz	  F !	Yz2k|	  F I!3..$$$+bLA$5$57R<'TV,;V))**	&	"KY_Y5E
 
 
	 c8G8m,	669,,r:   c                     t          j        |          }t          j        |          }t          j        |          }|||fS rD   rX  )r  r  r   rt  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r]  s                      r8   'meta__scaled_dot_product_cudnn_backwardr    sA    . e$$Fc""Fe$$F66!!r:   window_size_leftwindow_size_right	seqused_kalibi_slopesblock_tabler1  c                    ||                      d          n|                                dz
  }||                      d          n|}||                     d          n|}|                      d          }|                      d          }t          j        |           }|*t          j        |||ft          j        | j                  }n=|                      d          }t          j        ||ft          j        | j                  }|	rX|dk    rdnd}t          j        ||z            }|dk    rd}n|dk    rd}t          j        ||||f| j	        | j                  }n!t          j        d| j	        | j                  }d	\  }}t          j
        j        rat          j                                        rCt          j        d
t          j        d          }t          j        d
t          j        d          }nBt          j        dt          j        d          }t          j        d
t          j        d          }|||||fS )Nr   r3   r  r   r  r  r  r  NNr4   r   r  )r   r   rY   r   r   r]   r   r  r  ra   r  r  r   r   r   r2  )r  r   rt  r  r  r  r  r  r  r  r  r  r  r  r  r  r1  r   r  r  r  r  r  r  total_qr  r  r  r  r  s                                 r8   meta__flash_attention_forwardr  9  s?   8 #,"3A9J9JQ9NJ*3*;A(1(9!u

2Izz"~~H  ''IK$67+<
 
 
		 **Q--K EL
 
 
	  L%]]ccy!3k!ABB$$LL3&&L[$6E+<
 
 


 [%+elKKK
 LD&} DUZ4466 D{2UZ???Ruz&AAA{Ael6BBBRu|FCCC r:   c                 N    t          |||||||||	|
||||||          \  }}}}}|S rD   )r  )r   r  r   rt  r  r  r  r  r  r  r  r  r  r  r  r  r  r1  rV   r  s                       r8   0meta__flash_attention_forward_no_dropout_inplacer    sV    * :! Ay!Q$ r:   c                     | j         t          j        k    r|                     t          j                  } t          | |||||||||	|||||          S rD   )ra   rY   r  rd  r  r  )r  r   rt  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  s                     r8   'meta__flash_attention_forward_quantizedr    sf    * {e)))(((  r:   c                     t          j        |          }t          j        |          }t          j        |          }|||fS rD   rX  )r  r  r   rt  r   r  r  r  r  r  r  r  r  r  r  r  r  
grad_querygrad_key
grad_values                       r8   meta__flash_attention_backwardr    sA    0 !%((J$$H!%((Jx++r:   cu_seqlens_qcu_seqlens_kmax_seqlen_qr  custom_mask_typecausal_diagonalseqlen_kwindow_sizec                    |                      d          }|                      d          }|                     d          }|                      d          }|                     d          }t          j        ||||| j        | j                  }||                     d          dz
  n|}|}||t          d          |}||n|}|
rt          j        |dz            dz  nd}t          j        |||ft          j        | j                  }t          j        dt          j	        d	          }t          j        dt          j	        d	          }||||||fS )
Nr   r3   r  r   r  z;max_seqlen_q must not be None when cu_seqlens_q is providedrp  r4   r   )
r   rY   r   ra   r   r   r  r  r]   r   )r  r   rt  rx  r  r  r  r  r  r  r  r  r  r	  r
  r  r  r  r  r  r  logsumexp_batch_dimactual_max_seqlen_qactual_max_seqlen_kr  r  r  r  s                               r8   !meta__efficient_attention_forwardr    sk   , 	

1A

1AA

2I	BB
+aIrU\
R
R
RC7C7O,++A..22VW M   +*6*B,,4FM	%*++b00A  	i7k|  J ;rF;;;D[5:f===F
D&*=?RRRr:   bias_requires_gradnum_splits_keyshared_storage_dqdkdvc                    |rt          j        |j        d         |j        d         k    d            t          j        |j        d         |j        d         k    d            t          j        g |j        dd         d|j        d         |j        d         R |j        |j                  }|                    d	d          }|                    d	d          }|                    d	d
          }n<t          j        |          }t          j        |          }t          j        |          }||                    d          }|dz  dk    r|n
|dz   |dz  z
  }t          |                                          }||d<   t          j        ||j        |j                  }|dd |f         }nt          j        d|j                  }||||fS )Nr3   c                      dS )Nz,seqlen must match for `shared_storage_dqdkdvr4   r4   r:   r8   rh   z4meta__efficient_attention_backward.<locals>.<lambda>R  s    B r:   r0   c                      dS )Nz3embedding dim must match for `shared_storage_dqdkdvr4   r4   r:   r8   rh   z4meta__efficient_attention_backward.<locals>.<lambda>V      I r:   r   r  r   r  rz  r  ro  .r4   r@  )
rY   rj   r   r   ra   r   r  r   r   r   )r  r  r   rt  rx  r  r  r  r  r  r  r  r  r  r  r  r  r  chunkr   r  r  r  r  r  r  s                             r8   "meta__efficient_attention_backwardr  6  s   2  -KNcil*BB	
 	
 	
 	KNcil*II	
 	
 	
 Eek!B$EEEKOEU[_EE+<
 
 

 \\"a((
<<A&&\\"a((

%e,,
#C((%e,,
))B--$+bLA$5$57R<'TV,;V%%	&	"K	DKPPP	c8G8m,		K5<888	xY66r:   scale_ascale_bscale_resultuse_fast_accumc                     d }t          j                                         dk    o                                dk     fd           t          j         | j                  o |j                   fd           t	                     dk    s't	                     dk    st	                     dk    rd }	d	 }
d
 }t	                     dk    rt          j         |	                                           p
 |            fd           t          j         |
                                          p
 |          fd           t          j                             d          dz  dk     fd           t          j                            d          dz  dk    o                    d          dz  dk    fd            j        \                      d          j        t           j        k    oj        t           j        k    p)j        t           j	        k    oj        t           j	        k    }
                                dk    rX
                                dk    r@t          j        j        t           j        k    oj        t           j        k    d            n|r)j        t           j	        k    rd}dz  nd} j        t           j        k    rdz  d}t          |          }t          |d          dz  }|t          |          z  |z  |t          |          z  |z  
                                k    rj
                                k    rRt          j                                        d            t          j                                        d            nt          j        dfd           nt          j        j        t           j        k    oj        t           j        k    d            t          j                                        dk    o                                dk    fd                               d          k    r                    d          dk    rp                    d          dk    rW                    d          k    r>t          j                                        o                                d            n                    d          k    rm                    d                              d          cxk    rt          d          k    r,n n)                    d          t          d          k    rn                    d          k    r^                    d                              d          cxk    rt          d          k    rn n                    d          k    rn                    d          t          d          k    r^                    d                              d          cxk    rt          d          k    rn n                    d          k    rnt          j        dfd           ||n j        }t          j                             d                              d          | j                  S )Nc                 x    | t           j        t           j        t           j        t           j        t           j        fv S rD   rY   r  float8_e5m2float8_e4m3fnuzfloat8_e5m2fnuzfloat4_e2m1fn_x2r   s    r8   is_fp8_or_fp4_typez2_check_scaled_mm_sizes.<locals>.is_fp8_or_fp4_type|  2    !!"
 
 	
r:   r  c                  \    d                                  d                                   S Nz%Inputs must be 2D but got self.dim()=z and mat2.dim()=r   r  r   s   r8   rh   z(_check_scaled_mm_sizes.<locals>.<lambda>  -    `

``TXT\T\T^T^`` r:   c                  (    dj          d j          S Nz?Expected both inputs to be fp8 or fp4 types but got self.dtype=z and mat2.dtype=r   r(  s   r8   rh   z(_check_scaled_mm_sizes.<locals>.<lambda>       zRVR\zznrnxzz r:   r   r	  r   c                 >    | d         | d         k    o| d         dk    S r  r4   r  s    r8   is_row_majorz,_check_scaled_mm_sizes.<locals>.is_row_major  "    !9vay(;VAY!^;r:   c                 2    | d         dk    o| d         dk    S r  r4   r  s    r8   is_col_majorz,_check_scaled_mm_sizes.<locals>.is_col_major      !9>3fQi!m3r:   c                 f    |                      d          dk    p|                      d          dk    S r  r   	tensor_2ds    r8   has_zero_dimz,_check_scaled_mm_sizes.<locals>.has_zero_dim  /    >>!$$)CY^^A->->!-CCr:   c                  2    d                                   S Nz#self must be row_major, got stride r  r   s   r8   rh   z(_check_scaled_mm_sizes.<locals>.<lambda>      MdkkmmMM r:   c                  2    d                                   S Nz#mat2 must be col_major, got stride r  r  s   r8   rh   z(_check_scaled_mm_sizes.<locals>.<lambda>  r:  r:   r3   ro  r   c                  4    d                      d           S NzBExpected self.size(1) to be divisible by 16, but got self.size(1)=r3   r   r   s   r8   rh   z(_check_scaled_mm_sizes.<locals>.<lambda>       k]a]f]fgh]i]ikk r:   c                      d j          S Nz?Expected both dimensions of mat2 to be divisible by 16 but got r-  r=  s   r8   rh   z(_check_scaled_mm_sizes.<locals>.<lambda>      fZ^Zdff r:   c                      dS )NzNFor tensorwise scaling, both scale_a and scale_b must be float (fp32) tensors.r4   r4   r:   r8   rh   z(_check_scaled_mm_sizes.<locals>.<lambda>  s    h r:   rp  r  rC  c                      dS )Nzscale_a must be contiguousr4   r4   r:   r8   rh   z(_check_scaled_mm_sizes.<locals>.<lambda>      8 r:   c                      dS )Nzscale_b must be contiguousr4   r4   r:   r8   rh   z(_check_scaled_mm_sizes.<locals>.<lambda>  rF  r:   Fc            	      j    d  d                                  d d                                  d	S )NzTInvalid blockwise scaling configuration. For blockwise scaling, scale_a should have  elements, got z, scale_b should have r  r  )expected_a_sizeexpected_b_sizer  r  s   r8   rh   z(_check_scaled_mm_sizes.<locals>.<lambda>  sd    bFUb bfmfsfsfufub b/>b bOV}}b b b r:   c                      dS )NzKFor rowwise scaling, both scale_a and scale_b must be float (fp32) tensors.r4   r4   r:   r8   rh   z(_check_scaled_mm_sizes.<locals>.<lambda>  s    e r:   c                  \    d                                  d                                 S )NzLFor non-tensorwise scaling, scale tensors must be 2D, but got scale_a.dim()=z and scale_b.dim()=r   r  r  s   r8   rh   z(_check_scaled_mm_sizes.<locals>.<lambda>  s2    ~Y`YdYdYfYf~~nunynyn{n{~~ r:   c                      dS )Nz@Both scale_a and scale_b must be contiguous for rowwise scaling.r4   r4   r:   r8   rh   z(_check_scaled_mm_sizes.<locals>.<lambda>  s    ^ r:   c                     d d d dt           d           d	dt           d           dt          d           d dt           d           d	z   dt           d           d d	t          d           dt           d           d	z   dt           d           d d
                    d           d                    d           d                    d           d                    d           dz   S )N}Invalid scaling configuration. For tensorwise scaling, both scales should be scalar. For rowwise scaling, scale_a should be (, 1), scale_b should be (1, >). For (BlockWise1x128, BlockWise128x128), scale_a should be (rz   r  ), scale_b should be (<). For (BlockWise1x128, BlockWise1x128), scale_a should be (z>). For (BlockWise128x128, BlockWise1x128), scale_a should be (). Got scale_a.size()=(r   r3   ) and scale_b.size()=(r{   r9   r   )_kr  r  r  r  s   r8   rh   z(_check_scaled_mm_sizes.<locals>.<lambda>#  s   rCDr rbcr r WXr r \ddfhk[l[lr r rpS0A0A p pXaQTEUEU p pTUp pYabdfiYjYjp p p	pAS0A0A A AQ A AV^_`beVfVfA Ajrsuwzj{j{A A AAUS0A0A U UQ U U/6||AU UBI,,q//U U/6||AU UBI,,q//U U U
U r:   r  )rY   rj   r   ra   r   r   r   r   float8_e8m0fnur  r   r  r#  r9   r  r   r   )r   r  r  r  rx  r  rz  r  r$  r.  r1  r6  is_blockwise_scalingblock_size_kblock_size_mnnum_k_blockspadded_num_k_blocks
_out_dtyperZ  rJ  rK  r  r  s   ````              @@@@@r8   _check_scaled_mm_sizesrb  r  ss   
 
 
 
L

a+DHHJJ!O`````   
L4:&&I+=+=dj+I+Izzzzz   	DV##t%%t%%	< 	< 	<	4 	4 	4	D 	D 	D t%%LT[[]]++A||D/A/AMMMM   LT[[]]++A||D/A/AMMMM   L		!r!Q&kkkk   L		!r!Q&A499Q<<"+<+Affff   
2IIaLL !55 :MU%99
 !44 9MU%88 	 ==??aGMMOOq$8$8L.Q7=EM3Qhh    " k	 } 333  "!V!:!777aBM#B55L"*<";";a"? M : ::=PP  M : ::=PP 
 ?22MMOO66))++88   ))++88   
           L.Q7=EM3Qee  
 L"9w{{}}'9~~~~~   Q1$$LLOOq((LLOOq((LLOOq(( ))++G0E0E0G0G^^   
 Q1$$LLOOw||AKKKK(2s:K:KKKKKKLLOOx3'7'777 Q1$$LLOOw||AKKKK(2s:K:KKKKKKLLOOq(( Q8As#3#333LLOOw||AKKKK(2s:K:KKKKKKLLOOq((           " (3J;tyy||TYYq\\DKXXXXr:   c           
      .    t          | |||||||          S rD   )rb  )r   r  r  r  rx  r  rz  r  s           r8   meta_scaled_mmrd  6  s'     "dGWdL)^  r:   scale_recipe_ascale_recipe_b	swizzle_a	swizzle_bc           
         	 !"#$% d }d }t          j                                         dk    o                                dk     fd           t          j         | j                  o |j                   fd            j        d          j        d         j        d           | j                  r |j                  rd}|z  d |D             }d	 |D             }rd
 D             nt
          j        g	rd 	D             	nt
          j        g	t                     dk    s't                     dk    st                     dk    rd }d }d }t                     dk    rt          j         |                                           p
 |            fd           t          j         |                                          p
 |          fd           t          j         	                    d          dz  dk     fd           t          j        	                    d          dz  dk    o	                    d          dz  dk    fd           dt          t                   dt          t                   fd}dt          t                   dt          t                   fd}dt          t                   dt          t                   fd}dt          t                   dt          t                   fd}dt          t                   dt          t                   fd}dt          t                   dt          t                   fd}dt          t                   dt          t                   fd}dt          t                   dt          t                   fd } |||          rt          j        d                                         dk    oSd                                         dk    o5d         j        t           j        k    od         j        t           j        k    d!            	n |||          rt          j        d         j        d         k    oqd                                         k    oSd         j        t           j        k    o8d                                          k    od         j        t           j        k     fd"           	n |||          rjd         j        t           j        k    od         j        t           j        k    }d         $$j        d         k    oo$j        d         d#z  k    o[$                    d          dk    oB$                    d          k    p)$j        d         dk    o$                    d          dk    }d         %%j        d          k    oo%j        d         d#z  k    o[%                    d          dk    oB%                    d           k    p)%j        d         dk    o%                    d          dk    }t          j        |o|o| $%fd$           n |||          r~d         j        t           j        k    od         j        t           j        k    }t          d#z  d%          d         $$j        d         k    oo$j        d         d#z  k    o[$                    d          dk    oB$                    d          k    p)$j        d         dk    o$                    d          dk    }d         %%j        d          k    oo%j        d         d#z  k    o[%                    d          dk    oB%                    d           k    p)%j        d         dk    o%                    d          dk    }t          j        |o|o| $%fd&           n |||          r~d         j        t           j        k    od         j        t           j        k    }t          d#z  d%          d         $$j        d         k    oo$j        d         d#z  k    o[$                    d          dk    oB$                    d          k    p)$j        d         dk    o$                    d          dk    }d         %%j        d         k    oo%j        d          d#z  k    o[%                    d          dk    oB%                    d          k    p)%j        d         dk    o%                    d          dk    }t          j        |o|o| $%fd'           n |||          rt           j        j        r_t#           j        d         d(           j        d         z  !t#           j        d         d(           j        d         z  "t
          j        #nt           j        d         d#          t          t#           j        d         d(          d%          z  !t          j        d         d#          t          t#           j        d         d(          d%          z  "t
          j        #t          j        d                                         !k    okd         j        t           j        k    oPd                                         "k    o2d         j        t           j        k    od         #k    o	d         #k    !"#	fd)           n |||          rt          d#          t          t#          d          d%          z  !t           d#          t          t#          d          d%          z  "t
          j        #t          j        d                                         !k    okd         j        t           j        k    oPd                                         "k    o2d         j        t           j        k    od         #k    o	d         #k    !"#	fd*           n |||          rt          d#          t          t#          d          d%          z  !t           d#          t          t#          d          d%          z  "t
          j        #t          j        d                                         !k    o݉d         j        t           j        k    od                                         dk    od         j        t           j        k    od                                         "k    okd         j        t           j        k    oPd                                         dk    o2d         j        t           j        k    od         #k    o	d         #k    !"#	fd+           nt          j        d, fd-           ||n j        }t          j         | j        .          S )/Nc                 x    | t           j        t           j        t           j        t           j        t           j        fv S rD   r  r   s    r8   r$  z5_check_scaled_mm_sizes_v2.<locals>.is_fp8_or_fp4_typeS  r%  r:   c                 "    | t           j        k    S rD   )rY   r#  r   s    r8   is_fp4_typez._check_scaled_mm_sizes_v2.<locals>.is_fp4_type\  s    ...r:   r  c                  \    d                                  d                                   S r'  r   r(  s   r8   rh   z+_check_scaled_mm_sizes_v2.<locals>.<lambda>a  r)  r:   c                  (    dj          d j          S r+  r   r(  s   r8   rh   z+_check_scaled_mm_sizes_v2.<locals>.<lambda>e  r,  r:   r   r3   c                 ,    g | ]}t          |          S r4   r(   rP   sis     r8   rR   z-_check_scaled_mm_sizes_v2.<locals>.<listcomp>t      ???"k"oo???r:   c                 ,    g | ]}t          |          S r4   rp  rq  s     r8   rR   z-_check_scaled_mm_sizes_v2.<locals>.<listcomp>u  rs  r:   c                 ,    g | ]}t          |          S r4   r)   rq  s     r8   rR   z-_check_scaled_mm_sizes_v2.<locals>.<listcomp>x      999[__999r:   c                 ,    g | ]}t          |          S r4   rv  rq  s     r8   rR   z-_check_scaled_mm_sizes_v2.<locals>.<listcomp>~  rw  r:   r   r	  r   c                 >    | d         | d         k    o| d         dk    S r  r4   r  s    r8   r.  z/_check_scaled_mm_sizes_v2.<locals>.is_row_major  r/  r:   c                 2    | d         dk    o| d         dk    S r  r4   r  s    r8   r1  z/_check_scaled_mm_sizes_v2.<locals>.is_col_major  r2  r:   c                 f    |                      d          dk    p|                      d          dk    S r  r   r4  s    r8   r6  z/_check_scaled_mm_sizes_v2.<locals>.has_zero_dim  r7  r:   c                  2    d                                   S r9  r  r   s   r8   rh   z+_check_scaled_mm_sizes_v2.<locals>.<lambda>  r:  r:   c                  2    d                                   S r<  r  r=  s   r8   rh   z+_check_scaled_mm_sizes_v2.<locals>.<lambda>  r:  r:   ro  c                  4    d                      d           S r?  r   r   s   r8   rh   z+_check_scaled_mm_sizes_v2.<locals>.<lambda>  r@  r:   c                      d j          S rB  r-  r=  s   r8   rh   z+_check_scaled_mm_sizes_v2.<locals>.<lambda>  rC  r:   recipe_arecipe_bc                     t          |           dk    o>t          |          dk    o+| d         t          j        k    o|d         t          j        k    S r  )r   r(   
TensorWiser  r  s     r8   is_tensorwisez0_check_scaled_mm_sizes_v2.<locals>.is_tensorwise  sR    H" :MMQ&:QK;#99: QK;#99	r:   c                     t          |           dk    o>t          |          dk    o+| d         t          j        k    o|d         t          j        k    S r  )r   r(   RowWiser  s     r8   
is_rowwisez-_check_scaled_mm_sizes_v2.<locals>.is_rowwise  sR    H" 7MMQ&7QK;#667 QK;#66	r:   c                     t          |           dk    o>t          |          dk    o+| d         t          j        k    o|d         t          j        k    S r  )r   r(   BlockWise1x32r  s     r8   is_mxz(_check_scaled_mm_sizes_v2.<locals>.is_mx  sR    H" =MMQ&=QK;#<<= QK;#<<	r:   c                     t          |           dk    o>t          |          dk    o+| d         t          j        k    o|d         t          j        k    S r  )r   r(   BlockWise1x16r  s     r8   is_nv_single_levelz5_check_scaled_mm_sizes_v2.<locals>.is_nv_single_level  sT     H" =MMQ&=QK;#<<= QK;#<<	r:   c                     t          |           dk    ojt          |          dk    oW| d         t          j        k    oA| d         t          j        k    o+|d         t          j        k    o|d         t          j        k    S )Nr  r   r3   )r   r(   r  r  r  s     r8   is_nvz(_check_scaled_mm_sizes_v2.<locals>.is_nv  s~    H" :MMQ&:QK;#<<: QK;#99: QK;#<<	:
 QK;#99r:   c                     t          |           dk    o>t          |          dk    o+| d         t          j        k    o|d         t          j        k    S r  )r   r(   BlockWise1x128r  s     r8   is_1x128_1x128z1_check_scaled_mm_sizes_v2.<locals>.is_1x128_1x128  sR    H" >MMQ&>QK;#==> QK;#==	r:   c                     t          |           dk    o>t          |          dk    o+| d         t          j        k    o|d         t          j        k    S r  )r   r(   r  BlockWise128x128r  s     r8   is_1x128_128x128z3_check_scaled_mm_sizes_v2.<locals>.is_1x128_128x128  sU    H" @MMQ&@QK;#==@ QK;#??	r:   c                     t          |           dk    o>t          |          dk    o+| d         t          j        k    o|d         t          j        k    S r  )r   r(   r  r  r  s     r8   is_128x128_1x128z3_check_scaled_mm_sizes_v2.<locals>.is_128x128_1x128  sR    H" >MMQ&>QK;#??> QK;#==	r:   c                      dS )Nz\For Tensorwise scaling, both scale_a and scale_b must be single element float (fp32) tensorsr4   r4   r:   r8   rh   z+_check_scaled_mm_sizes_v2.<locals>.<lambda>  s    v r:   c            	          dj         d          dd                                          d j         d          dd                                          d	S )Nz'For Rowwise scaling, scale_a must have r   z elements (got: z), and scale_b must have r3   r{   )r   r   )r  r  r  r   s   r8   rh   z+_check_scaled_mm_sizes_v2.<locals>.<lambda>  s~    ddjm d d]def]g]m]m]o]o d d/3z!}d dNUVWjN^N^N`N`d d d r:   r  c                  z    d d dz   dj          d dj         d d dz   dj          d dj         dS )Nz>For 1x128 x 1x128 blockwise scaling, scale a must have shape [rz   r  ] (got: ) and stride [1, )scale b must have shape [r{   r   r   )r  r  r  sasbs   r8   rh   z+_check_scaled_mm_sizes_v2.<locals>.<lambda>  s    z01z z56#Xz zGIxz zbcz zmomvz z01z z56#Xz zGIxz zbcz zmomvz z z r:   rC  c                  z    d ddz   dj          d dj         d d dz   dj          d dj         dS )Nz]For 128x128 x 1x128 blockwise scaling, L4 = {round_up(K / 128, 4)}, scale a must have shape [rz   r  r  r  r  r{   r  r  L4r  r  r  r  s   r8   rh   z+_check_scaled_mm_sizes_v2.<locals>.<lambda>7  s    z02z z673hz zHJz zcez zoqoxz z01z z56#Xz zGIxz zbcz zmomvz z z r:   c                  z    d d dz   dj          d dj         d ddz   dj          d dj         dS )Nz]For 1x128 x 128x128 blockwise scaling, L4 = {round_up(K / 128, 4)}, scale a must have shape [rz   r  r  r  r  r{   r  r  s   r8   rh   z+_check_scaled_mm_sizes_v2.<locals>.<lambda>V  s    |01| |56#X| |GIx| |bc| |momv| |02| |673h| |HJ| |ce| |oqox| | | r:   rp  c                      d  dd                                           d dd                                           dt          j         dd         j         dd         j         d d	d          dd          d
S )Nz!for MX scaling scale_a must have  (got: r   ) and scale_b must have z). Scales must have types z (for self: 	, mat_b: z) Must have swizzle type  (got self: r{   )r   rY   r[  ra   expected_scale_a_elemsexpected_scale_b_elemsexpected_swizzler  r  rg  rh  s   r8   rh   z+_check_scaled_mm_sizes_v2.<locals>.<lambda>q  s    s8N s sW^_`WaWgWgWiWi s s-Cs sLSTUJL\L\L^L^s s"'"6s sDKAJDTs s_fgh_i_os s /?s s MVVWLs s dmmncos s s r:   c                      d  dd                                           d dd                                           d dd          dd          dS )	Nz.for single-level NV scaling scale_a must have r  r   r  ). Must have swizzle type r  r  r{   r  r  s   r8   rh   z+_check_scaled_mm_sizes_v2.<locals>.<lambda>  s    iE[ i idklmdndtdtdvdv i i-Ci iLSTUJL\L\L^L^i i$4i iBKA,i iYbcdYei i i r:   c                      d  dd                                           d dd                                           d dd          dd          dS )	Nz!for NV scaling scale_a must have r  r   r  r  r  r  r{   r  r  s   r8   rh   z+_check_scaled_mm_sizes_v2.<locals>.<lambda>  s    i8N i iW^_`WaWgWgWiWi i i-Ci iLSTUJL\L\L^L^i i$4i iBKA,i iYbcdYei i i r:   Fc                     d d d dt           d           d	dt           d           dt          d           d dt           d           d	z   dt           d           d d	d
                             d
           dd
                             d           dd
                             d
           dd
                             d           dz   S )NrQ  rR  rS  rz   r  rT  rU  rV  rW  r   r3   rX  r{   rY  )r  r  r  r  r  s   r8   rh   z+_check_scaled_mm_sizes_v2.<locals>.<lambda>  s   m?@m m^_m m STm m X``acfWgWgm m mkHQ,<,< k kC@P@P k kPQk kU]^_adUeUek k k	kWHQ,<,< W W W W+21:??1+=+=W WAHQRASASW W+21:??1+=+=W WAHQRASASW W WW r:   r  )rY   rj   r   ra   r   r)   
NO_SWIZZLEr   r   r   r   r(   r   r  r?   r  r  r9   SWIZZLE_32_4_4r[  r  r   r   )&r   r  r  re  r  rf  rx  rz  rg  rh  r  r$  rl  K_packed_multiplierr.  r1  r6  r  r  r  r  r  r  r  r  types_ok
scale_a_ok
scale_b_okra  r  r  r  r  r  r  r  r  r  s&   ``` `   ``                   @@@@@@@@@r8   _check_scaled_mm_sizes_v2r  F  sv   
 
 
/ / / 
L

a+DHHJJ!O`````   
L4:&&I+=+=dj+I+Izzzzz   	
1A
1A
1A {4: !;;tz#:#: !	  ?????N?????N 
99y999		 "
	  
99y999		 "
	
 	DV##t%%t%%	< 	< 	<	4 	4 	4	D 	D 	D t%%LT[[]]++A||D/A/AMMMM   LT[[]]++A||D/A/AMMMM   L		!r!Q&kkkk   L		!r!Q&A499Q<<"+<+Affff  
	D$5 	kAR 	 	 	 		k!2 	d;>O 	 	 	 		D- 	k9J 	 	 	 		;'	373D	 	 	 		D- 	k9J 	 	 	 		T+%6 	${BS 	 	 	 		tK'8 	DDU 	 	 	 		tK'8 	DDU 	 	 	 	 =88 A	L
  ""a' 6AJ$$&&!+6AJ$56 AJ$5vv    Z77 x	L
 #q( 6AJ$$&&!+6AJ$56 AJ$$&&!+6 AJ$5      
 
 
 
 ^NN;; l	
 
 EM1Wgaj6F%-6W  Bq  THQK18+TIIaLLA%T YYq\\Q&R28A;!+;+Q		!PQ@Q	  Bq  THQK18+TIIaLLA%T YYq\\Q&R28A;!+;+Q		!PQ@Q	  L6Z6J           nn== N	 
 EM1Wgaj6F%-6W  !c'1%%BBr! UHQK18+UIIaLLA%U YYq\\R'SBHQK1,<,R1QRAR	  Bq  THQK18+TIIaLLA%T YYq\\Q&R28A;!+;+Q		!PQ@Q	  L6Z6J            nn== o	 
 EM1Wgaj6F%-6W  !c'1%%BBq  THQK18+TIIaLLA%T YYq\\Q&R28A;!+;+Q		!PQ@Q	  Br! UHQK18+UIIaLLA%U YYq\\R'SBHQK1,<,R1QRAR	  L6Z6J            U>>22 P	}  >)1$*Q-)D)DtzRS})T&)1$*Q-)D)DtzRS})T&#.#9  )1$*Q-)E)ETZ]B//I I *& *2$*Q-)E)ETZ]B//I I *& $/#= L
  ""&<< 5AJ$(<<5AJ$$&&*@@5 AJ$(<<5 aL$44	5
 aL$44              ?? 4	%-a%5%5!RRS8T8T%T"%-a%5%5!RRS8T8T%T"*9L
  ""&<< 5AJ$(;;5AJ$$&&*@@5 AJ$(;;5 aL$44	5
 aL$44             U>>22 #	%-a%5%5!RRS8T8T%T"%-a%5%5!RRS8T8T%T"*9L
  ""&<< 	5AJ$(;;	5AJ$$&&!+	5 AJ$5	5 AJ$$&&*@@		5
 AJ$(;;	5 AJ$$&&!+	5 AJ$5	5 aL$44	5 aL$44            $ L
 
 
 
 
 
 
 
   (3J;q!:dkBBBBr:   r'  contraction_dimsc                 6    t          | |||||||	|||          S )N)rx  rz  rg  rh  r  )r  )r   r  r  re  rg  r  rf  rh  rx  r'  r  r  s               r8   meta_scaled_mm_v2r    s<     %%   r:   c                 `    t          | ||||d           |                     | j                  S NT)rg  r  r   r   r   rg  r  r  s         r8   meta_scatter_reduce_twor    s3     dCVTJJJJ>>$*%%%r:   c                 0    t          | ||||d           | S r  r  r  s         r8   meta_scatter_reduce__twor    s"    dCVTJJJJKr:   c                d    t          j        d                                 cxk     odk    nc  fd                                            dk    r&t          j        |t           j         j                  S t          j                             d          |t           j         j                  S )Nr   r  c                  2    d                                   S )NzAThe probability distributions dimensions must be 1 or 2, but got r   r,  s   r8   rh   z"meta_multinomial.<locals>.<lambda>  s    aTYT]T]T_T_aa r:   r3   r  )rY   rj   r   r   r   r   r   )r   num_samplesreplacementr  s   `   r8   meta_multinomialr    s     
L	EIIKK1aaaa   yy{{a{;ejNNNN;

1{%*U\   r:   c                     d}| D ]}||z  }|S r2   r4   )vsr  vs      r8   multiply_integersr    s&    	A  	QHr:   c                     t          j        t                    k    fd           dz   t          j        t                     k     fd           t          j        t          d  dd          D                       ot          d D                        fd            d d         \  }}||gR S )Nc                  .    d  dt                     S )Nz%It is expected output_size equals to , but got size r  )num_spatial_dimsr  s   r8   rh   z'upsample_common_check.<locals>.<lambda>  s#    k8HkkY\]hYiYikk r:   r  c                  .    d  dt                     S )Nz$It is expected input_size equals to r  r  )expected_input_dimsr  s   r8   rh   z'upsample_common_check.<locals>.<lambda>  s#    l7Jll[^_i[j[jll r:   c              3   "   K   | ]
}|d k    V  dS r   Nr4   rP   r  s     r8   rv   z(upsample_common_check.<locals>.<genexpr>  s&      **aAE******r:   c              3   "   K   | ]
}|d k    V  dS r  r4   r  s     r8   rv   z(upsample_common_check.<locals>.<genexpr>  s&      2N2NQ1q52N2N2N2N2N2Nr:   c                      d  d S )NzDInput and output sizes should be greater than 0, but got input size z and output size r4   )r  r  s   r8   rh   z'upsample_common_check.<locals>.<lambda>  s)     A A A3>A A r:   )rY   rj   r   rK  )r  r  r  r  channelsr  s   ```  @r8   upsample_common_checkr    s   	LK,,kkkkk   +Q.	LJ..lllll  
 
L**:abb>*****Ns2N2N+2N2N2N/N/N	A 	A 	A 	A 	A   ""1"~FHH+{+++r:   c                 p    t          j                                         dk    p(t                                           dd                     fd           t                                           |d          }                     |                              t          j	                             S )Nr   r3   c                  2    d                                   S )Nz>Non-empty 3D data tensor expected but got a tensor with sizes r   r,  s   r8   rh   z$upsample_nearest1d.<locals>.<lambda>      _QVQ[Q[Q]Q]__ r:   r  r   
rY   rj   r   r  r   r  r   rd  rS   r   )r   r  scalesfull_output_sizes   `   r8   upsample_nearest1dr         
LA/

QRR0@AA____   -

kA   ??+,,//1%88 0   r:   c                     t          j                                         dk    p(t                                           dd                     fd           t                                           |d          }                     |          }t          j                   } j	        \  }}}} j
        j        dk    r|dk     rt           j        }|                    |          }|S )	Nr   r3   c                  2    d                                   S Nz>Non-empty 4D data tensor expected but got a tensor with sizes r   r,  s   r8   rh   z$upsample_nearest2d.<locals>.<lambda>  r  r:   r  r  r   rC  r   )rY   rj   r   r  r   r  r   rS   r   r   r   r|   r   
contiguous)	r   r  scales_hscales_wr  r   r   rV   
n_channelss	   `        r8   upsample_nearest2dr    s     
LA/

QRR0@AA____   -

kA   __-..F /66M  +Az1a|F""zA~~/];;FMr:   r  r  r  r  c                 v    t          ||d          t          j         j        dk     fd           t	          d          D ]9t          j                                                k     fd           :                     |                              t          j	                             S )Nr  r  rC  c                      d j          S NzFExpected grad_output to be a tensor of dimension 4 but got: dimension rB  r  s   r8   rh   z-upsample_nearest2d_backward.<locals>.<lambda>C      kYdYikk r:   c            
      R    d d           d d                                S )NzCExpected grad_output to have the same shape as output; output.size() = z but got grad_output.size(r   r  r8  r   s   r8   rh   z-upsample_nearest2d_backward.<locals>.<lambda>H  s]    J !J J'7':J J-.J J4?4D4DQ4G4GJ J r:   r   )
r  rY   rj   r   r   r   r   rd  rS   r   )r8  r  r  r  r  r  r   s   `    @@r8   upsample_nearest2d_backwardr  1  s     -K!   
LAkkkk   1XX 
 
Q#3A#66     	
 	
 	
 	
   ,,//1+>> 0   r:   c                 p    t          j                                         dk    p(t                                           dd                     fd           t                                           |d          }                     |                              t          j	                             S )Nr   r3   c                  2    d                                   S )Nz>Non-empty 5D data tensor expected but got a tensor with sizes r   r,  s   r8   rh   z$upsample_nearest3d.<locals>.<lambda>Z  r  r:   r0   r  r   r  )r   r  scales_dr  r  r  s   `     r8   upsample_nearest3dr  T  r  r:   c                 d   t          j        |           t          j        | t           j                  }}||t          |t                    st          dt          |                     t          |t                    st          dt          |                     |j        }|                                }	t          ||          }t          ||          }|
                    ||	           |
                    ||	           t          ||           t          ||           ||fS ||fS )Nr   zvalues must be TensorLike, got z indices must be TensorLike, got )r  r  )rY   r   r   rs   r   r   r|   r   r   r!   r   r#   )
r   stabler   
descendingr   r   r  r   r   
out_strides
             r8   	meta_sortr  d  s.    D!!5#3D#L#L#LqAg1&*-- 	S !Q4<<!Q!QRRR':.. 	U !SDMM!S!STTT G	XXZZ
"6955#GY779j111Iz222F3333G4444wa4Kr:   c           	          t          j         j        dk     fd           t          j         j        j        k     fd                                d          wt          j        j        dk    fd           t          j                                        k    fd           t          j        j        j        k    fd           t          j        j        dk    fd                                d	          z  z  t          j                                        k     fd
           t          j        t           fdfD                       d            d S )Nr  c                       j          dS Nz != 2rB  )input_gatess   r8   rh   z%rnn_cell_checkSizes.<locals>.<lambda>      ;3C0J0J0J r:   c                  &    j          d j          S Nr}  r-  )hidden_gatesr  s   r8   rh   z%rnn_cell_checkSizes.<locals>.<lambda>  s    ;$>>,*<>> r:   r3   c                       j          dS )Nz != 1rB  )
input_biass   r8   rh   z%rnn_cell_checkSizes.<locals>.<lambda>  s    jo3L3L3L r:   c                  6                                      d  S r	  r  )
gates_sizer	  s   r8   rh   z%rnn_cell_checkSizes.<locals>.<lambda>  s!    z''));;z;; r:   c                  &    j          d j          S r	  r-  )hidden_biasr	  s   r8   rh   z%rnn_cell_checkSizes.<locals>.<lambda>  s    z'@@[->@@ r:   c                       j          dS r  rB  )prev_hiddens   r8   rh   z%rnn_cell_checkSizes.<locals>.<lambda>  r  r:   r   c            
      p                                      d                    d           d d d  d
S )Nr}  r   z * z // z (aka r{   )r   r   )expected_prev_hidden_numelfactorr	  r  r
	  s   r8   rh   z%rnn_cell_checkSizes.<locals>.<lambda>  sL    ;$$&&K,<,<Q,?,?JTZb| r:   c              3   8   K   | ]}|j         j         k    V  d S rD   r@  )rP   r=   r  s     r8   rv   z&rnn_cell_checkSizes.<locals>.<genexpr>  sA       
 
 H**
 
 
 
 
 
r:   c                      dS )Nz%expected all inputs to be same devicer4   r4   r:   r8   rh   z%rnn_cell_checkSizes.<locals>.<lambda>  s    7 r:   )rY   rj   r   r   r   r   rK  )r  r	  r	  r	  r	  r
	  r	  r	  s   ``````@@r8   rnn_cell_checkSizesr	    s    
L!Q&(J(J(J(JKKK	L\//>>>>>   !!!$$JZ_)+L+L+L+LMMM*,;;;;;	
 	
 	
 	 11@@@@@	
 	
 	
 
L!Q&(J(J(J(JKKK!,!1!1!!4!4z!AV!K	L99   
L 
 
 
 
"J[I
 
 
 	
 	
 	87    r:   c                     t          | |||d|           t          j        | t          j                  }t          j        |t          j                  }t          j        |t          j                  }|||fS )NrC  r   )r	  rY   r   r   )r  r	  cxr	  r	  	workspacehycys           r8   _thnn_fused_lstm_cell_metar	    sq     \:{ArRRR E<STTTI		"E,C	D	D	DB		"E,C	D	D	DBIr:   c                 t   t          |          dk    }|r%t          |          }|d         }| j        d         }n:|
r| j        d         n| j        d         }|
r| j        d         n| j        d         }d}|rdnd}|dk    r|n|}|r|||z  g}n|
r||||z  gn||||z  g}|                     |          }|	|z  ||g}|t          j        d| j                  }n|                    |          }|                    |	|z  ||g          }|rdnd}|                     |t          j                  }|||||fS )Nr   r3   r   r  r@  r   )r   r   r   rY   r   r   r  )r   rv  weight_stride0
weight_bufhxr	  r  hidden_size	proj_size
num_layersbatch_firstdropouttrainbidirectionalr3  dropout_stateis_input_packed
seq_length
mini_batchbatch_sizes_sumnum_directionsout_sizer   r   
cell_shaper	  r	  reserve_shapereserves                                r8   
_cudnn_rnnr,	    s   & +&&!+O %%
 ^
+a.'2FU[^^A
'2FU[^^A
'.QQQN%NNyyH 
$h&?@		 EZX%>??j(^*CD 	
 __Y''F~-z;GJ	z[5<000\\*%%	zN2JI	J	JB %AAAMoom5;o??G2r7J..r:   c                     d}|D ]1}|                                 dk    r||                                 z  }2|                     |f          }t          | |||||||d|||	|
|||          S r  )r   r   r,	  )r   rv  r	  r	  r	  r  r	  r	  r	  r	  r 	  r!	  r3  r"	  total_weight_elemsr"  r	  s                    r8   
miopen_rnnr/	    s    &  , ,7799q==!''))+"4!677J

	!  r:   c                    |r| j         d         n| j         d         }|r| j         d         n| j         d         }|
}|r|||gn|||g}|                     |          }|t          j        d| j                  }n|                    |j                   }|t          j        d| j                  }n|                    |j                   }t          j        d| j        t          j                  }||||fS )Nr3   r   r@  r   )r   r   rY   r   r   r  )r   w0w1w2w3hx_cx_r   r3  r  r	  r	  
has_biasesr!	  r	  r 	  r$	  r%	  output_chanelsr   r   r	  r	  r	  s                           r8   mkldnn_rnn_layerr9	    s    & $/BQEKNJ#.BQEKNJ N 	6Z00*n5 
 __Y''F
{[5<000]]39%%
{[5<000]]39%%Ael%+FFFI2r9$$r:   c                     | j         dk    r%t          j        dk    pdk    fd           d S t          j        |                               dk    fd           d S )Nr   r   c                       d  S )Nz4: Expected reduction dim -1 or 0 for scalar but got r4   r   r  s   r8   rh   z'zero_numel_check_dims.<locals>.<lambda>H  s    wYYTWYY r:   c                       d  dS )Nz: Expected reduction dim z to have non-zero size.r4   r<	  s   r8   rh   z'zero_numel_check_dims.<locals>.<lambda>M  s    wUUUUU r:   )r   rY   r   r   )r   r   r  s    ``r8   zero_numel_check_dimsr>	  D  s    yA~~1H!r	YYYYY	
 	
 	
 	
 	

 	IIcNNaUUUUU	
 	
 	
 	
 	
r:   c                      |5t          ||                                          }t          ||            d S t          j        |                                dk     fd           d S )Nr   c                        dS )Nz@: Expected reduction dim to be specified for input.numel() == 0.r4   ri  s   r8   rh   z%check_argmax_argmin.<locals>.<lambda>Y  s    t]]] r:   )r   r   r>	  rY   rj   r   )r  r   r   s   `  r8   check_argmax_argminrA	  R  sq    
S$((**--dC.....JJLLA]]]]	
 	
 	
 	
 	
r:   c                     t          d| |           t          j        | j        ||fnd           }t	          | ||          }|                     |t          j                  S )Nargmaxr   )rA	  rS   r  r   r  r   rY   r   )r   r   r  r  r   s        r8   argmax_argmin_metarD	  ]  s[    $,,,
coSFF4PPD$T499E>>%u{>333r:   c                 l    |t           j        k    rt           j        }t          j        d||||          S )Nr4   r  )rY   jaggedr  r   )r  ra   r   r   r   s        r8   scalar_tensorrG	  e  s<    
 ;
%v*   r:   c                    t          ||                                 d          }|                                 dk    rdn|                     |          }t          j        |dk               t          j        ||k    d            t          | j                  }t          |          dk    r|||<   |                     |          |                     |t          j	                  fS )NTrH  r   r3   c                      dS )Nzk not in range for dimensionr4   r4   r:   r8   rh   ztopk_meta.<locals>.<lambda>w  s    )G r:   r   )
r   r   r   rY   rj   r   r   r   r   r   )r   r  r   largestr   	sliceSizetopKSizes          r8   	topk_metarN	  q  s     dhhjjd
;
;
;CXXZZ1__$))C..I	La	Li!G!GHHHDJH
8}}q>>(##T^^HEK^%P%PPPr:   c                     ||t          d          |                                }|                                 }	t          j        ||	j        |	j        |	j                  S )Nz;segment_reduce(): Either lengths or offsets must be defined)ra   r   r   )r   r  rY   r   ra   r   r   )
r  r   r8  r  r  r  r  r  data_contiggrad_contigs
             r8   meta__segment_reduce_backwardrR	    sq    
 7?I
 
 	
 //##K//##K!!	   r:   c                 4   ddl m} t          |                                 d          |                                 dk    r|                               nd}t          j         ||dk    ||k              fd           t          | j        d          | j        dz   d          z             }|r.|                                 dk    r|	                    d           | 
                    |          | 
                    |t
          j                  fS )Nr   )r  TrI	  r3   c                      d  S )Nz9kthvalue(): selected number k out of range for dimension r4   r   s   r8   rh   zkthvalue_meta.<locals>.<lambda>  s    QCQQ r:   r   )ra  r  r   r   r   rY   rj   r   r   rl  r   r   )r   r  r   r  r  dimSizer   s     `    r8   kthvalue_metarV	    s    >=====
dhhjjd
;
;
;C $

QdiinnnAG	LQW%%QQQQ  
 DSD!DJsQwyy$99::E 488::>>S!>>%  $..ek."J"JJJr:   c                    | | n|}t          j        |                                dk    d            |                                }| ,t          j        |                                 |k    d            |,t          j        |                                |k    d            t          j        |                                |k    d            t          j        |                                |k    d            t          j        |                                dk    d            t          j        |                                |d         |d	         z  d
z  k    d            d S )Nr  c                      dS N r4   r4   r:   r8   rh   z(checkLSTMBackwardSizes.<locals>.<lambda>  s    " r:   c                      dS rY	  r4   r4   r:   r8   rh   z(checkLSTMBackwardSizes.<locals>.<lambda>       r:   c                      dS rY	  r4   r4   r:   r8   rh   z(checkLSTMBackwardSizes.<locals>.<lambda>  r\	  r:   c                      dS rY	  r4   r4   r:   r8   rh   z(checkLSTMBackwardSizes.<locals>.<lambda>       r:   c                      dS rY	  r4   r4   r:   r8   rh   z(checkLSTMBackwardSizes.<locals>.<lambda>  r_	  r:   c                      dS rY	  r4   r4   r:   r8   rh   z(checkLSTMBackwardSizes.<locals>.<lambda>  s    r r:   r   r3   rC  c                      dS rY	  r4   r4   r:   r8   rh   z(checkLSTMBackwardSizes.<locals>.<lambda>  s    R r:   )rY   rj   r   r   r   )grad_hygrad_cyr	  r	  r	  defined_gradexp_sizes          r8   checkLSTMBackwardSizesrg	    s2   %177wL	L!!##q(**555  ""HW\\^^x/<<<W\\^^x/<<<	Lh&

333	Lh&

333	LA%zz222	L""hqkHQK&?!&CCZZPPPPPr:   c                     | |dS t          | ||||           t          j        |t                    }t          j        |t                    }|r|                    dd          nd }|||fS )NNNNr   r   F)r  )rg	  rY   r   legacy_contiguous_memory_formatr+  )	rc	  rd	  r	  r	  r	  has_bias
grad_gatesgrad_cxr  s	            r8   #_thnn_fused_lstm_cell_backward_implrn	    s    7?7GRY???!!@  J r1PQQQG4<F
q%000$Iw	))r:   c                 `   d }d }d }|d         r'|                     |                                           }|d         s|d         re|                     |                    d          |                     d          f          }|                     |                    d                    }|||fS )Nr   r3   r  r   r@  )r.  r-  r/  r2  r;  grad_weightr  s          r8   linear_backwardrq	    s    JKI1~ ;!++FKKMM::
1~ BQ B",,l.?.?.C.CV[[QS__-UVV **<+<+<R+@+@AA	Y//r:   c                     t           j                  dk    r j        d         ||z  z  dk    st          d j         d|           d  fd} j        d         ||z  z  } j        d         |z  } j        d	         |z  }g  j        d d         |||R }                     |          }|                     |            
          }|S )Nr  rz  r   z'Invalid input shape for pixel_shuffle: z with upscale_factor = c                 \    t           j                            |           t           j        k    S rD   r  r  s    r8   r  z,meta_pixel_shuffle.<locals>.is_channels_last  s!    "88==ATTTr:   c                  $               r+t                    dk    rt          j        S t          j        S                     t          j                  rt          j        S                     t          j                  rt          j        S d S )Nr   r   )r   rY   r   r  r  preserve_format)r  r   s   r8   r  z.meta_pixel_shuffle.<locals>.pick_memory_format  s    D!! 	)4  F**..**e.EFF 	)**e.CDD 	)((	) 	)r:   r  r   r   )r   r   r   r   rd  )	r   upscale_factorr  r!  HrWrr   r   r  s	   `       @r8   meta_pixel_shufflery	    s    	DJ!
2.>2Q RVW W WidjiiYgii
 
 	
U U U	) 	) 	) 	) 	) 	) 	
2>N:;A	B.	(B	B.	(B-$*SbS/-1-b-"--I
..
#
#C
&&1133&
4
4CJr:   c                 L   |                      | j                  }|                     |j                  }|                     |j                  }|                     |j                  }|                     |j                  }|                     |j                  }|||||||fS rD   r  )r   weight0weight1weight2weight3r5	  cx_tmpr   hy_cy_grad_output_r_optgrad_hy_r_optgrad_cy_r_optr   r  r	  r	  r7	  r 	  r!	  r3  r	  r	  diff_xdiff_hxdiff_cxdiff_w1diff_w2diff_bs                                r8   mkldnn_rnn_layer_backwardr	    s    4 __U[))FmmCI&&Gv|,,G..G..Gw}--F7GVVWgEEr:   )	out_int32rightc                t    t          j        | |rt           j        nt           j        t           j                  S )Nra   r   )rY   r   r  r   r   r   
boundariesr	  r	  s       r8   meta_bucketizer	    s7     &7ekkEK-   r:   r	  r	  r	  c                `    |                     d|rt          j        nt          j                  S )Nr4   r   )r   rY   r  r   r	  s       r8   meta_bucketize_scalarr	     s4     
&7ekkEK     r:   d   c                     dt                     dk    r*t          j                                          fd           t                     dk    r(                                 rt	          j        d           t          j        t          t                    fd           t          j        dk    fd           t          j        t          t                    fd	           t          j        t          t                    fd
           t          j        k    fd           t          j	         j
         j                  S )Nzhistc()r   c                      d j          dS )Nz%"histogram_cpu" not implemented for 'r)  r   r,  s   r8   rh   zmeta_histc.<locals>.<lambda>5  s    LekLLL r:   r   z%_histc_cuda with floating point inputc                  ,     dt                      S )Nz#: argument 'bins' must be int, not rH  binsr  s   r8   rh   zmeta_histc.<locals>.<lambda>;  s    7KKtDzzKK r:   r   c                       d  S )Nz: bins must be > 0, but got r4   r	  s   r8   rh   zmeta_histc.<locals>.<lambda>=  s    g#Q#Q4#Q#Q r:   c                  ,      dt                     S )Nz%: argument 'min' must be Number, not rH  )r  r   s   r8   rh   zmeta_histc.<locals>.<lambda>@      7LLcLL r:   c                  ,      dt                     S )Nz%: argument 'max' must be Number, not rH  )r  r  s   r8   rh   zmeta_histc.<locals>.<lambda>D  r	  r:   c                        dS )Nz: max must be larger than minr4   )r  s   r8   rh   zmeta_histc.<locals>.<lambda>F  s    %N%N%N r:   r   )r   rY   rj   r  rS   r&  rs   r   r   r   r   ra   )r   r	  r   r  r  s   ````@r8   
meta_histcr	  .  s|    G5U""##%%LLLL	
 	
 	
 5V##(?(?(A(A#%&MNNN	L4!!KKKKK   
LQQQQQRRR	L3LLLLL   
L3LLLLL   
LNNNNOOO;tELDDDDr:   c                     t                                           |d          }t          j                                         dk    p2t          d                                  dd          D                        fd                                |                              t          j	                             S )Nr  r  r   c              3   "   K   | ]
}|d k    V  dS r  r4   )rP   r   s     r8   rv   z,meta_upsample_bimode2d_aa.<locals>.<genexpr>\  s&      !H!Ht$(!H!H!H!H!H!Hr:   r3   c                  2    d                                   S r  r   r,  s   r8   rh   z+meta_upsample_bimode2d_aa.<locals>.<lambda>]  r  r:   r   )
r  r   rY   rj   r   rK  r   rd  rS   r   )r   r  r  r  r  r  s   `     r8   meta_upsample_bimode2d_aar	  J  s     -

kA   
LHc!H!Huzz||ABB7G!H!H!HHH____   ??+,,//1%88 0   r:   c                 f    t          ||d          t          j         j        dk     fd           t	          d          D ]1t          j         j                          k     fd           2                     |                              t          j	                             S )Nr  r  rC  c                      d j          S r  rB  r  s   r8   rh   z4meta_upsample_bimode2d_aa_backward.<locals>.<lambda>w  r  r:   c            
      R    d d           d d                                S )NzD
Expected grad_output to have the same shape as output; output.size(r  z
but got grad_output_size(r   r  s   r8   rh   z4meta_upsample_bimode2d_aa_backward.<locals>.<lambda>|  sR     9DE9 9K[\]K^9 99 9!,!1!1!!4!49 9 r:   r   )
r  rY   rj   r   r   r   r   rd  rS   r   )r8  r  r  r  r  r  r  r   s   `     @@r8   "meta_upsample_bimode2d_aa_backwardr	  d  s     -K!   
LAkkkk   1XX 
 
a $4Q$779 9 9 9 9 9	
 	
 	
 	
   ,,//1+>> 0   r:   c                 6   t          j        |                                dk    d            t          j        |                                dk    d            t          j        |j        j        d            t          j        |j        j        d            d S )Nr3   c                      dS )Nz%found_inf must be a 1-element tensor.r4   r4   r:   r8   rh   z<_amp_foreach_non_finite_check_and_unscale_.<locals>.<lambda>      (O r:   c                      dS )Nz%inv_scale must be a 1-element tensor.r4   r4   r:   r8   rh   z<_amp_foreach_non_finite_check_and_unscale_.<locals>.<lambda>  r	  r:   c                      dS )Nz!found_inf must be a float tensor.r4   r4   r:   r8   rh   z<_amp_foreach_non_finite_check_and_unscale_.<locals>.<lambda>      3 r:   c                      dS )Nz!inv_scale must be a float tensor.r4   r4   r:   r8   rh   z<_amp_foreach_non_finite_check_and_unscale_.<locals>.<lambda>  r	  r:   )rY   rj   r   ra   r  )r   rD  	inv_scales      r8   *_amp_foreach_non_finite_check_and_unscale_r	    s    	LQ O O   
LQ O O   
L)33   
L)33    r:   c                 *    t          j        |           S rD   rX  )r   nanposinfneginfs       r8   
nan_to_numr	    r  r:   c                 
   | j         t          j        t          j        t          j        t          j        hv rt          d| j          d          | j        }t          ||          }t          ||          }||k    r| S t          | 
                                          }t          |                                           }||         ||         c||<   ||<   ||         ||         c||<   ||<   |                     ||           | S )Nz>torch.transpose_: in-place transposition is not supported for z layout)r   rY   r  
sparse_cscr  
sparse_bscr   r   r   r   r   r   r   )r   dim0r  ndimsr   r   s         r8   r  r    s   {	   aT[aaa
 
 	
 IE$&&D$&&Dt||		D$++--  F!'vd|F4L&,!$ZdDJT
T6"""Kr:   c                 <   | j         }| j        rK|                                 }|                                 }|dk    r|dk    st	          d| d| d          n+|                                 dk    rt	          d| d          t          | d|dk     rdnd          S )	Nr  r   zEt_ expects a tensor with <= 2 sparse and 0 dense dimensions, but got z sparse and z dense dimensionsz6t_ expects a tensor with <= 2 dimensions, but self is r  r3   )r   r.  r0  r1  r   r   r  )r   r	  r0  r1  s       r8   t_r	    s    IE~ __&&
NN$$	aINN P%P P3<P P P   %3 88::>> QQQQ   dAEAIIqq1555r:   )r	  r	  sidesorterc                    t          j        t           j                  dk    p j        d d         j        d d         k     fd           t          j        d u p j        j        k     fd           t          j        |dk    p| d            |rt           j        nt           j        }t          t           j                  r!t          j        |t           j	                  S t          j
        d| j        	          S )
Nr3   r   c                  \    dt          j                   dt           j                   S )Nztorch.searchsorted(): boundaries tensor should be 1 dimension or the first N-1 dimensions of boundaries tensor and input value tensor must match, but we got boundaries tensor z and input value tensor rI  )r   sorted_sequences   r8   rh   z#meta_searchsorted.<locals>.<lambda>  s>    5378M3N3N5 5 #'tz"2"25 5 r:   c                  d    dt           j                   dt          j                  ng  S )Nz[torch.searchsorted(): boundary and sorter must have the same size, but got boundary tensor z and got sorter tensor rI  )r	  r	  s   r8   rh   z#meta_searchsorted.<locals>.<lambda>  sK    B#'(=#>#>B B%+%7tFL!!!RB B r:   r   c                      dS )Nzetorch.searchsorted(): side and right can't be set to opposites, got side of left while right was Truer4   r4   r:   r8   rh   z#meta_searchsorted.<locals>.<lambda>  s	     $ r:   r	  r4   r  )rY   rj   r   r   r  r   rs   r   r   r   r   r   )r	  r   r	  r	  r	  r	  ra   s   ``   ` r8   meta_searchsortedr	    sF    
LO!""a' 	9 "%CRC8	
 	
 	
 	
 	
	 	 	 
L$?//6<?	
 	
 	
 	
 	
   
L#e)	$ 	$   %5EKK%+E$%% KU-D
 
 
 	
 {2U?3IJJJJr:   c                 ~     t          j         t           j        t           j        t           j        fv fd           d S )Nc                      d  S )Nz/Unsupported input type encountered for isin(): r4   r   s   r8   rh   z3_check_for_unsupported_isin_dtype.<locals>.<lambda>   s    I%II r:   )rY   rj   r.  
complex128	complex64r   s   `r8   !_check_for_unsupported_isin_dtyper	     sC    	Lej%"2EODDIIII    r:   c                 Z    |                      ||                     d          f          }|S )Nr   r@  )r8  r   num_weightsr  r  rp	  s         r8   meta_embedding_dense_backwardr	     s0     ''k6F6Fr6J6J(KLLKr:   c                 |    |	r#t                               | ||||||||
|
  
        S t          | ||||||||
|
  
        S rD   )r-   _embedding_bag_sparse_backward!meta_embedding_bag_dense_backward)r  r   r  r  r  maximum_indicesr	  r  r  r  r  r  s               r8   meta_embedding_bag_backwardr	     st      
22
 
 	
 1
 
 	
r:   c
                 4    t          j         j        t           j        t           j        t           j        t           j        fv  fd           |t          k    rt          j        |d u                                | 	                    d          f          }
|
S )Nc                      d j          S )Nz$Unsupported input type encountered: r   )r  s   r8   rh   z3meta_embedding_bag_dense_backward.<locals>.<lambda>Q   s    CtzCC r:   r3   )
rY   rj   ra   r  r  r  float64r  r   r   )r  r   r  r  r	  r	  r  r  r  r  index_grad_weights   `          r8   r	  r	  B   s     
L
u}enemU]SSCCCC   x_D0111TYYq\\'BCCr:   c                    |                      d          }t          j        |t          k    d            t          j        |                                 dk               t          j        |                                dk               |                     d          }t          j        |                                dk               t          j        |                     d          |k               |                     |f          }	|	S )Nr3   c                      dS )NzHembedding_bag_backward: per_sample_weights only supported for mode='sum'r4   r4   r:   r8   rh   z@meta_embedding_bag_per_sample_weights_backward.<locals>.<lambda>f   rW  r:   r  r   )r   rY   rj   r  r   r   )
r  rv  r   r  r  r  r  embedding_featuresr  r   s
             r8   .meta_embedding_bag_per_sample_weights_backwardr	  Y   s     1	LZZ   
Lq!!!	L!#$$$,,q//K	L"###	LQ#55666^^[N++FMr:   )assume_uniqueinvertc                   t          j        t          | t                    pt          |t                    d            t          | t                    st          j        | |j                  } t          |t                    st          j        || j                  }t          | j                   t          |j                   t          j        | t           j	                  S )Nc                      dS )Nz<At least one of elements and test_elements must be a Tensor.r4   r4   r:   r8   rh   zmeta_isin.<locals>.<lambda>v   r  r:   r@  r   )
rY   rj   rs   r   rm  r   r	  ra   r   r.  )elementstest_elementsr	  r	  s       r8   	meta_isinr	  q   s     
L8V$$I
=&(I(INN   h'' G<1EFFFmV,, L]8?KKK%hn555%m&9:::HEJ7777r:   r  c                     t          j        | dk    d            t          |t          j                  \  }}t          j        ||          S )Nr   c                      dS )Nz,polygamma(n, x) does not support negative n.r4   r4   r:   r8   rh   z meta_polygamma.<locals>.<lambda>   s    !O r:   r  r   )rY   rj   r   r   r  r   )r  r   rV   rQ   s       r8   meta_polygammar	     sW     
LaOOPPP(;H  OA| D5555r:   c                      t          d          )Nz.Tensor.item() cannot be called on meta tensors)rc  r   s    r8   meta_local_scalar_denser	     s    
G
H
HHr:   c                 *    t          j        |           S rD   rX  r   s    r8   silur	     r  r:   c                 j    t          | t          j                  \  }}t          j        | |          S r  )r   r   r  rY   r   )r   rV   rQ   s      r8   sigmoidr	     s=     );H  OA| D5555r:   c                    |                                  dk    }|                                 dk    }|r|r@|                    d          |                     d          |                    d          g}nVt          j        |                    d          |                    d          k    d            |                     d          |                    d          g}n|rkt          j        |                    d          |                     d          k    d            |                     d          |                    d          g}n~t          j        |                     d          |                    d          k    d            |                     d          |                     d          |                    d          g}|p| j        }t          j        j        rUd|j        z  }|d         |z   dz
  |z  |z  }||k    r|d         |z  |dg}	n|dg}	t          j        ||	|| j	        	          }
nt          j
        ||| j	        	          }
|
S )
Nr  r   r3   c                      dS Nz matrix batch sizes have to matchr4   r4   r:   r8   rh   z2_create_grouped_mm_output_tensor.<locals>.<lambda>       6X r:   r   c                      dS r	  r4   r4   r:   r8   rh   z2_create_grouped_mm_output_tensor.<locals>.<lambda>   r	  r:   c                      dS )Nzbatched dimension has to matchr4   r4   r:   r8   rh   z2_create_grouped_mm_output_tensor.<locals>.<lambda>   s    6V r:   ro  r  )r   r   rY   rj   ra   r  r   itemsizerd  r   r   )r  r  offsrz  
mat1_is_2d
mat2_is_2dr(	  	alignmentsize_paddedr  r   s              r8    _create_grouped_mm_output_tensorr	     s1   qJqJ C 	5		!diillDIIaLLAHHL		!		!,.X.X   		!diimm4HH 
	CL		!		!,.X.X   		!diill3HH L		!		!,.V.V   		!diillDIIbMMBH'TZI} I),,	|i/!3	AIM##"1+3[!DJJ%q)J!j	$+
 
 
 k()DKHHHJr:   mat_amat_br	  c	                     t          j        d u d u k    d            d uod u}	|	rt           j        }
t           j        j        rPt           j                                        r2dt           j                            d          j        v rt           j	        }
t          j         j
        |
k    o
j
        |
k     fd           nAt          j         j
        t           j        k    oj
        t           j        k     fd           t          j                                         dv o                                dv  fd                                            dk    }                                dk    }|r|s@t          j                             d	                              d
          k    d            |	rHd }d }t          j         |            fd           t          j         |          fd           d } |d             |d           t          j        j
        t           j        k    rj
        t           j        k    p)j
        t           j        k    oj
        t           j        k    fd           j
        t           j        k    oj
        t           j        k    dfd	}|r|rj        d         nd} |d d|            |dd|           t          j        |d u d            |s|rwt          j        d u fd           Yt          j                                        dk    fd           t          j        j
        t           j        k    fd           nt          j        d u d            t          j        |d u d            t          j        |d u p|t           j        k    d            t%           |          S ) Nc                      dS )Nz,Either both scale factors are given, or noner4   r4   r:   r8   rh   z)_meta_grouped_mm_common.<locals>.<lambda>   s    > r:   gfx94r   c                  *    d j          dj          dS )Nz5Expected inputs of E4M3 FP8 type but got mat_a.dtype= and mat_b.dtype=r  r   r	  r	  s   r8   rh   z)_meta_grouped_mm_common.<locals>.<lambda>   s!    xEKxxjojuxxx r:   c                  *    d j          dj          dS )Nz1Expected inputs of BF16 type but got mat_a.dtype=r	  r  r   r	  s   r8   rh   z)_meta_grouped_mm_common.<locals>.<lambda>   s!    tttfkfqttt r:   )r  r0   c                  \    d                                   d                                  S )Nz3Multiplicands must be 2D or 3D but got mat_a.dim()=z and mat_b.dim()=r   r	  s   r8   rh   z)_meta_grouped_mm_common.<locals>.<lambda>   s-    qeiikkqqdidmdmdodoqq r:   r  r   r  c                      dS )Nz3contraction dimension of mat_a and mat_b must matchr4   r4   r:   r8   rh   z)_meta_grouped_mm_common.<locals>.<lambda>   r  r:   c                 Z    |                                  }|d         dk    o|d         dk    S Nr  r3   r   r  mat
mat_strides     r8   r.  z-_meta_grouped_mm_common.<locals>.is_row_major!  s,    Jb>A%=*R.A*==r:   c                 Z    |                                  }|d         dk    o|d         dk    S r
  r  r
  s     r8   r1  z-_meta_grouped_mm_common.<locals>.is_col_major!  s,    Jb>Q&=:b>A+==r:   c                  B    d                                  dd           S )NzNExpected mat_a tensor to be row major in the last two dimensions, got strides r  r  )r	  s   r8   rh   z)_meta_grouped_mm_common.<locals>.<lambda>!  s)    zejeqeqesestvtwtwexzz r:   c                  B    d                                  dd           S )NzQExpected mat_b tensor to be column major in the last two dimensions, got strides r  r  )r	  s   r8   rh   z)_meta_grouped_mm_common.<locals>.<lambda>!  s)    }hmhththvhvwywzwzh{}} r:   c                 D                                     dz
  d                                z  }                                dz
           dk    rQ         t          dj        dz
                     k    r)t          j                 |z  dk     fd           d S          dk    rTdz
           t          dj                           k    r,t          j        dz
           |z  dk     fd           d S t          j        dfd           d S )Nr3   ro  r   c                  (    d d  d           dS )Nr	   stride along % dim to be multiple of 16 bytes, got r  r4   end_dimmat_namer
  s   r8   rh   zF_meta_grouped_mm_common.<locals>.check_valid_strides.<locals>.<lambda>!  sB      AH  A  AG  A  Ajtu|j}  A  A  A r:   c                  4    d d dz
   d dz
            dS )Nr	  r
  r3   r
  r  r4   r
  s   r8   rh   zF_meta_grouped_mm_common.<locals>.check_valid_strides.<locals>.<lambda>%!  s\      IH  I  IGaK  I  Inx  zA  DE  zE  oF  I  I  I r:   Fc                       d d j          dS )NzInvalid strides/sizes, got z for strides and z for sizes.r-  r
  s   r8   rh   zF_meta_grouped_mm_common.<locals>.check_valid_strides.<locals>.<lambda>*!  s    ijiiSVS\iii r:   )r   element_sizer   r  r   rY   rj   )r
  r
  r	  r
  r
  s   `` @@r8   check_valid_stridesz4_meta_grouped_mm_common.<locals>.check_valid_strides!  s   ''))a-#**,,,	ZZ\\
gk"a''Jw,?3sy1%D
 D
 -
 -
 L7#i/14 A  A  A  A  A  A      A%%*Wq[*ASsy!F
 F
 +
 +
 L7Q;')3q8 I  I  I  I  I  I    
 Liiiii    r:   r	  r	  c                  *    d j          dj          dS )NzhFor FP8 scales must both be float32, or for MXFP8 both scales must be float8_e8m0fnu. Got scale_a.dtype=z and scale_b.dtype=r  r   rN  s   r8   rh   z)_meta_grouped_mm_common.<locals>.<lambda>7!  sE      r  @G  @M  r  r  bi  bo  r  r  r r:   r3   c                    	
                                  dk    rt          j                                         fd           rDt          j                                                                          k     fd           d S t          j                                         dk     fd           t          j        j        d         j                 z  k     fd           d S t          j                            d          dk     fd	           t          j        j        d         j        d         k     fd
           rt          j        j        j        dz
  k     fd           j        \  }}d}t          ||z  d          	t          |d          
t          j        j        d         k    oj        d         	
z  k    	
fd           d S t          j                                         dk     fd           t          j        j        d         j        dz            k     fd           d S )Nr  c                      d  dS )Nr	  z to be contiguous.r4   
scale_names   r8   rh   z>_meta_grouped_mm_common.<locals>.check_scale.<locals>.<lambda>B!  s    F
FFF r:   c                  .    d d j          dj          S )NzKFor MXFP8, scale must have same number of dimensions as target tensor, but  has mat.ndim= and scale.ndim=rB  r
  r  r
  s   r8   rh   z>_meta_grouped_mm_common.<locals>.check_scale.<locals>.<lambda>K!  sN      !onx  !o  !o  IL  IQ  !o  !o  ch  cm  !o  !o r:   r3   c                  :    d d                                   dS )Nr	  z to be 1D tensor, but got 	D tensor.r   r  r
  s   r8   rh   z>_meta_grouped_mm_common.<locals>.check_scale.<locals>.<lambda>P!  (     hJ h hRWR[R[R]R] h h h r:   r   c                  N    d d j                  z   dj         d          dS )Nr	  z	 to have rI  r   z
 elements.r-  )r
  r  scale_multiplierr
  
scaled_dims   r8   rh   z>_meta_grouped_mm_common.<locals>.check_scale.<locals>.<lambda>T!  s^      !UJ  !U  !U:AVYiAi  !U  !Uz  {F  GH  {I  !U  !U  !U r:   r   c                      d  dS )Nr	  z( to be contiguous in the last dimension.r4   r
  s   r8   rh   z>_meta_grouped_mm_common.<locals>.check_scale.<locals>.<lambda>Y!  s    \
\\\ r:   c                  H    d d j         d          dj         d          dS )Nr	  z batch dimension to be r   , got r  r-  r
  s   r8   rh   z>_meta_grouped_mm_common.<locals>.check_scale.<locals>.<lambda>]!  s4    p
pp39UV<pp_d_jkl_mppp r:   c                  .    d d j          dj          S )Nz0For MXFP8, 3d tensor should have 2d scales, but r
  r
  rB  r
  s   r8   rh   z>_meta_grouped_mm_common.<locals>.check_scale.<locals>.<lambda>d!  sF      !TS]  !T  !Tmpmu  !T  !T  HM  HR  !T  !T r:   rp  rC  r  c                  :    dj          d  dz   dj          S )NzFor MXFP8, expected mat.shape=z to have scale shape of (,z), but got r-  )G	blocked_K	blocked_Nr
  r  s   r8   rh   z>_meta_grouped_mm_common.<locals>.check_scale.<locals>.<lambda>n!  sY      !Y  !Y  !Yef  !Y  !Yiru~i~  !Y  !Y  LQ  LW  !Y  !Y r:   c                  :    d d                                   dS )Nr	  z to be 2D tensor, but got r 
  r   r!
  s   r8   rh   z>_meta_grouped_mm_common.<locals>.check_scale.<locals>.<lambda>s!  r"
  r:   c                  N    d d j         dz             dj         d          dS )Nr	  z non-batch dimension to be r3   r(
  r  r-  )r
  r  r
  r%
  s   r8   rh   z>_meta_grouped_mm_common.<locals>.check_scale.<locals>.<lambda>w!  s]      !FJ  !F  !FSVS\]^ak]kSl  !F  !Ftyt  AB  uC  !F  !F  !F r:   )r   rY   rj   r  r   r   r   r?   )r
  r  r
  r%
  r$
  r  r  r  r,
  r-
  r.
  is_mxfp8s   `````   @@@r8   check_scalez,_meta_grouped_mm_common.<locals>.check_scale>!  sR   wwyyA~~''))FFFF    L		swwyy0 o  o  o  o  o  o    
 L		q(hhhhh   LA#)J*?BR*RR U  U  U  U  U  U  U  U    
 LL$$)\\\\   KNcil2pppppp    L
chl2 T  T  T  T  T  T   "iGAq!!#J (Z ; ;I (C 0 0ILA!+WA)iBW0W Y  Y  Y  Y  Y  Y  Y  Y    
 L		q(hhhhh   LA#)A
N*CC F  F  F  F  F  F  F    r:   r  r  c                      dS )Nz:Scale result tensor provided, but it is not supported yet.r4   r4   r:   r8   rh   z)_meta_grouped_mm_common.<locals>.<lambda>!  rp   r:   c                  ^    d                                   d                                  dS )Nz/Offsets tensor not provided, but is needed for zD/zD multiplicand layouts.r   r	  s   r8   rh   z)_meta_grouped_mm_common.<locals>.<lambda>!  s0    yeiikkyyUZU^U^U`U`yyy r:   c                  4    d                                   dS )Nz.Offsets tensor must be 1D, but got offs.dim()=r  r   r	  s   r8   rh   z)_meta_grouped_mm_common.<locals>.<lambda>!  s    VVVV r:   c                      d j          dS )Nz7Offsets tensor must be integer (int32) tensor, but got r  r   r6
  s   r8   rh   z)_meta_grouped_mm_common.<locals>.<lambda>!  s    _RVR\___ r:   c                      dS )NzJOffsets tensor provided, but is not needed for 3D/3D multiplicand layouts.r4   r4   r:   r8   rh   z)_meta_grouped_mm_common.<locals>.<lambda>!  s    ` r:   c                      dS )Nz2Bias tensor provided, but it is not supported yet.r4   r4   r:   r8   rh   z)_meta_grouped_mm_common.<locals>.<lambda>!  s    D r:   c                      dS )Nz4If output dtype provided, it must be torch.bfloat16.r4   r4   r:   r8   rh   z)_meta_grouped_mm_common.<locals>.<lambda>!  s    F r:   rB  )rY   rj   r  r  r  r   r   get_device_propertiesgcnArchNamer!  ra   r  r   r   r  r[  r   r  r	  )r	  r	  r  r  r	  rx  r  rz  r  scaled	fp8_dtypemat_a_is_2dmat_b_is_2dr.  r1  r
  r2
  r$
  r1
  s   `````             @r8   _meta_grouped_mm_commonrA
     s    
L	Dgo.>>   D 8WD%8F  
'	M	.
''))	. 5:;;A>>JJJ-IK9$A	)Axxxxx	
 	
 	
 	

 	K5>)KekU^.Kttttt	
 	
 	

 
L		v7%))++"7qqqqq  
 ))++"K))++"K 
k 
JJrNNejjnn,II	
 	
 	

  
	> 	> 	>	> 	> 	> 	Lzzzz	
 	
 	
 	L}}}}	
 	
 	

  0 ''''''w2]em+N0N !55 :MU%99 r  r  r  r  r	
 	
 	
 MU11 6!55 	
:	 :	 :	 :	 :	 :	z "-+-+-DJqMMST 	 	Iwq2BCCCIwq2BCCCD PP	
 	
 	

  
k 
yyyyy	
 	
 	
 L

aVVVV   L
ek)____  
 	DL``	
 	
 	

 
LDD  
 
LT8Y%.8FF  
 ,E5$	JJJr:   c           
      0    t          | |d d ||d |          S )N)r  r  r	  rx  r  rz  )rA
  )r	  r	  r	  rx  rz  s        r8   meta_grouped_mmrC
  !  s3     #	 	 	 	r:   c	                 N    |pt           j        }t          | ||||||||	  	        S )N)r  r  r	  rx  r  rz  r  )rY   r  rA
  )	r	  r	  r  r  r	  rx  r  rz  r  s	            r8   meta_scaled_grouped_mmrE
  !  sB     +U^I"!%
 
 
 
r:   c                 L   t          |          t          d          k    r1| D ].}t          j        |                                dk    d            /g }| D ]M}||n|j        }|j        rt          |          }|                    |                    d|                     N|S )Ninfr   c                      dS )Nz:_foreach_norm cannot compute infinity norm on empty tensorr4   r4   r:   r8   rh   z#meta_foreach_norm.<locals>.<lambda>!  s    T r:   r4   r   )	r]   rY   rj   r   ra   r   r   r  r   )tensorsordra   r  resultsrz  s         r8   meta_foreach_normrL
  !  s    SzzU5\\!! 	 	AL		ATT    G 9 9".EEAG	 	<0;;Iq{{2Y{778888Nr:   r=   half_to_floatc                    |r7| j         t          j        t          j        fvrt	          d| j          d          t          j        | t
          j        j                  \  }}|s|n|}t          j	        | |t          j
                  }|S )Nz%half_to_float is True but x.dtype is z, expected half or bfloat16r  r	  )ra   rY   r[   r  r   rS   r   r   rT   r   r   )r=   r   rM
  computation_dtyperQ   r  s         r8   softmaxrP
  !  s      75:u~666 \\\\   ',&>	uDL' ' '#| (5K<<:KL

1L@W
X
X
XCJr:   c           	         	
 t          j        t                    dz  dk    fd           | j        t                    
t                    dz  }
|z
  	t          j        
|k    
fd           t	          d D                       r| }t          	
          D ]d
z
  dz
  z           dk     r2|                              |j                          z             }dz            dk     r.|                    d|j                 dz            z             }|                                S t          d 	                   }t          |          D ]ot                    dz   dz  z
  	z                     z   dz            z   }t          j        |dk    	fd           |	                    |           pt          j
        || j        | j        | j        t          |                     S )	Nr  r   c                  (    dt                      S )Nz1Length of pad must be even but instead it equals r  r  s   r8   rh   z'_constant_pad_nd_meta.<locals>.<lambda> "  s    NCHHNN r:   c                  0    dt                     d  dS )Nz`Length of pad should be no more than twice the number of dimensions of the input. Pad length is z while the input has z dimensions.r  )l_inpr  s   r8   rh   z'_constant_pad_nd_meta.<locals>.<lambda>
"  s/     25c((    r:   c              3   V   K   | ]$}t          |t          j                  o|d k    V  %dS r  )rs   rS   IntWithoutSymInt)rP   r  s     r8   rv   z(_constant_pad_nd_meta.<locals>.<genexpr>"  s8      
I
I:a/00;Q!V
I
I
I
I
I
Ir:   r3   c            	      X    d z             d          ddz             d z    d	S )NzThe input size z, plus negative padding r   r3   zG resulted in a negative output size, which is invalid. Check dimension z of your input.r4   )r   r  l_diffr  pad_idxs   r8   rh   z'_constant_pad_nd_meta.<locals>.<lambda>#"  sf     Mk&1*&= M M7|M M"%gk"2M M17!M M M r:   )ra   r   r   r   )rY   rj   r   r   rK  r   narrowr   r   r  r   ra   r   r   r   )r   r  rt  l_padc_input	new_shapenew_dimr   r  rX
  rT
  rY
  s    `     @@@@@r8   _constant_pad_nd_metar_
  !  s    
LC1NNNN  
 +KEHHMEU]F	L	 	 	 	 	   
I
IS
I
I
III vu%% 	T 	TA519q=)G7|a!..G}gmA&6W&E  7Q;!##!..Aw}Q/?#gPQkBR/RSS}}[&)**I5\\ 	" 	"c((q1uk*fqj)CL83w{;KKqLM M M M M M M M	
 	
 	
 	!!!!;k|)+E22   r:   r  r  r  c                 P   |                                  dk    r%t          d|                                   d          | j        }|j        }|j        dk    r
|d         f}n)|j        dk    r|d         |d         f}ng ||d         R }| j        }|                     ||          S )Nr  z'weight' must be 2-D, got z-Dr   r3   r   )r   r   r   r   ra   r   )	rv  r   r  r  r  weight_shapeindices_shaper   rz  s	            r8   	embeddingrc
  2"  s     zz||qJ&**,,JJJKKK<LMM|q&21o%7					"1%|A7		5m5\!_55	IIY777r:   max_lengthspadding_valuec                 \   t          |          dk    r t          dt          |           d          t          |          dk    r t          dt          |           d          |d         j        d         dz
  }|d         }||g| j        dd          R }|                     |          S )Nr3   z&Only one jagged dim is supported, got z offsetsz max_lengthsr   )r   r   r   r   )r   r  rd
  re
  r  r  r  s          r8   $meta__jagged_to_padded_dense_forwardrg
  K"  s     7||qKS\\KKK
 
 	
 ;1SS5E5ESSS
 
 	
 	
aAAAq,6<+,,LL)))r:   c                 ^    t          |           t                      d                         }|S )Nc                 8    t          | t          j                  S rC  rW   r   r  rO  s    r8   _fz)_create_unary_float_meta_func.<locals>._fc"  s$      =J
 
 
 	
r:   rK   r$   funcrk
  s     r8   _create_unary_float_meta_funcro
  b"  9    4]]
 
 ] 

 Ir:   c                     | j         s|j         s|j         rt          d          |                                 dk    r/|                     | j                  |                     d          fS |                     d          }|                     d          }|                    d          }|                     |||          }|
r3|r|                     |||          }n.|                     ||||          }n|                     d          }||fS )NzP_native_multi_head_attention fake implementation does not support nested tensorsr   r3   )	is_nestedr  r   r   r   r   )r  r   rt  	embed_dimr  
qkv_weightqkv_biasproj_weight	proj_biasr  need_weightsaverage_attn_weights	mask_typer  T
output_dimr   attn_weightss                     r8    native_multi_head_attention_faker~
  n"  s      
#- 
5? 
!^
 
 	
 {{}},,eooa.@.@AA

1A

1A !!!$$J__Q:..F 
* 	> !??1a33LL !??1h1==LLq))L!!r:   c                 ^    t          |           t                      d                         }|S )Nc                 :    t          | |t          j                  S rC  rj
  r<   s     r8   rk
  z*_create_binary_float_meta_func.<locals>._f"  s&      q!@!M
 
 
 	
r:   rl
  rm
  s     r8   _create_binary_float_meta_funcr
  "  rp
  r:   c                      t                      fd            } j         d}||_         t          t          t          |                    |          }|S )Nc                 T     | g|R i |}t          | j        |j                   | S rD   rs  )r   rU   rA  r   rF   s       r8   _fnz#_register_inplace_meta.<locals>._fn"  s;    b''''''
CI666r:   rV   )r   r}   rK   getattrr-   )rF   r
  inplace_names   `  r8   _register_inplace_metar
  "  sk    
2YY    Y
 k$$$LCL
4-l33
4
4S
9
9CJr:   c                 B    t          j         j        j        k     fd            g}t          t                    rGj        dk    r't          j         j        j        k     fd           |                               t          |dt          j	        iS )Nc                  (    dj          d j          S )Nr  z for `end`, but got dtype r   )r~   r   s   r8   rh   zlerp.<locals>.<lambda>"  s    T%+TTTT r:   r   c                  (    d j          dj          S )Nr  z for `weight`, but got dtype r   )r   rv  s   r8   rh   zlerp.<locals>.<lambda>"  s    b%+bbTZT`bb r:   rL   )
rY   rj   ra   rs   r   r   r  rW   r   rT   )r   r~   rv  rU   s   ``` r8   lerpr
  "  s     
Lsy TTTTT   3<D&*%% ;!Lv|+bbbbb   	F	=E  r:   )rt  c                <    t          | ||t          j                  S rC  rD  r   tensor1tensor2rt  s       r8   addcmulr
  "  s(     w0O0W   r:   c                    t          j        t          j        |j                  ot          j        |j                   d            t          | ||t          j                  S )Nc                      dS )N)zFInteger division with addcdiv is no longer supported, and in a future zErelease addcdiv will perform a true division of tensor1 and tensor2. z4The historic addcdiv behavior can be implemented as zA(input + value * torch.trunc(tensor1 / tensor2)).to(input.dtype) zfor integer inputs and as z6(input + value * tensor1 / tensor2) for float inputs. z?The future addcdiv behavior is just the latter implementation: z4(input + value * tensor1 / tensor2), for all dtypes.r4   r4   r:   r8   rh   zaddcdiv.<locals>.<lambda>"  s	     	
 r:   r  )rY   rj   rS   ry  ra   rW   r   rT   r
  s       r8   addcdivr
  "  sr     
L"7=11 6&w}55	
		
 		
    w0O0W   r:   c                  N   i } dD ]#}t           |         }|D ]}|| vr||         | |<   $|                                 D ]\  }}t          |t          j        j                  r&t          |t                    st          dt          |                      |	                    t          j
        j        j                  |           t          j
                            |                                d          r"|t           d         v rt          | d          |j        r|                                dv rd|                                v rt"                              ||           2d|                                v rt&                              ||           ed	|                                v rt(                              ||           d
|                                v rt*                              ||           t,                              ||           d S )N)r   post_autogradpre_autogradz$op_overload must be OpOverload, got CompositeImplicitAutogradr   z is a CompositeImplicitAutograd op, we shouldn't register meta function for it. Instead, we should let the decomposition run and write meta kernels for the base operators.>   aten::cloneaten::copy_aten::rot90aten::_to_copyaten::empty_stridedaten::constant_pad_ndaten::as_strided_scatterzmkldnn::zmkl::zonednn::zquantized::)r   itemsrs   rY   _opsHigherOrderOperatorr   r   r|   py_impl_CDispatchKeyr/   %_dispatch_has_kernel_for_dispatch_keyr  rc  is_view2_meta_lib_dont_use_me_use_register_meta_for_mkldnnimpl/_meta_lib_dont_use_me_use_register_meta_for_mkl2_meta_lib_dont_use_me_use_register_meta_for_onednn5_meta_lib_dont_use_me_use_register_meta_for_quantized'_meta_lib_dont_use_me_use_register_meta)activate_meta_tabletypregistryopoop_overloadrF   s         r8   activate_metar
  #  s    9 9 9-c2 	9 	9C---+3C=#C(	9 /4466 9N 9NR
 k5:#ABB 	+z22 	 JtK7H7HJJ   	7EH0566r:::899 ;
 
 +	N 8@@@"" ; ; ;   A   	N 	 	 [--////BGGUWXXXXK,,....?DD[RTUUUU{//1111BGGUWXXXX+"2"2"4"444EJJ    8<<["MMMMs9N 9Nr:   r   )rN  rO  ri	  rD   )NNNFr   r3   r   r  )Tr	  )r  )r#  T)FF)TT)r  )FTN)TFF)TF)r  )r  N)r@   r  )r4   r   rB  F)r4   r   FTN)Fr   FNFr   )NF)r   F)rc  rd  FN)NNNNN)r   NNr3   )NNF)rN  FFN)NrN  FFN)rN  FNN)NrN  FNNF)rN  FN)FN)NNNNNNN)FNNNN)NNNF)NNNNF)Nr   FNN)NNNN)r   TT)NNr   N)r	  r   r   )r   )r  N)r   FF)rN  )NTTN(  r  collections.abcr   r   enumr   	functoolsr   typingr   typing_extensionsr   rY   torch._prims_commonr  rS   r	   r
   r   torch._decompr   r   r   r   
torch._opsr   torch._primsr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   torch._prims_common.wrappersr    r!   r"   r#   r$   r  r%   r&   torch.fx.experimentalr'   r  torch.nn.functionalr(   r)   torch.utilsr*   rH   r+   r,   opsr-   libraryLibraryr
  r   r  r  r  r9   r?   rK   rW   rc   rl   linspacelogspacer  r   taker3  r   r   r   r   cummaxcumminr   r   r   r   r   r.  r   _fft_c2cr  r   r  _fft_r2cr  randpermgenerator_outr  r   r  randintr%  r"  low_outr(  randr*  _philox_key_splitr4  _philox_key_fold_inr9  rM  rQ  rU  rW  rX  _fft_c2rr]  r  rj  rq  
unsqueeze_ru  _sparse_semi_structured_linearr  ra   r  _sparse_semi_structured_mmr  _sparse_semi_structured_addmmr  _cslt_sparse_mmr  index_reducer  index_reduce_r  index_selectr  segment_reducer  r  	unary_outr  r   r  r   r  r  r  r  r  _assert_asyncr  msgr  _printr  _make_dep_tokenr  r  _functional_sym_constrain_ranger  r  (_functional_sym_constrain_range_for_sizer  _functional_assert_asyncr  r   r  r   r  r  r  r'  _linalg_eighr(  r1  _linalg_eigvalslinalg_eigvalsr4  
linalg_eigr8  r<  r>  rD  rI  rM  rS  rf  linalg_inv_exri  linalg_ldl_factor_exri   ro  linalg_ldl_solver}  	linalg_lur  linalg_lu_factor_exr  linalg_lu_solver  	lu_unpackr  r  	linalg_qrr  r  r  _linalg_svdr  rz  rE  r  r  linalg_solve_triangularr  r  r  _linalg_detr  r  r  r   reflection_pad1dr%  replication_pad1dr/  r:  reflection_pad1d_backwardr?  replication_pad1d_backwardrA  rQ  reflection_pad2drS  replication_pad2drV  _weight_norm_interface_backwardr_  reflection_pad2d_backwardr;  replication_pad2d_backwardrf  rt  reflection_pad3drv  replication_pad3dry  reflection_pad3d_backwardreplication_pad3d_backwardr  _pdist_forwardr]   r  _pdist_backwardr  baddbmmr  	bernoullir  
bernoulli_r  r  r  poissonr  _fused_moving_avg_obs_fq_helperr  mmr  r  r   r  r  miopen_batch_normr  convolutionr  r
  _has_mkldnnr
  r  _convolution_pointwiser  _linear_pointwiser  has_mklr
  r   _mkl_linearr  r
  r  qconv2d_pointwiseqconv_pointwiserm  r)  binarybinary_tensorr4  qlinear_pointwiser9  r>  linear_dynamic_fp16linear_relu_dynamic_fp16r@  r
  rA  
max_pool2drL  int4mm_packed_weight_cpur\  ra  quantize_per_tensorrc  
avg_pool2dr  r  avg_pool2d_backwardr  
avg_pool3dr  avg_pool3d_backwardr  _adaptive_avg_pool2dr  _adaptive_avg_pool3dr  _adaptive_avg_pool2d_backwardr  _adaptive_avg_pool3d_backwardr  r  adaptive_max_pool2dr  r  r  adaptive_max_pool3dr  r  r  repeat_interleaver  rt   r  r  r  r   _unsafe_indexr%  convolution_backwardr6  addbmmr>  randint_likerB  _fused_adam__fused_adamw_rY  _fused_adamr`  _int_mmrh  _convert_weight_to_int4packrs  #_convert_weight_to_int4pack_for_cpurx  _weight_int4pack_mmr  _weight_int4pack_mm_for_cpur  r  r  r  _dyn_quant_pack_4bit_weightr  _dyn_quant_matmul_4bitr  _weight_int8pack_mmr  _cdist_forwardr  _cdist_backwardr  _embedding_bagr  _embedding_bag_forward_onlyr  r   nansumr"  median	nanmedianr$  
dim_valuesr  r   r'  logical_not_r)  repeatr2  zero_r4  mul_Scalardiv_logical_and_logical_or_logical_xor_r7  add_sub_rA  rc  subrF  rounddecimalsrH  rN  
__rshift__rR  
__lshift__rV  zerorX  r4  r[  fillr]  relu_r`  	_add_relurb  rrelu_with_noiseri  rrelu_with_noise_functionalrk  rrelu_with_noise_rm  	index_put_unsafe_index_putrq  masked_fill_ru  _masked_scalerx  masked_scatter_r|  masked_scatterr~  masked_scatter_backwardr  
index_put_r  r  bmmr  	dtype_outr  r  r  r}  r~  r  r  r  rF   max_pool2d_with_indices_backwardr  max_pool2d_with_indicesr  fractional_max_pool2dr  max_pool3d_with_indicesr   max_pool3d_with_indices_backwardr  r  r  r  grid_sampler_2d_backwardr  r%  r'  r*  r  onesr9  zerosr<  select_scatterr@  slice_scatterrD  r?  r   rK  rS  gatherrY  rh  ro  rq  r{  r~  scatter_addr  scatter_add_r  r}  rg  rt  r  value_reducer  scatter_r  #_scaled_dot_product_flash_attentionr  r  r  #_scaled_dot_product_cudnn_attentionr  0_scaled_dot_product_fused_attention_overrideabler  9_scaled_dot_product_fused_attention_overrideable_backwardr  ,_scaled_dot_product_flash_attention_backwardr  +_scaled_dot_product_flash_attention_for_cpur  4_scaled_dot_product_flash_attention_for_cpu_backwardr  *_scaled_dot_product_attention_math_for_mpsr  '_scaled_dot_product_efficient_attentionr  0_scaled_dot_product_efficient_attention_backwardr  ,_scaled_dot_product_cudnn_attention_backwardr  _flash_attention_forwardr  +_flash_attention_forward_no_dropout_inplacer  r  _flash_attention_backwardr  _efficient_attention_forwardr  _efficient_attention_backwardSymIntr  rb  
_scaled_mmrd  r  _scaled_mm_v2r  scatter_reducetwotwo_outr  scatter_reduce_r  multinomialr  r  r  r  _upsample_nearest_exact1dr  _upsample_nearest_exact2dr  "_upsample_nearest_exact2d_backwardr  _upsample_nearest_exact3dr   r  values_stabler  r	  _thnn_fused_lstm_cellr	  r,	  r/	  r9	  r>	  rA	  rC	  argminrD	  rG	  topkrN	  _segment_reduce_backwardrR	  kthvaluerV	  r   rj	  rg	  rn	  rq	  pixel_shufflery	  r	  	bucketize
Tensor_outr	  
Scalar_outr	  histcr	  _upsample_bilinear2d_aa_upsample_bicubic2d_aa_upsample_lanczos2d_aar	   _upsample_bilinear2d_aa_backward_upsample_lanczos2d_aa_backwardr	  r	  r	  r  r	  searchsortedr	  r	  embedding_dense_backwardr	  _embedding_bag_backwardr	  _embedding_bag_dense_backwardr	  *_embedding_bag_per_sample_weights_backwardr	  isinr	  	polygammar	  _local_scalar_denser	  r	  r	  r	  rA
  _grouped_mmrC
  _scaled_grouped_mmrE
  _foreach_normrL
  _softmaxrP
  constant_pad_ndr_
  rc
  _jagged_to_padded_dense_forwardrg
  ro
  _native_multi_head_attentionr~
  r
  special_airy_aispecial_bessel_y0special_bessel_y1special_modified_bessel_i0special_modified_bessel_i1special_modified_bessel_k0special_modified_bessel_k1!special_scaled_modified_bessel_k0!special_scaled_modified_bessel_k1special_chebyshev_polynomial_tspecial_chebyshev_polynomial_uspecial_chebyshev_polynomial_vspecial_chebyshev_polynomial_w&special_shifted_chebyshev_polynomial_t&special_shifted_chebyshev_polynomial_u&special_shifted_chebyshev_polynomial_v&special_shifted_chebyshev_polynomial_wspecial_hermite_polynomial_hspecial_hermite_polynomial_hespecial_laguerre_polynomial_lspecial_legendre_polynomial_pr
  r
  r
  r
  lerp_addcmul_addcdiv_torch._refs.nn.functionaltorch._refs.specialr
  r4   r:   r8   <module>r     se    . . . . . . . .                   ' ' ' ' ' '  # # # # # # + + + + + + + + + +            " ! ! ! ! ! U U U U U U U U                                           < ; ; ; ; ; ; ; 7 7 7 7 7 7 8 8 8 8 8 8 8 8 ) ) ) ) ) ) WT]]Yt__y~*/-*?*?PV*W*W ' %a )X  " " "

8BF#3"4hr2v6F"FG 
 
 
 
3   (* * *   t}-..
 

=7 7 7  /.7t 	!49=122' '  32' !)4+<+@ABB%' % % % %  CB%$ t%&&I I  '&I 	[$+/4;+>P  Xy!!  "!  !)4+<+@ABBI I  CBI. . .b V $s) 4     %t}'89::"< "<  ;:"<J $s)     %t}'89::@
 @
  ;:@
F t}*++"& 3 3 3 3 ,+3 t}$%% *
 
 
 
 &%
 $dl&6788
 *     98&  $,"6788 *     98& 	!49=122%)$tPT      32 t%-..8 8 /.8" t'/00	! 	! 10	!%
 %
 %
P t#+,,   -,
 t$,--   .-
 %t}'89::*Lv *LDI *Lc *LC *L *L *L  ;:*LZ tz!""   #":	( 	( 	( t&''  (' t233
 "&$() ))) ) 4-	)
 4Z) {T!) ) ) 43)X t.//
 %)	 
  {T!	   0/> t122 	
$() ) ))
) ) 	) {T!) ) ) 32)X t#$$ $(";< ;<,;<\;< 4-;< D=	;<
 {T!;< ;< ;< ;< ;< ;< ;< %$;<| t ()) 	I 	I 	I
	I		I 	I L		I
 	I 	I 	I 	I 	I *)	I t!)** 	 	 	
			 	 L		
 	 	 	 	 	 +*	 t ())' ' *) ' t"*++
 "!! W  W  W
 W W d]	 W
 d] W d] W  W  W  W  W  W ,+ WF  $("4566   76 tx|     $("4566   76 tx|    tz!""6 6 #"6 tz~( ( (
 t!)**  +* t!%&&  '& t{"##  $# t#+,, * * * * -,* t'/00, , , 10, t3;<<   =<
 t08996 6 6 :96& t<DEE  FE
 t,011  21

F 
C 
 
 
 
    F  #        N (,
 


 !%
 
 
 
$ V S C    
 
F 
$ 
 
 
 
 
"  	   	   C     !)4+<+HIJJ]N++  s T    ,+ KJ" $,d.A.EFGGB B6 B B B  HGB  !!]N++6    ,+ "! Q QF Q Q Q Q t*++) )F )4 )F ) ) )  ,+) t"##J J JF J4 JF J J J  $#J t}) )6 )$ )6 ) ) )  ) t$%%) )6 )$ )6 ) ) )  &%) t&.// &  T    0/" 	$,d.M.QR  .f .6 .f . . .  .d t!)** &     +* )143L3PQRRT8V$$ 	  
  	
 666!"   %$ SR& %-t/D/HIJJ ' ' ''' '
 ' ' ' '  KJ'T &(:;<<S#s/3   f  fff>T8U     =<6 (0$2J2NOPPT8V$$ 	         	 
 666!"      %$ QP F $,d.B.FGHH 4 4 444 4
 4 4 4 4 4  IH4n t~S#s 	$ $$$ $ 	$
 666!"$ $ $  $P tTz!2    * &(:;<<S# f C ffn8M     =<4 $,d.B.GHIIV[$11'v '%(F"G ' ' ' 21 JI'$ t'((  	" """ " $J	" " " )("J.
.
. 49d3i . . . .".
.
. *. 66>	. . . .(f V      t$%%
   7 7 777 	7
 7 TM7 	7 TM7 4-7 6666)*7 7 7 &%7t ,4d6R6VWXX    	
   
$    YX4 t$%%S#4((( +( +(
+(+( +( 	+(
 +( 66>+( +( +( )( &%+(^ t'((
 
 )(
 tz
 W WW	W W 	W
 W W W W  Wt  >#; #; #;L t$%%= =  &%= t%&&> >  '&>( ( (< t-..\S S  /.S t.//\T T  0/T2E 2E 2Ej t$%%= =  &%= t%&&> >  '&> (0    &.&1'/'2	  \& &  &:<G <G <G~ t$%%= =  &%= t%&&> >  '&> &.&1'/'2	  \(( ((  ((V t"##

 

f 

 

v 

 

 

  $#

 t#$$Pv PV P Pf PQW P P P  %$P $dl&6788/0 ' ' ' '  98': &(:;<<&* I I I I  =<I
 t$%%   &% t~  I I I ! I
 $dl&6788" " "  98" t3;<< * * * =<*. tw3 3U[4/ 3 3 3  3,B B B
 
 
 
* .2D D,DLD IOD #Y_	D
 3i#oD D D IOd*D D D DNQ Q Q t%-.."$,"$L"$ ,
"$ ,%	"$
 $"$ "$ !&"$ "$ "$ "$ /."$J t'((#,#L# ,# I	#
 #Y# 3i# # I# # # # )(#L 	8 T@9>9N9N&&: :6 ]59#:BCC  DC, ]59#5=>>S S ?>S
 x 	:?-:O:O66;
 ;
7 
uy}0	1	1	 	 
2	1	
 :?9N9N&&: :6 ]59#5=>>]59#3;<<]59#3:;;4 4 <; =< ?>4l ]59#5<==]59#5CDD  ED >=< ]59#5=>>]59#5<==  >= ?>> ]59#5<==]59#5CDD$ $ ED >=$L ]59#7?@@]59#<DEE	 	 FE A@	 =BM<Q<QVV= =9 ]59&122 
 
 
 32
8 ]59&?@@@ @ A@@,   t'((#<# %#36#?D{#
\# # # )(# t&'' M M M ('Mb(< (< (<X t'/00E E 10EP t UJ UJ UJ   UJp t'((\K( K(  )(K(\ t(011  21" t(011@ @ 21@ t19::F F ;:F, t122\P P  32P
	
6 	
S 	
 	
 	
 	
 t'((UI+ +  )(+\ t011\H H  21H$ t'((UI' '  )('T t011\( (  21(
 t%,--* * * .-* $dl&6788
 
  98
 #+T-@-DEFF46 

 

 

c 

 

 

  GF

 	&.	0F0JKLL   ML" 
!4#5#<=>>A A ?>AH )1233.H .H 43.Hb #T[_566./q ' ' ' '  76'0 !()**' ' +*' !)4+=+EFGG  !
 
 
 
 HG
2  ()**  !! ! ! ! +*!H ~B B  B* 0122  32& 89::  ;: ()**8 8 +*8  0122< < 32< >?@@< < A@<"3 "3 "3 " " " "@ @ @F 0122D!'$D D D 32D4 +,--; ; .-;( ()**< < +*< t"*++ &  & ,+ &F t#$$G G  %$G* t"*++
 	
`5 `5 `5 ,+`5F t/7885 5 985
 
 
 
 #T[_566=$ = = = =  76= #T^%;<==) ) >=) !			 	 Xy!!   "!	 	 t ())  *) t{"##' ' $#'& tz!""  #" 				! !
 
 
 
 					 * * * *Z      
"DJ$7899  :9
 
 
" &(>?@@  A@ &(>?@@  A@ ty !!& & "!& 
!4:#4566  76 	 $)"2344" " 54" tz!""  #" t~$%% F     &% %&''RV" " "  ('" 0122RV; ; ; 32; &'((KO   )( &(>(FGHH" " " IH" t '((  )(
 t!)**  +* t#$$	 	 %$	 t"##6 6  $#6 t+,,! ! -,! t&''   (', , , ,^  $(,/005 5  105  2344J J  54J    6  ; ; ;h #-Y YYY 	Y 		Y
 	Y 	Y 	Y 	Y 	Y 	Y 	Y Y Y Y Y  !Y" #Y$ %Y& 'Y( )Y* +Y, -Y Y Y Yx;4 ;4 ;4|383838 38 		38
 	38 	38 	38 	38 	38 	38 	38 	38 38 38 38  !38" #38$ %38& '38 38 38 38lI2 I2 I2X t455\( (  65(V t+344 # # # 54#L t)122Q Q 32Qh t+,,UI _ _ _  -,_D t455\\ \  65\~%
V %
6 %
 %
 %
 %
P    t    v 3    $ t,455# # 65#$ t#$$8 8  %$8" t,--\;''! ! (' .-!, 	!"##. . $#. t&'' ) ) ) (')X 	!49=122      32( 
"DJN344      54( t"*++& & ,+& t!)**& & & +*&. . .  C d     / / /
    t{"##' ' ' $#'   6
 
 
 
   
% % % %R4 4 4 4 t'((& & )(&
 t !!  "!
 !	  & & &  & "	     8@ABB
 #< <<	< < 	<
 < < 4<< < < CB<~ 8BCDD # 	  }	
 } }    4<   ED4S#X   & 89:: #( ((	( ( }	(
 ( ( ( ( 4<( ( ( ;:(V EFGG
  $#0 00	0 0 }	0
 0 0 0 4<0 0 0 HG0f tMNN& %2 2 222 
2 	2
 2 $Z2 
2 2 2 2 2 2 2 2 2  !2$ 4<%2 2 2 ON26 9 ( " """ 
" 	"
 
" " " " " " " " " " 4<" " " 
". 8  # 	  	
  } 4<   
: A   $!" !"!"!" 
!" 	!"
 
!" !" !" !" }!" 4<!" !" !" 
!"H ?@AA
  $"&- --	- - }	-
 - - 4-- 4<- - 66>- - - BA-` <=>> )) ))))	)) )) }	))
 )) )) 4<)) )) )) ?>))X = " 4- 4-4-4- 
4- 	4-
 }4- 
4- 4- 4- 4- 4- $Z4- 4- 4<4- 4- 4- 
4-n 9 * !" """ 
" 	"
 
" " " " " " " " " " "  4<!" " " 
"0 %-   #'$(#"&!%!#J JJ	J J }	J
 }J J J J J J 4<J DjJ TzJ }J 4-J  $!J" d
#J J J 
JZ @HIJJ #'$(#"&!%!%& &	&& 
& 	&
 }& }& & & & & & 4<& Dj& Tz& }&  4-!&" $#&$ d
%& & & KJ&R -7899 #'$(#"&%' ''	' ' }	'
 }' ' ' ' ' ' }' }' }' 4<' Dj'  Tz!'" }#'$ 4-%' ' ' :9'T & ( #'$(#, ,,, 
, 	,
 
, , , , , , , , , , 4<,  Dj!," Tz#, , , 
,4 )    %%)""/S /S/S	/S /S 4-	/S
 4-/S 4-/S */S */S /S /S /S 4</S d]/S tm/S t/S /S /S 
/Sd * * !%"'%47 474747 
47 	47
 4-47 4-47 4-47 ,47 ,47 47 47 47 47 47 47  4<!47" $J#47$  %47 47 47 
47x !%(,$( AY AY
,AY
,AY \AY \	AY
 ,
AY ,%AY {T!AY AY AY AY AYH '()) !%(,$(  
,
, \ \	
 ,
 ,% {T!    *), !%$(*.*. iC iC
,iC
,iC %,iC %	iC
 %,iC %iC ,
iC {T!iC K 4'iC K 4'iC iC iC iC iCX "*+,, !%'+)-  
,
, %, %	
 K  %, % K  ,
 +$ 3i$&    -,: #')<)DEFF& & &  GF&
 t#'((   )(
  ($*:*>?@@	 	 	 	 	  A@	  , , ,* 	$d&D&LM 
 
 
 
 	$d&D&LM    . (0/7  "! #,- u|+, dl	
 dl   : 	$d&D&LM 
 
 
 
 					    *$ $ $N t)122
    32 t&''4/ 4/ ('4/n t&'') ) (')X t$,--$% $% .-$%N

 

 


 
 
 #T[%89::4 4 4 ;:4 t!)**   +* ty !!
Q 
Q 
Q "!
Q t,--LP    .-" %t}';<==Xy!!K K K "! >=K  #("9 Q Q Q t7?@@	* 	* A@	* t#+,,	0 	0 -,	0 t!)**  +*@ t-566F F 76FD %t~'@ABB27u      CB %t~'@ABB
 
 
 



 	

 
 
 
 CB
 
|E E E  E4 $,#+#+     & -5,4     8 t>FGG  HG$ ')<=>>" " "  ?>" uy~())  *): uy~ !!6 6 "!6( t !!
 
	-K -K -K -K  "!-K`   t,--  .- t+,, '
 '
 '
 -,'
T t122    32, t>??    @?. ty8=e 8 8 8 8  8  t~6c 6 6F 6 6 6  6 t'((I& I I I )(I ty"v "& " " "  " t|6& 6V 6 6 6  6' ' '^ (,$( VK VKVKVK \D VK \D 	VK
 4-VK 4-VK ,%VK {T!VK VK VK VK VKr t   $(  4- 4-	
 {T!     ! & '()) !% $(,$(  << \ \	
 ,
 ,
 ,% {T!    *)6 t!())   *)  t}v C         t#$$3 3 3  %$3l t~ $8 888 8 	8
 8 8 8 8  8. t3;<<
 	* **&\* c* 	* * * =<*,   t0899 
-" -" -" :9-"`    d2 3 3 3  d4 5 5 5  d4 5 5 5  d= > > >  d= > > >  d= > > >  d= > > >  dD E E E  dD E E E  tB C C C  tB C C C  tB C C C  tB C C C  tJ K K K  tJ K K K  tJ K K K  tJ K K K  t@ A A A  tA B B B  tA B B B  tA B B B   ty   $ t|./       t|./      , 	ty))!!$,//!!$,//
                EN EN ENP r:   