//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-33191640
// Cuda compilation tools, release 12.2, V12.2.140
// Based on NVVM 7.0.1
//

.version 8.2
.target sm_52
.address_size 64

	// .globl	decode
.extern .shared .align 16 .b8 shared_mem[];

// ---------------------------------------------------------------------------
// decode -- kernel entry point.
//
// This is compiler-generated PTX. The explanatory comments were added during
// review and describe only what the instructions below do; names such as
// "code" and "window" are reviewer shorthand, not identifiers from the
// original source.
//
// Shorthand used in the comments:
//   gid    = %ctaid.x * %ntid.x + %tid.x                      (%r5)
//   window = 64-bit value whose top bytes index the table
//   code   = byte fetched from the table for the current window
//
// Parameters, as they are used in this body:
//   decode_param_0 (u64)  Base of a byte table read by the inline-asm
//                         ld.global.nc.u8 loads (used as a global address
//                         without cvta). It is indexed three ways:
//                           - by the top byte of the window;
//                           - by (65536 - 256*code) | next window byte when
//                             the previous code is >= 240 (up to 3 times);
//                           - by (256*decode_param_6 - 256) + code, yielding
//                             a byte that is used as a left-shift count.
//   decode_param_1 (u64)  Input bytes. Each thread reads bytes
//                         8*gid .. 8*gid+11, each guarded by
//                         index < decode_param_7 (missing bytes read as 0).
//   decode_param_2 (u64)  Byte array; one byte is read per emitted element,
//                         at that element's output index.
//   decode_param_3 (u64)  32-bit words; entries [ctaid.x] and [ctaid.x + 1]
//                         are read (presumably per-block output offsets --
//                         confirm against the host code).
//   decode_param_4 (u64)  Packed 5-bit fields; the field starting at bit
//                         5*gid gives the thread's initial window shift.
//                         NOTE(review): the two bytes holding the field are
//                         loaded without a bounds check -- confirm the
//                         buffer is padded accordingly.
//   decode_param_5 (u64)  Output buffer, written as 16-bit elements.
//   decode_param_6 (u32)  Only used to form the offset 256*param - 256.
//   decode_param_7 (u32)  Bound for the guarded input-byte loads.
//   decode_param_8 (u32)  Upper bound (min.u32) on output element indices.
//
// Shared memory (extern shared_mem) as addressed here:
//   u32 words 0 .. ntid.x   at shared_mem + 4*i   (per-thread counts/offsets;
//                                                  word ntid.x holds
//                                                  decode_param_3[ctaid.x+1])
//   16-bit staging slots    from shared_mem + 4*ntid.x + 4
//   NOTE(review): the required dynamic shared-memory size is set by the
//   launcher and is not visible in this file.
//
// Phases:
//   1. Gather 12 input bytes, derive the initial shift, and count how many
//      table codes are consumed before the 8-bit position counter leaves
//      the range [0, 64) (%r347 = count).
//   2. In-place scan of the counts in shared memory (up-sweep, clear last
//      word, down-sweep), seeded with decode_param_3[ctaid.x].
//      NOTE(review): the stride pattern appears to assume %ntid.x is a power
//      of two -- confirm the launch configuration.
//   3. Walk the window again, build one 16-bit element per code and stage it
//      in shared memory.
//   4. Copy the staged elements to decode_param_5, striding by %ntid.x.
// ---------------------------------------------------------------------------
.visible .entry decode(
	.param .u64 decode_param_0,
	.param .u64 decode_param_1,
	.param .u64 decode_param_2,
	.param .u64 decode_param_3,
	.param .u64 decode_param_4,
	.param .u64 decode_param_5,
	.param .u32 decode_param_6,
	.param .u32 decode_param_7,
	.param .u32 decode_param_8
)
{
	.reg .pred %p<56>;
	.reg .b16 %rs<112>;
	.reg .b32 %r<359>;
	.reg .b64 %rd<226>;

	// ---- Parameter loads and thread indexing ----
	ld.param.u64 %rd44, [decode_param_0];
	ld.param.u64 %rd48, [decode_param_1];
	ld.param.u64 %rd49, [decode_param_2];
	ld.param.u64 %rd50, [decode_param_3];
	ld.param.u64 %rd45, [decode_param_4];
	ld.param.u64 %rd46, [decode_param_5];
	ld.param.u32 %r79, [decode_param_6];
	ld.param.u32 %r80, [decode_param_7];
	ld.param.u32 %r81, [decode_param_8];
	cvta.to.global.u64 %rd1, %rd49;
	cvta.to.global.u64 %rd2, %rd50;
	cvta.to.global.u64 %rd3, %rd48;
	mov.u32 %r1, %ntid.x;
	shl.b32 %r2, %r1, 2;			// %r2 = 4 * ntid.x
	mov.u32 %r3, %ctaid.x;
	mov.u32 %r358, %tid.x;
	mad.lo.s32 %r5, %r3, %r1, %r358;	// %r5 = gid
	shl.b32 %r6, %r5, 3;			// %r6 = 8 * gid (first input byte index)

	// ---- Phase 1a: gather input bytes 0..7 into 64-bit pieces ----
	// Byte k is placed at bits (56 - 8k) .. (63 - 8k); a byte whose index is
	// >= decode_param_7 is left as 0.
	setp.ge.s32 %p1, %r6, %r80;
	mov.u64 %rd212, 0;
	mov.u64 %rd211, %rd212;
	@%p1 bra $L__BB0_2;
	cvt.s64.s32 %rd51, %r6;
	add.s64 %rd52, %rd3, %rd51;
	ld.global.nc.u8 %rs29, [%rd52];
	cvt.u64.u16 %rd53, %rs29;
	shl.b64 %rd211, %rd53, 56;
$L__BB0_2:
	add.s32 %r82, %r6, 1;
	setp.ge.s32 %p2, %r82, %r80;
	cvt.s64.s32 %rd55, %r6;
	add.s64 %rd6, %rd3, %rd55;		// %rd6 = &input[8 * gid]
	@%p2 bra $L__BB0_4;
	ld.global.nc.u8 %rs30, [%rd6+1];
	cvt.u64.u16 %rd56, %rs30;
	shl.b64 %rd57, %rd56, 48;
	and.b64 %rd212, %rd57, 71776119061217280;	// mask 0x00FF000000000000
$L__BB0_4:
	add.s32 %r83, %r6, 2;
	setp.ge.s32 %p3, %r83, %r80;
	mov.u64 %rd214, 0;
	mov.u64 %rd213, %rd214;
	@%p3 bra $L__BB0_6;
	ld.global.nc.u8 %rs31, [%rd6+2];
	cvt.u64.u16 %rd59, %rs31;
	shl.b64 %rd60, %rd59, 40;
	and.b64 %rd213, %rd60, 280375465082880;	// mask 0x0000FF0000000000
$L__BB0_6:
	add.s32 %r84, %r6, 3;
	setp.ge.s32 %p4, %r84, %r80;
	@%p4 bra $L__BB0_8;
	ld.global.nc.u8 %rs32, [%rd6+3];
	cvt.u64.u16 %rd62, %rs32;
	shl.b64 %rd63, %rd62, 32;
	and.b64 %rd214, %rd63, 1095216660480;	// mask 0x000000FF00000000
$L__BB0_8:
	add.s32 %r85, %r6, 4;
	setp.ge.s32 %p5, %r85, %r80;
	mov.u64 %rd216, 0;
	mov.u64 %rd215, %rd216;
	@%p5 bra $L__BB0_10;
	ld.global.nc.u8 %rs33, [%rd6+4];
	cvt.u32.u16 %r86, %rs33;
	and.b32 %r87, %r86, 255;
	mul.wide.u32 %rd215, %r87, 16777216;	// byte << 24
$L__BB0_10:
	add.s32 %r88, %r6, 5;
	setp.ge.s32 %p6, %r88, %r80;
	@%p6 bra $L__BB0_12;
	ld.global.nc.u8 %rs34, [%rd6+5];
	cvt.u32.u16 %r89, %rs34;
	and.b32 %r90, %r89, 255;
	mul.wide.u32 %rd216, %r90, 65536;	// byte << 16
$L__BB0_12:
	add.s32 %r91, %r6, 6;
	setp.ge.s32 %p7, %r91, %r80;
	mov.u64 %rd218, 0;
	mov.u64 %rd217, %rd218;
	@%p7 bra $L__BB0_14;
	ld.global.nc.u8 %rs35, [%rd6+6];
	cvt.u32.u16 %r92, %rs35;
	and.b32 %r93, %r92, 255;
	mul.wide.u32 %rd217, %r93, 256;		// byte << 8
$L__BB0_14:
	add.s32 %r94, %r6, 7;
	setp.ge.s32 %p8, %r94, %r80;
	@%p8 bra $L__BB0_16;
	ld.global.nc.u8 %rs36, [%rd6+7];
	cvt.u64.u16 %rd68, %rs36;
	and.b64 %rd218, %rd68, 255;

	// ---- Phase 1b: gather input bytes 8..11 into 32-bit pieces ----
	// Same guard as above; byte 8 ends up in bits 24..31, byte 11 in bits 0..7.
$L__BB0_16:
	add.s32 %r96, %r6, 8;
	setp.ge.s32 %p9, %r96, %r80;
	mov.u32 %r340, 0;
	mov.u32 %r339, %r340;
	@%p9 bra $L__BB0_18;
	ld.global.nc.u8 %rs37, [%rd6+8];
	cvt.u32.u16 %r97, %rs37;
	shl.b32 %r339, %r97, 24;
$L__BB0_18:
	add.s32 %r99, %r6, 9;
	setp.ge.s32 %p10, %r99, %r80;
	@%p10 bra $L__BB0_20;
	ld.global.nc.u8 %rs38, [%rd6+9];
	cvt.u32.u16 %r100, %rs38;
	shl.b32 %r101, %r100, 16;
	and.b32 %r340, %r101, 16711680;		// mask 0x00FF0000
$L__BB0_20:
	add.s32 %r103, %r6, 10;
	setp.ge.s32 %p11, %r103, %r80;
	mov.u32 %r342, 0;
	mov.u32 %r341, %r342;
	@%p11 bra $L__BB0_22;
	ld.global.nc.u8 %rs39, [%rd6+10];
	mul.wide.u16 %r341, %rs39, 256;		// byte << 8
$L__BB0_22:
	add.s32 %r105, %r6, 11;
	setp.ge.s32 %p12, %r105, %r80;
	@%p12 bra $L__BB0_24;
	ld.global.nc.u8 %rs41, [%rd6+11];
	cvt.u32.u16 %r106, %rs41;
	and.b32 %r342, %r106, 255;

	// ---- Phase 1c: shared-memory addresses, initial shift, initial window ----
$L__BB0_24:
	mov.u32 %r108, shared_mem;
	add.s32 %r17, %r108, %r2;		// %r17 = &shared u32 word [ntid.x]
	add.s32 %r15, %r17, 4;			// %r15 = start of the 16-bit staging area
	bar.sync 0;
	// Signed division of 5*gid by 8 (rounding toward zero) gives the byte
	// index of the thread's 5-bit field in decode_param_4.
	mul.lo.s32 %r109, %r5, 5;
	shr.s32 %r110, %r109, 31;
	shr.u32 %r111, %r110, 29;
	add.s32 %r112, %r109, %r111;
	shr.s32 %r113, %r112, 3;
	cvt.s64.s32 %rd69, %r113;
	cvta.to.global.u64 %rd70, %rd45;
	add.s64 %rd71, %rd70, %rd69;
	ld.global.nc.u8 %rs42, [%rd71+1];
	cvt.u32.u16 %r114, %rs42;
	and.b32 %r115, %r114, 255;
	ld.global.nc.u8 %rs43, [%rd71];
	mov.u32 %r347, 0;			// %r347 = number of codes counted so far
	cvt.u32.u16 %r116, %rs43;
	// Selector 0x7604: result = (byte[+0] << 8) | byte[+1], upper half zero.
	prmt.b32 %r117, %r116, %r115, 30212;
	and.b32 %r118, %r112, -8;
	sub.s32 %r119, %r118, %r109;
	add.s32 %r120, %r119, 11;		// 11 - (5*gid mod 8) for non-negative gid
	shr.u32 %r121, %r117, %r120;
	cvt.u64.u32 %rd72, %r121;
	cvt.u16.u32 %rs44, %r121;
	and.b16 %rs107, %rs44, 31;		// 5-bit initial shift (kept for phase 3)
	// Merge the eight byte pieces into one 64-bit word (first byte on top).
	and.b64 %rd73, %rd214, 1095216660480;
	or.b64 %rd74, %rd73, %rd213;
	or.b64 %rd75, %rd74, %rd215;
	or.b64 %rd76, %rd75, %rd216;
	and.b64 %rd77, %rd76, 281474976645120;	// mask 0x0000FFFFFFFF0000
	or.b64 %rd78, %rd77, %rd217;
	or.b64 %rd79, %rd78, %rd218;
	or.b64 %rd21, %rd212, %rd211;
	or.b64 %rd80, %rd21, %rd79;
	and.b64 %rd222, %rd72, 31;
	and.b32 %r122, %r121, 31;
	shl.b64 %rd219, %rd80, %r122;		// window = word << initial shift
	shl.b32 %r123, %r79, 8;
	add.s32 %r16, %r123, -256;		// %r16 = 256*decode_param_6 - 256
	cvta.to.global.u64 %rd24, %rd46;
	mov.u16 %rs105, %rs107;			// %rs105 = running 8-bit position counter

	// ---- Phase 1d: counting loop over the first 64-bit word ----
	// Each iteration: look up a code via the top window byte; while the code
	// is >= 240 follow up to three further lookups using the next window
	// bytes; then fetch the shift count for the final code, shift the window
	// and advance the position counter. Repeats while (position & 255) < 32.
$L__BB0_25:
	shr.u64 %rd82, %rd219, 56;
	add.s64 %rd81, %rd44, %rd82;
	// begin inline asm
	ld.global.nc.u8 %r344, [%rd81];
	// end inline asm
	cvt.u16.u32 %rs45, %r344;
	and.b16 %rs46, %rs45, 255;
	setp.lt.u16 %p13, %rs46, 240;
	@%p13 bra $L__BB0_29;
	// Second lookup: index = (65536 - 256*code) | window bits 48..55.
	shl.b32 %r126, %r344, 8;
	and.b32 %r127, %r126, 65280;
	mov.u32 %r128, 65536;
	sub.s32 %r129, %r128, %r127;
	cvt.u64.u32 %rd84, %r129;
	bfe.u64 %rd85, %rd219, 48, 8;
	or.b64 %rd86, %rd85, %rd84;
	add.s64 %rd83, %rd44, %rd86;
	// begin inline asm
	ld.global.nc.u8 %r344, [%rd83];
	// end inline asm
	cvt.u16.u32 %rs47, %r344;
	and.b16 %rs48, %rs47, 255;
	setp.lt.u16 %p14, %rs48, 240;
	@%p14 bra $L__BB0_29;
	// Third lookup: same index form with window bits 40..47.
	shl.b32 %r131, %r344, 8;
	and.b32 %r132, %r131, 65280;
	sub.s32 %r134, %r128, %r132;
	cvt.u64.u32 %rd88, %r134;
	bfe.u64 %rd89, %rd219, 40, 8;
	or.b64 %rd90, %rd89, %rd88;
	add.s64 %rd87, %rd44, %rd90;
	// begin inline asm
	ld.global.nc.u8 %r344, [%rd87];
	// end inline asm
	cvt.u16.u32 %rs49, %r344;
	and.b16 %rs50, %rs49, 255;
	setp.lt.u16 %p15, %rs50, 240;
	@%p15 bra $L__BB0_29;
	// Fourth lookup: same index form with window bits 32..39.
	shl.b32 %r136, %r344, 8;
	and.b32 %r137, %r136, 65280;
	mov.u32 %r138, 65536;
	sub.s32 %r139, %r138, %r137;
	cvt.u64.u32 %rd92, %r139;
	shr.u64 %rd93, %rd219, 32;
	and.b64 %rd94, %rd93, 255;
	or.b64 %rd95, %rd94, %rd92;
	add.s64 %rd91, %rd44, %rd95;
	// begin inline asm
	ld.global.nc.u8 %r344, [%rd91];
	// end inline asm
$L__BB0_29:
	add.s32 %r347, %r347, 1;		// one more code counted
	and.b32 %r141, %r344, 255;
	add.s32 %r142, %r16, %r141;
	cvt.s64.s32 %rd97, %r142;
	add.s64 %rd96, %rd44, %rd97;
	// begin inline asm
	ld.global.nc.u8 %r140, [%rd96];
	// end inline asm
	cvt.u16.u32 %rs51, %r140;
	and.b32 %r143, %r140, 255;
	shl.b64 %rd219, %rd219, %r143;		// consume: shift window by loaded count
	add.s16 %rs105, %rs105, %rs51;
	and.b16 %rs52, %rs105, 255;
	setp.lt.u16 %p16, %rs52, 32;
	@%p16 bra $L__BB0_25;

	// ---- Phase 1e: append input bytes 8..11 and keep counting ----
	or.b32 %r144, %r340, %r339;
	or.b32 %r25, %r144, %r341;
	or.b32 %r357, %r25, %r342;		// %r357 = input bytes 8..11 as one u32
	cvt.u64.u32 %rd27, %r357;
	cvt.u64.u16 %rd98, %rs105;
	and.b64 %rd99, %rd98, 255;
	add.s64 %rd100, %rd99, 4294967264;	// + 0xFFFFFFE0, i.e. position - 32 in the low 32 bits
	cvt.u32.u64 %r145, %rd100;
	shl.b64 %rd101, %rd27, %r145;
	or.b64 %rd220, %rd101, %rd219;
	add.s16 %rs106, %rs105, -32;
	// The next four instructions test (%rs106 & 255) >= 32.
	and.b16 %rs53, %rs106, 248;
	shr.u16 %rs54, %rs53, 3;
	add.s16 %rs55, %rs54, 4;
	setp.gt.u16 %p17, %rs55, 7;
	@%p17 bra $L__BB0_36;
	// Same lookup/consume loop as above, on window %rd220 with position
	// counter %rs106; repeats while (%rs106 & 255) < 32.
$L__BB0_31:
	shr.u64 %rd103, %rd220, 56;
	add.s64 %rd102, %rd44, %rd103;
	// begin inline asm
	ld.global.nc.u8 %r346, [%rd102];
	// end inline asm
	cvt.u16.u32 %rs56, %r346;
	and.b16 %rs57, %rs56, 255;
	setp.lt.u16 %p18, %rs57, 240;
	@%p18 bra $L__BB0_35;
	shl.b32 %r148, %r346, 8;
	and.b32 %r149, %r148, 65280;
	mov.u32 %r150, 65536;
	sub.s32 %r151, %r150, %r149;
	cvt.u64.u32 %rd105, %r151;
	bfe.u64 %rd106, %rd220, 48, 8;
	or.b64 %rd107, %rd106, %rd105;
	add.s64 %rd104, %rd44, %rd107;
	// begin inline asm
	ld.global.nc.u8 %r346, [%rd104];
	// end inline asm
	cvt.u16.u32 %rs58, %r346;
	and.b16 %rs59, %rs58, 255;
	setp.lt.u16 %p19, %rs59, 240;
	@%p19 bra $L__BB0_35;
	shl.b32 %r153, %r346, 8;
	and.b32 %r154, %r153, 65280;
	sub.s32 %r156, %r150, %r154;
	cvt.u64.u32 %rd109, %r156;
	bfe.u64 %rd110, %rd220, 40, 8;
	or.b64 %rd111, %rd110, %rd109;
	add.s64 %rd108, %rd44, %rd111;
	// begin inline asm
	ld.global.nc.u8 %r346, [%rd108];
	// end inline asm
	cvt.u16.u32 %rs60, %r346;
	and.b16 %rs61, %rs60, 255;
	setp.lt.u16 %p20, %rs61, 240;
	@%p20 bra $L__BB0_35;
	shl.b32 %r158, %r346, 8;
	and.b32 %r159, %r158, 65280;
	mov.u32 %r160, 65536;
	sub.s32 %r161, %r160, %r159;
	cvt.u64.u32 %rd113, %r161;
	shr.u64 %rd114, %rd220, 32;
	and.b64 %rd115, %rd114, 255;
	or.b64 %rd116, %rd115, %rd113;
	add.s64 %rd112, %rd44, %rd116;
	// begin inline asm
	ld.global.nc.u8 %r346, [%rd112];
	// end inline asm
$L__BB0_35:
	add.s32 %r347, %r347, 1;
	and.b32 %r163, %r346, 255;
	add.s32 %r164, %r16, %r163;
	cvt.s64.s32 %rd118, %r164;
	add.s64 %rd117, %rd44, %rd118;
	// begin inline asm
	ld.global.nc.u8 %r162, [%rd117];
	// end inline asm
	cvt.u16.u32 %rs62, %r162;
	and.b32 %r165, %r162, 255;
	shl.b64 %rd220, %rd220, %r165;
	add.s16 %rs106, %rs106, %rs62;
	and.b16 %rs63, %rs106, 248;
	shr.u16 %rs64, %rs63, 3;
	add.s16 %rs65, %rs64, 4;
	setp.lt.u16 %p21, %rs65, 8;
	@%p21 bra $L__BB0_31;

	// ---- Phase 2a: publish per-thread counts to shared memory ----
	// Thread 0 stores decode_param_3[ctaid.x] + its count; every other
	// thread stores its own count in word [tid.x].
$L__BB0_36:
	shl.b32 %r166, %r358, 2;
	add.s32 %r35, %r108, %r166;		// %r35 = &shared u32 word [tid.x]
	mul.wide.u32 %rd119, %r3, 4;
	add.s64 %rd31, %rd2, %rd119;		// %rd31 = &decode_param_3[ctaid.x]
	setp.eq.s32 %p22, %r358, 0;
	@%p22 bra $L__BB0_38;
	st.volatile.shared.u32 [%r35], %r347;
	bra.uni $L__BB0_39;
$L__BB0_38:
	ld.global.nc.u32 %r168, [%rd31];
	add.s32 %r169, %r168, %r347;
	st.volatile.shared.u32 [shared_mem], %r169;

	// ---- Phase 2b: up-sweep ----
	// For stride d = 2, 4, ... while d <= ntid.x: threads with
	// ((tid.x + 1) & (d - 1)) == 0 add word [tid.x - d/2] into word [tid.x].
$L__BB0_39:
	bar.sync 0;
	setp.lt.u32 %p23, %r1, 2;
	@%p23 bra $L__BB0_44;
	add.s32 %r36, %r358, 1;
	mov.u32 %r348, 2;
$L__BB0_41:
	add.s32 %r171, %r348, -1;
	and.b32 %r172, %r171, %r36;
	setp.ne.s32 %p24, %r172, 0;
	@%p24 bra $L__BB0_43;
	shr.u32 %r173, %r348, 1;
	sub.s32 %r174, %r358, %r173;
	shl.b32 %r175, %r174, 2;
	add.s32 %r177, %r108, %r175;
	ld.volatile.shared.u32 %r178, [%r35];
	ld.volatile.shared.u32 %r179, [%r177];
	add.s32 %r180, %r178, %r179;
	st.volatile.shared.u32 [%r35], %r180;
$L__BB0_43:
	bar.sync 0;
	shl.b32 %r348, %r348, 1;
	setp.le.u32 %p25, %r348, %r1;
	@%p25 bra $L__BB0_41;

	// ---- Phase 2c: thread 0 clears word [ntid.x - 1] ----
$L__BB0_44:
	setp.ne.s32 %p26, %r358, 0;
	@%p26 bra $L__BB0_46;
	mov.u32 %r183, 0;
	st.volatile.shared.u32 [%r17+-4], %r183;

	// ---- Phase 2d: down-sweep ----
	// For stride d = ntid.x, ntid.x/2, ... down to 2: selected threads set
	// word [tid.x] to the sum of both words and word [tid.x - d/2] to the
	// previous value of word [tid.x].
$L__BB0_46:
	bar.sync 0;
	setp.lt.s32 %p27, %r1, 2;
	@%p27 bra $L__BB0_51;
	add.s32 %r40, %r358, 1;
	mov.u32 %r349, %r1;
$L__BB0_48:
	add.s32 %r184, %r349, -1;
	and.b32 %r185, %r184, %r40;
	setp.eq.s32 %p28, %r185, 0;
	@%p28 bra $L__BB0_49;
	bra.uni $L__BB0_50;
$L__BB0_49:
	shr.u32 %r186, %r349, 1;
	sub.s32 %r187, %r358, %r186;
	shl.b32 %r188, %r187, 2;
	add.s32 %r190, %r108, %r188;
	ld.volatile.shared.u32 %r191, [%r35];
	ld.volatile.shared.u32 %r192, [%r190];
	add.s32 %r193, %r191, %r192;
	st.volatile.shared.u32 [%r35], %r193;
	ld.volatile.shared.u32 %r194, [%r190];
	ld.volatile.shared.u32 %r195, [%r35];
	sub.s32 %r196, %r195, %r194;
	st.volatile.shared.u32 [%r190], %r196;
$L__BB0_50:
	shr.u32 %r42, %r349, 1;
	bar.sync 0;
	setp.gt.u32 %p29, %r349, 3;
	mov.u32 %r349, %r42;
	@%p29 bra $L__BB0_48;

	// ---- Phase 2e: thread 0 writes the block bounds ----
	// Word [0] = decode_param_3[ctaid.x], word [ntid.x] = decode_param_3[ctaid.x + 1].
$L__BB0_51:
	@%p26 bra $L__BB0_53;
	ld.global.nc.u32 %r197, [%rd31];
	st.volatile.shared.u32 [shared_mem], %r197;
	add.s32 %r198, %r3, 1;
	mul.wide.u32 %rd120, %r198, 4;
	add.s64 %rd121, %rd2, %rd120;
	ld.global.nc.u32 %r199, [%rd121];
	st.volatile.shared.u32 [%r17], %r199;

	// ---- Phase 3a: set up the emit pass ----
$L__BB0_53:
	bar.sync 0;
	ld.volatile.shared.u32 %r43, [shared_mem];	// %r43 = word [0] (block base index)
	ld.volatile.shared.u32 %r350, [%r35];		// %r350 = this thread's first output index
	add.s32 %r200, %r350, %r347;
	min.u32 %r45, %r200, %r81;			// %r45 = end index, capped by decode_param_8
	// Rebuild the same 64-bit word and initial window as in phase 1c.
	or.b64 %rd122, %rd215, %rd214;
	or.b64 %rd123, %rd122, %rd216;
	or.b64 %rd124, %rd123, %rd217;
	and.b64 %rd125, %rd124, 1099511627520;		// mask 0x000000FFFFFFFF00
	or.b64 %rd126, %rd125, %rd218;
	and.b64 %rd127, %rd213, 280375465082880;
	or.b64 %rd128, %rd21, %rd127;
	or.b64 %rd129, %rd128, %rd126;
	cvt.u32.u64 %r201, %rd222;
	shl.b64 %rd221, %rd129, %r201;
	setp.ge.u32 %p31, %r350, %r45;
	@%p31 bra $L__BB0_61;
	mov.u32 %r351, %r357;

	// ---- Phase 3b: emit loop over the first 64-bit word ----
	// Same table walk as phase 1d; additionally builds and stages one 16-bit
	// element per code. Repeats while (position & 255) < 32 and index < %r45.
$L__BB0_55:
	shr.u64 %rd131, %rd221, 56;
	add.s64 %rd130, %rd44, %rd131;
	// begin inline asm
	ld.global.nc.u8 %r202, [%rd130];
	// end inline asm
	cvt.u16.u32 %rs108, %r202;
	and.b16 %rs66, %rs108, 255;
	setp.lt.u16 %p32, %rs66, 240;
	@%p32 bra $L__BB0_59;
	shl.b32 %r204, %r202, 8;
	and.b32 %r205, %r204, 65280;
	mov.u32 %r206, 65536;
	sub.s32 %r207, %r206, %r205;
	cvt.u64.u32 %rd133, %r207;
	bfe.u64 %rd134, %rd221, 48, 8;
	or.b64 %rd135, %rd134, %rd133;
	add.s64 %rd132, %rd44, %rd135;
	// begin inline asm
	ld.global.nc.u8 %r203, [%rd132];
	// end inline asm
	cvt.u16.u32 %rs108, %r203;
	and.b16 %rs67, %rs108, 255;
	setp.lt.u16 %p33, %rs67, 240;
	@%p33 bra $L__BB0_59;
	shl.b32 %r209, %r203, 8;
	and.b32 %r210, %r209, 65280;
	sub.s32 %r212, %r206, %r210;
	cvt.u64.u32 %rd137, %r212;
	bfe.u64 %rd138, %rd221, 40, 8;
	or.b64 %rd139, %rd138, %rd137;
	add.s64 %rd136, %rd44, %rd139;
	// begin inline asm
	ld.global.nc.u8 %r208, [%rd136];
	// end inline asm
	cvt.u16.u32 %rs108, %r208;
	and.b16 %rs68, %rs108, 255;
	setp.lt.u16 %p34, %rs68, 240;
	@%p34 bra $L__BB0_59;
	shl.b32 %r214, %r208, 8;
	and.b32 %r215, %r214, 65280;
	mov.u32 %r216, 65536;
	sub.s32 %r217, %r216, %r215;
	cvt.u64.u32 %rd141, %r217;
	shr.u64 %rd142, %rd221, 32;
	and.b64 %rd143, %rd142, 255;
	or.b64 %rd144, %rd143, %rd141;
	add.s64 %rd140, %rd44, %rd144;
	// begin inline asm
	ld.global.nc.u8 %r213, [%rd140];
	// end inline asm
	cvt.u16.u32 %rs108, %r213;
$L__BB0_59:
	// Build the 16-bit element from the code and byte b = decode_param_2[index]:
	//   high byte = (b & 0x80) | ((code & 0xFE) >> 1)
	//   low byte  = ((code & 1) << 7) | (b & 0x7F)
	cvt.u64.u32 %rd146, %r350;
	add.s64 %rd147, %rd1, %rd146;
	ld.global.nc.u8 %rs69, [%rd147];
	and.b16 %rs70, %rs69, 128;
	and.b16 %rs71, %rs108, 254;
	shr.u16 %rs72, %rs71, 1;
	or.b16 %rs73, %rs70, %rs72;
	mul.wide.u16 %r219, %rs73, 256;
	and.b16 %rs74, %rs69, 127;
	cvt.u32.u16 %r220, %rs74;
	cvt.u32.u16 %r221, %rs108;
	bfi.b32 %r222, %r221, %r220, 7, 9;
	and.b32 %r223, %r222, 255;
	and.b32 %r224, %r351, -65536;
	or.b32 %r225, %r224, %r223;
	or.b32 %r351, %r225, %r219;
	// Stage the low 16 bits at slot (index - block base).
	sub.s32 %r226, %r350, %r43;
	shl.b32 %r227, %r226, 1;
	add.s32 %r228, %r15, %r227;
	st.volatile.shared.u16 [%r228], %r351;
	and.b32 %r229, %r221, 255;
	add.s32 %r230, %r16, %r229;
	cvt.s64.s32 %rd148, %r230;
	add.s64 %rd145, %rd44, %rd148;
	// begin inline asm
	ld.global.nc.u8 %r218, [%rd145];
	// end inline asm
	cvt.u16.u32 %rs75, %r218;
	and.b32 %r231, %r218, 255;
	shl.b64 %rd221, %rd221, %r231;
	add.s16 %rs107, %rs107, %rs75;
	and.b16 %rs76, %rs107, 255;
	setp.lt.u16 %p35, %rs76, 32;
	add.s32 %r350, %r350, 1;
	setp.lt.u32 %p36, %r350, %r45;
	and.pred %p37, %p35, %p36;
	@%p37 bra $L__BB0_55;
	cvt.u64.u16 %rd149, %rs107;
	and.b64 %rd222, %rd149, 255;

	// ---- Phase 3c: append input bytes 8..11, emit the remaining elements ----
$L__BB0_61:
	add.s64 %rd150, %rd222, 4294967264;	// position - 32 in the low 32 bits
	cvt.u32.u64 %r232, %rd150;
	shl.b64 %rd151, %rd27, %r232;
	or.b64 %rd225, %rd151, %rd221;
	setp.le.u32 %p38, %r45, %r350;
	@%p38 bra $L__BB0_79;
	// If the number of remaining elements is odd, emit one element here so
	// the loop at $L__BB0_70 can handle two per iteration.
	sub.s32 %r233, %r45, %r350;
	and.b32 %r234, %r233, 1;
	setp.eq.b32 %p39, %r234, 1;
	mov.pred %p40, 0;
	xor.pred %p41, %p39, %p40;
	not.pred %p42, %p41;
	mov.u32 %r356, %r350;
	@%p42 bra $L__BB0_68;
	shr.u64 %rd153, %rd225, 56;
	add.s64 %rd152, %rd44, %rd153;
	// begin inline asm
	ld.global.nc.u8 %r235, [%rd152];
	// end inline asm
	cvt.u16.u32 %rs109, %r235;
	and.b16 %rs77, %rs109, 255;
	setp.lt.u16 %p43, %rs77, 240;
	@%p43 bra $L__BB0_67;
	shl.b32 %r237, %r235, 8;
	and.b32 %r238, %r237, 65280;
	mov.u32 %r239, 65536;
	sub.s32 %r240, %r239, %r238;
	cvt.u64.u32 %rd155, %r240;
	bfe.u64 %rd156, %rd225, 48, 8;
	or.b64 %rd157, %rd156, %rd155;
	add.s64 %rd154, %rd44, %rd157;
	// begin inline asm
	ld.global.nc.u8 %r236, [%rd154];
	// end inline asm
	cvt.u16.u32 %rs109, %r236;
	and.b16 %rs78, %rs109, 255;
	setp.lt.u16 %p44, %rs78, 240;
	@%p44 bra $L__BB0_67;
	shl.b32 %r242, %r236, 8;
	and.b32 %r243, %r242, 65280;
	sub.s32 %r245, %r239, %r243;
	cvt.u64.u32 %rd159, %r245;
	bfe.u64 %rd160, %rd225, 40, 8;
	or.b64 %rd161, %rd160, %rd159;
	add.s64 %rd158, %rd44, %rd161;
	// begin inline asm
	ld.global.nc.u8 %r241, [%rd158];
	// end inline asm
	cvt.u16.u32 %rs109, %r241;
	and.b16 %rs79, %rs109, 255;
	setp.lt.u16 %p45, %rs79, 240;
	@%p45 bra $L__BB0_67;
	shl.b32 %r247, %r241, 8;
	and.b32 %r248, %r247, 65280;
	mov.u32 %r249, 65536;
	sub.s32 %r250, %r249, %r248;
	cvt.u64.u32 %rd163, %r250;
	shr.u64 %rd164, %rd225, 32;
	and.b64 %rd165, %rd164, 255;
	or.b64 %rd166, %rd165, %rd163;
	add.s64 %rd162, %rd44, %rd166;
	// begin inline asm
	ld.global.nc.u8 %r246, [%rd162];
	// end inline asm
	cvt.u16.u32 %rs109, %r246;
$L__BB0_67:
	// Same element construction and staging as at $L__BB0_59.
	cvt.u64.u32 %rd168, %r350;
	add.s64 %rd169, %rd1, %rd168;
	ld.global.nc.u8 %rs80, [%rd169];
	and.b16 %rs81, %rs80, 128;
	and.b16 %rs82, %rs109, 254;
	shr.u16 %rs83, %rs82, 1;
	or.b16 %rs84, %rs81, %rs83;
	mul.wide.u16 %r252, %rs84, 256;
	and.b16 %rs85, %rs80, 127;
	cvt.u32.u16 %r253, %rs85;
	cvt.u32.u16 %r254, %rs109;
	bfi.b32 %r255, %r254, %r253, 7, 9;
	and.b32 %r256, %r255, 255;
	and.b32 %r257, %r25, -65536;
	or.b32 %r258, %r257, %r256;
	or.b32 %r357, %r258, %r252;
	sub.s32 %r259, %r350, %r43;
	shl.b32 %r260, %r259, 1;
	add.s32 %r261, %r15, %r260;
	st.volatile.shared.u16 [%r261], %r357;
	add.s32 %r356, %r350, 1;
	and.b32 %r262, %r254, 255;
	add.s32 %r263, %r16, %r262;
	cvt.s64.s32 %rd170, %r263;
	add.s64 %rd167, %rd44, %rd170;
	// begin inline asm
	ld.global.nc.u8 %r251, [%rd167];
	// end inline asm
	and.b32 %r264, %r251, 255;
	shl.b64 %rd225, %rd225, %r264;
$L__BB0_68:
	// (-2 - %r350) == ~%r45 is equivalent to %r45 == %r350 + 1, i.e. exactly
	// one element was remaining and it has just been emitted.
	mov.u32 %r265, -2;
	sub.s32 %r266, %r265, %r350;
	not.b32 %r267, %r45;
	setp.eq.s32 %p46, %r266, %r267;
	@%p46 bra $L__BB0_79;
	// %r61 + 3 = staging address of element %r356
	// (shared_mem + 4*ntid.x + 4 + 2*(%r356 - %r43)).
	shl.b32 %r269, %r356, 1;
	add.s32 %r270, %r2, %r269;
	shl.b32 %r271, %r43, 1;
	sub.s32 %r272, %r270, %r271;
	add.s32 %r274, %r108, %r272;
	add.s32 %r61, %r274, 1;
	mov.u32 %r355, 0;			// byte offset into staging, +4 per iteration

	// ---- Phase 3d: emit loop, two elements per iteration ----
$L__BB0_70:
	shr.u64 %rd172, %rd225, 56;
	add.s64 %rd171, %rd44, %rd172;
	// begin inline asm
	ld.global.nc.u8 %r275, [%rd171];
	// end inline asm
	cvt.u16.u32 %rs110, %r275;
	and.b16 %rs86, %rs110, 255;
	setp.lt.u16 %p47, %rs86, 240;
	@%p47 bra $L__BB0_74;
	shl.b32 %r277, %r275, 8;
	and.b32 %r278, %r277, 65280;
	mov.u32 %r279, 65536;
	sub.s32 %r280, %r279, %r278;
	cvt.u64.u32 %rd174, %r280;
	bfe.u64 %rd175, %rd225, 48, 8;
	or.b64 %rd176, %rd175, %rd174;
	add.s64 %rd173, %rd44, %rd176;
	// begin inline asm
	ld.global.nc.u8 %r276, [%rd173];
	// end inline asm
	cvt.u16.u32 %rs110, %r276;
	and.b16 %rs87, %rs110, 255;
	setp.lt.u16 %p48, %rs87, 240;
	@%p48 bra $L__BB0_74;
	shl.b32 %r282, %r276, 8;
	and.b32 %r283, %r282, 65280;
	sub.s32 %r285, %r279, %r283;
	cvt.u64.u32 %rd178, %r285;
	bfe.u64 %rd179, %rd225, 40, 8;
	or.b64 %rd180, %rd179, %rd178;
	add.s64 %rd177, %rd44, %rd180;
	// begin inline asm
	ld.global.nc.u8 %r281, [%rd177];
	// end inline asm
	cvt.u16.u32 %rs110, %r281;
	and.b16 %rs88, %rs110, 255;
	setp.lt.u16 %p49, %rs88, 240;
	@%p49 bra $L__BB0_74;
	shl.b32 %r287, %r281, 8;
	and.b32 %r288, %r287, 65280;
	mov.u32 %r289, 65536;
	sub.s32 %r290, %r289, %r288;
	cvt.u64.u32 %rd182, %r290;
	shr.u64 %rd183, %rd225, 32;
	and.b64 %rd184, %rd183, 255;
	or.b64 %rd185, %rd184, %rd182;
	add.s64 %rd181, %rd44, %rd185;
	// begin inline asm
	ld.global.nc.u8 %r286, [%rd181];
	// end inline asm
	cvt.u16.u32 %rs110, %r286;
$L__BB0_74:
	// First element of the pair (index %r356); same byte layout as at
	// $L__BB0_59, assembled here with prmt (selector 0x2104 places the low
	// byte from %r297 below the bytes of %r293).
	cvt.u64.u32 %rd188, %r356;
	add.s64 %rd189, %rd1, %rd188;
	ld.global.nc.u8 %rs89, [%rd189];
	and.b16 %rs90, %rs89, -128;
	and.b16 %rs91, %rs110, 254;
	shr.u16 %rs92, %rs91, 1;
	or.b16 %rs93, %rs90, %rs92;
	cvt.u32.u16 %r293, %rs93;
	and.b16 %rs94, %rs89, 127;
	cvt.u32.u16 %r294, %rs94;
	and.b32 %r68, %r357, -65536;
	cvt.u32.u16 %r295, %rs110;
	bfi.b32 %r296, %r295, %r294, 7, 9;
	and.b32 %r297, %r296, 255;
	prmt.b32 %r298, %r293, %r297, 8452;
	add.s32 %r299, %r61, %r355;
	st.volatile.shared.u16 [%r299+3], %r298;
	and.b32 %r300, %r295, 255;
	add.s32 %r301, %r16, %r300;
	cvt.s64.s32 %rd190, %r301;
	add.s64 %rd186, %rd44, %rd190;
	// begin inline asm
	ld.global.nc.u8 %r291, [%rd186];
	// end inline asm
	and.b32 %r302, %r291, 255;
	shl.b64 %rd42, %rd225, %r302;
	// Second element of the pair: table walk on the shifted window %rd42.
	shr.u64 %rd191, %rd42, 56;
	add.s64 %rd187, %rd44, %rd191;
	// begin inline asm
	ld.global.nc.u8 %r292, [%rd187];
	// end inline asm
	cvt.u16.u32 %rs111, %r292;
	and.b16 %rs95, %rs111, 255;
	setp.lt.u16 %p50, %rs95, 240;
	@%p50 bra $L__BB0_78;
	shl.b32 %r304, %r292, 8;
	and.b32 %r305, %r304, 65280;
	mov.u32 %r306, 65536;
	sub.s32 %r307, %r306, %r305;
	cvt.u64.u32 %rd193, %r307;
	bfe.u64 %rd194, %rd42, 48, 8;
	or.b64 %rd195, %rd194, %rd193;
	add.s64 %rd192, %rd44, %rd195;
	// begin inline asm
	ld.global.nc.u8 %r303, [%rd192];
	// end inline asm
	cvt.u16.u32 %rs111, %r303;
	and.b16 %rs96, %rs111, 255;
	setp.lt.u16 %p51, %rs96, 240;
	@%p51 bra $L__BB0_78;
	shl.b32 %r309, %r303, 8;
	and.b32 %r310, %r309, 65280;
	sub.s32 %r312, %r306, %r310;
	cvt.u64.u32 %rd197, %r312;
	bfe.u64 %rd198, %rd42, 40, 8;
	or.b64 %rd199, %rd198, %rd197;
	add.s64 %rd196, %rd44, %rd199;
	// begin inline asm
	ld.global.nc.u8 %r308, [%rd196];
	// end inline asm
	cvt.u16.u32 %rs111, %r308;
	and.b16 %rs97, %rs111, 255;
	setp.lt.u16 %p52, %rs97, 240;
	@%p52 bra $L__BB0_78;
	shl.b32 %r314, %r308, 8;
	and.b32 %r315, %r314, 65280;
	mov.u32 %r316, 65536;
	sub.s32 %r317, %r316, %r315;
	cvt.u64.u32 %rd201, %r317;
	shr.u64 %rd202, %rd42, 32;
	and.b64 %rd203, %rd202, 255;
	or.b64 %rd204, %rd203, %rd201;
	add.s64 %rd200, %rd44, %rd204;
	// begin inline asm
	ld.global.nc.u8 %r313, [%rd200];
	// end inline asm
	cvt.u16.u32 %rs111, %r313;
$L__BB0_78:
	// Second element (index %r356 + 1), staged 2 bytes after the first.
	add.s32 %r319, %r356, 1;
	cvt.u64.u32 %rd206, %r319;
	add.s64 %rd207, %rd1, %rd206;
	ld.global.nc.u8 %rs98, [%rd207];
	and.b16 %rs99, %rs98, 128;
	and.b16 %rs100, %rs111, 254;
	shr.u16 %rs101, %rs100, 1;
	or.b16 %rs102, %rs99, %rs101;
	mul.wide.u16 %r320, %rs102, 256;
	and.b16 %rs103, %rs98, 127;
	cvt.u32.u16 %r321, %rs103;
	cvt.u32.u16 %r322, %rs111;
	bfi.b32 %r323, %r322, %r321, 7, 9;
	and.b32 %r324, %r323, 255;
	or.b32 %r325, %r68, %r324;
	or.b32 %r357, %r325, %r320;
	add.s32 %r338, %r299, 3;
	st.volatile.shared.u16 [%r338+2], %r357;
	and.b32 %r326, %r322, 255;
	add.s32 %r327, %r16, %r326;
	cvt.s64.s32 %rd208, %r327;
	add.s64 %rd205, %rd44, %rd208;
	// begin inline asm
	ld.global.nc.u8 %r318, [%rd205];
	// end inline asm
	and.b32 %r328, %r318, 255;
	shl.b64 %rd225, %rd42, %r328;
	add.s32 %r355, %r355, 4;
	add.s32 %r356, %r356, 2;
	setp.lt.u32 %p53, %r356, %r45;
	@%p53 bra $L__BB0_70;

	// ---- Phase 4: copy staged elements to decode_param_5 ----
	// Element count = min(word[ntid.x] - word[0], decode_param_8 - word[0]).
	// Each thread copies slots tid.x, tid.x + ntid.x, ... to output index
	// (slot + word[0]); the bound is re-read from shared memory every pass.
$L__BB0_79:
	bar.sync 0;
	ld.volatile.shared.u32 %r329, [%r17];
	sub.s32 %r330, %r329, %r43;
	sub.s32 %r76, %r81, %r43;
	min.u32 %r331, %r330, %r76;
	setp.ge.u32 %p54, %r358, %r331;
	@%p54 bra $L__BB0_81;
$L__BB0_80:
	shl.b32 %r332, %r358, 1;
	add.s32 %r333, %r17, %r332;
	ld.volatile.shared.u16 %rs104, [%r333+4];
	add.s32 %r334, %r358, %r43;
	mul.wide.u32 %rd209, %r334, 2;
	add.s64 %rd210, %rd24, %rd209;
	st.global.u16 [%rd210], %rs104;
	ld.volatile.shared.u32 %r335, [%r17];
	sub.s32 %r336, %r335, %r43;
	min.u32 %r337, %r336, %r76;
	add.s32 %r358, %r358, %r1;
	setp.lt.u32 %p55, %r358, %r337;
	@%p55 bra $L__BB0_80;
$L__BB0_81:
	ret;
}