; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX906 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX908 %s ; Due to high register pressure, regalloc would split the live range of the WWM VGPR used for SGPR spills ; and introduce a copy. The copy should be a whole-wave copy with exec mask manipulation around it. ; FIXME: The destination register involved in the whole-wave copy should be considered for preserving all the lanes ; with a spill/restore in the function prolog/epilog. Otherwise, the copy might unintentionally clobber its inactive lanes. define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 { ; GFX906-LABEL: preserve_wwm_copy_dstreg: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX906-NEXT: s_mov_b32 s16, s33 ; GFX906-NEXT: s_mov_b32 s33, s32 ; GFX906-NEXT: s_xor_saveexec_b64 s[18:19], -1 ; GFX906-NEXT: buffer_store_dword v33, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill ; GFX906-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill ; GFX906-NEXT: s_mov_b64 exec, -1 ; GFX906-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX906-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill ; GFX906-NEXT: s_mov_b64 exec, s[18:19] ; GFX906-NEXT: ; implicit-def: $vgpr2 ; GFX906-NEXT: s_mov_b32 s21, s15 ; GFX906-NEXT: v_writelane_b32 v2, s6, 0 ; GFX906-NEXT: v_writelane_b32 v2, s7, 1 ; GFX906-NEXT: v_writelane_b32 v2, s21, 2 ; GFX906-NEXT: s_mov_b32 s22, s14 ; GFX906-NEXT: v_writelane_b32 v2, s22, 3 ; GFX906-NEXT: s_mov_b32 s23, s13 ; GFX906-NEXT: v_writelane_b32 v2, s23, 4 ; GFX906-NEXT: s_mov_b32 s24, s12 ; GFX906-NEXT: v_writelane_b32 v2, s24, 5 ; GFX906-NEXT: s_mov_b64 s[26:27], s[10:11] ; GFX906-NEXT: v_writelane_b32 v2, s26, 6 ; GFX906-NEXT: 
v_writelane_b32 v2, s27, 7 ; GFX906-NEXT: v_writelane_b32 v2, s8, 8 ; GFX906-NEXT: v_writelane_b32 v41, s16, 2 ; GFX906-NEXT: v_writelane_b32 v2, s9, 9 ; GFX906-NEXT: v_writelane_b32 v41, s30, 0 ; GFX906-NEXT: v_writelane_b32 v2, s4, 10 ; GFX906-NEXT: s_addk_i32 s32, 0x2800 ; GFX906-NEXT: v_writelane_b32 v41, s31, 1 ; GFX906-NEXT: v_mov_b32_e32 v32, v31 ; GFX906-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX906-NEXT: s_waitcnt vmcnt(0) ; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; GFX906-NEXT: v_writelane_b32 v2, s5, 11 ; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX906-NEXT: v_mov_b32_e32 v33, v2 ; GFX906-NEXT: s_mov_b64 exec, s[34:35] ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def v[0:31] ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill ; GFX906-NEXT: s_waitcnt vmcnt(0) ; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill ; GFX906-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill ; GFX906-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill ; GFX906-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill ; GFX906-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill ; GFX906-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill ; GFX906-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill ; GFX906-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill ; GFX906-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill ; GFX906-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill ; GFX906-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill ; GFX906-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill ; GFX906-NEXT: 
buffer_store_dword v13, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill ; GFX906-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill ; GFX906-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill ; GFX906-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill ; GFX906-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill ; GFX906-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill ; GFX906-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill ; GFX906-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill ; GFX906-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill ; GFX906-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill ; GFX906-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill ; GFX906-NEXT: buffer_store_dword v24, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill ; GFX906-NEXT: buffer_store_dword v25, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill ; GFX906-NEXT: buffer_store_dword v26, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill ; GFX906-NEXT: buffer_store_dword v27, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill ; GFX906-NEXT: buffer_store_dword v28, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill ; GFX906-NEXT: buffer_store_dword v29, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill ; GFX906-NEXT: buffer_store_dword v30, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill ; GFX906-NEXT: buffer_store_dword v31, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def v40 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s11 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX906-NEXT: v_mov_b32_e32 v40, v33 ; GFX906-NEXT: s_mov_b64 exec, s[34:35] ; GFX906-NEXT: v_writelane_b32 v40, s11, 12 ; 
GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s12 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: v_writelane_b32 v40, s12, 13 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s13 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: v_writelane_b32 v40, s13, 14 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s14 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: v_writelane_b32 v40, s14, 15 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s15 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: v_writelane_b32 v40, s15, 16 ; GFX906-NEXT: s_getpc_b64 s[10:11] ; GFX906-NEXT: s_add_u32 s10, s10, foo@gotpcrel32@lo+4 ; GFX906-NEXT: s_addc_u32 s11, s11, foo@gotpcrel32@hi+12 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s16 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: v_writelane_b32 v40, s16, 17 ; GFX906-NEXT: s_load_dwordx2 s[10:11], s[10:11], 0x0 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s17 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: v_writelane_b32 v40, s17, 18 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s18 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: v_writelane_b32 v40, s18, 19 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s19 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: v_writelane_b32 v40, s19, 20 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s20 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: v_writelane_b32 v40, s20, 21 ; GFX906-NEXT: s_waitcnt lgkmcnt(0) ; GFX906-NEXT: v_writelane_b32 v40, s10, 22 ; GFX906-NEXT: v_writelane_b32 v40, s11, 23 ; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX906-NEXT: s_mov_b64 exec, s[34:35] ; GFX906-NEXT: v_readlane_b32 s16, v40, 22 ; GFX906-NEXT: s_mov_b32 s12, s24 ; GFX906-NEXT: s_mov_b32 s13, s23 ; GFX906-NEXT: s_mov_b32 s14, s22 ; GFX906-NEXT: v_mov_b32_e32 v31, v32 ; GFX906-NEXT: s_mov_b32 s15, s21 ; GFX906-NEXT: s_mov_b64 s[10:11], s[26:27] ; GFX906-NEXT: v_readlane_b32 s17, v40, 23 ; GFX906-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX906-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX906-NEXT: 
s_mov_b64 exec, s[34:35] ; GFX906-NEXT: v_readlane_b32 s11, v40, 12 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; use s11 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: v_readlane_b32 s12, v40, 13 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; use s12 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: v_readlane_b32 s13, v40, 14 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; use s13 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: v_readlane_b32 s14, v40, 15 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; use s14 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: v_readlane_b32 s15, v40, 16 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; use s15 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: v_readlane_b32 s16, v40, 17 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; use s16 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: v_readlane_b32 s17, v40, 18 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; use s17 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: v_readlane_b32 s18, v40, 19 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; use s18 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: v_readlane_b32 s19, v40, 20 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; use s19 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: v_readlane_b32 s20, v40, 21 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; use s20 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s21 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s22 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s23 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s24 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s25 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s26 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s27 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s28 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s29 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:4 ; 4-byte Folded 
Reload ; GFX906-NEXT: v_writelane_b32 v40, s21, 24 ; GFX906-NEXT: v_writelane_b32 v40, s22, 25 ; GFX906-NEXT: v_writelane_b32 v40, s23, 26 ; GFX906-NEXT: v_writelane_b32 v40, s24, 27 ; GFX906-NEXT: v_writelane_b32 v40, s25, 28 ; GFX906-NEXT: v_writelane_b32 v40, s26, 29 ; GFX906-NEXT: v_writelane_b32 v40, s27, 30 ; GFX906-NEXT: v_writelane_b32 v40, s28, 31 ; GFX906-NEXT: v_writelane_b32 v40, s29, 32 ; GFX906-NEXT: v_readlane_b32 s4, v40, 10 ; GFX906-NEXT: v_readlane_b32 s6, v40, 0 ; GFX906-NEXT: v_readlane_b32 s8, v40, 8 ; GFX906-NEXT: v_readlane_b32 s10, v40, 6 ; GFX906-NEXT: v_readlane_b32 s16, v40, 22 ; GFX906-NEXT: v_readlane_b32 s12, v40, 5 ; GFX906-NEXT: v_readlane_b32 s13, v40, 4 ; GFX906-NEXT: v_readlane_b32 s14, v40, 3 ; GFX906-NEXT: v_readlane_b32 s15, v40, 2 ; GFX906-NEXT: v_readlane_b32 s5, v40, 11 ; GFX906-NEXT: v_readlane_b32 s7, v40, 1 ; GFX906-NEXT: v_readlane_b32 s9, v40, 9 ; GFX906-NEXT: v_readlane_b32 s11, v40, 7 ; GFX906-NEXT: v_readlane_b32 s17, v40, 23 ; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX906-NEXT: s_mov_b64 exec, s[34:35] ; GFX906-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX906-NEXT: s_mov_b64 exec, s[34:35] ; GFX906-NEXT: v_readlane_b32 s21, v40, 24 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; use s21 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: v_readlane_b32 s22, v40, 25 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; use s22 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: v_readlane_b32 s23, v40, 26 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; use s23 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: v_readlane_b32 s24, v40, 27 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; use s24 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: v_readlane_b32 s25, v40, 28 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; use s25 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: v_readlane_b32 s26, v40, 29 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; use s26 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: v_readlane_b32 s27, v40, 30 ; GFX906-NEXT: ;;#ASMSTART ; 
GFX906-NEXT: ; use s27 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: v_readlane_b32 s28, v40, 31 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; use s28 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: v_readlane_b32 s29, v40, 32 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; use s29 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GFX906-NEXT: v_readlane_b32 s4, v40, 10 ; GFX906-NEXT: v_readlane_b32 s6, v40, 0 ; GFX906-NEXT: v_readlane_b32 s8, v40, 8 ; GFX906-NEXT: v_readlane_b32 s10, v40, 6 ; GFX906-NEXT: v_readlane_b32 s16, v40, 22 ; GFX906-NEXT: v_readlane_b32 s5, v40, 11 ; GFX906-NEXT: v_readlane_b32 s7, v40, 1 ; GFX906-NEXT: v_readlane_b32 s9, v40, 9 ; GFX906-NEXT: v_readlane_b32 s11, v40, 7 ; GFX906-NEXT: v_readlane_b32 s12, v40, 5 ; GFX906-NEXT: v_readlane_b32 s13, v40, 4 ; GFX906-NEXT: v_readlane_b32 s14, v40, 3 ; GFX906-NEXT: v_readlane_b32 s15, v40, 2 ; GFX906-NEXT: v_readlane_b32 s17, v40, 23 ; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX906-NEXT: s_mov_b64 exec, s[34:35] ; GFX906-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX906-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword 
v10, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword v23, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword v24, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword v25, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword v26, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword v27, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword v28, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword v29, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword v30, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:136 ; 4-byte Folded 
Reload ; GFX906-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload ; GFX906-NEXT: v_readlane_b32 s31, v41, 1 ; GFX906-NEXT: v_readlane_b32 s30, v41, 0 ; GFX906-NEXT: ; kill: killed $vgpr40 ; GFX906-NEXT: v_readlane_b32 s4, v41, 2 ; GFX906-NEXT: s_waitcnt vmcnt(0) ; GFX906-NEXT: flat_store_dwordx4 v[0:1], v[30:33] offset:112 ; GFX906-NEXT: s_waitcnt vmcnt(0) ; GFX906-NEXT: flat_store_dwordx4 v[0:1], v[26:29] offset:96 ; GFX906-NEXT: s_waitcnt vmcnt(0) ; GFX906-NEXT: flat_store_dwordx4 v[0:1], v[22:25] offset:80 ; GFX906-NEXT: s_waitcnt vmcnt(0) ; GFX906-NEXT: flat_store_dwordx4 v[0:1], v[18:21] offset:64 ; GFX906-NEXT: s_waitcnt vmcnt(0) ; GFX906-NEXT: flat_store_dwordx4 v[0:1], v[14:17] offset:48 ; GFX906-NEXT: s_waitcnt vmcnt(0) ; GFX906-NEXT: flat_store_dwordx4 v[0:1], v[10:13] offset:32 ; GFX906-NEXT: s_waitcnt vmcnt(0) ; GFX906-NEXT: flat_store_dwordx4 v[0:1], v[6:9] offset:16 ; GFX906-NEXT: s_waitcnt vmcnt(0) ; GFX906-NEXT: flat_store_dwordx4 v[0:1], v[2:5] ; GFX906-NEXT: s_waitcnt vmcnt(0) ; GFX906-NEXT: s_xor_saveexec_b64 s[6:7], -1 ; GFX906-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:152 ; 4-byte Folded Reload ; GFX906-NEXT: s_mov_b64 exec, -1 ; GFX906-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload ; GFX906-NEXT: s_mov_b64 exec, s[6:7] ; GFX906-NEXT: s_addk_i32 s32, 0xd800 ; GFX906-NEXT: s_mov_b32 s33, s4 ; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX906-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: preserve_wwm_copy_dstreg: ; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: s_mov_b32 s16, s33 ; GFX908-NEXT: s_mov_b32 s33, s32 ; GFX908-NEXT: s_xor_saveexec_b64 s[18:19], -1 ; GFX908-NEXT: buffer_store_dword v33, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill ; GFX908-NEXT: 
buffer_store_dword v2, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill ; GFX908-NEXT: s_mov_b64 exec, -1 ; GFX908-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill ; GFX908-NEXT: s_mov_b64 exec, s[18:19] ; GFX908-NEXT: v_mov_b32_e32 v3, s16 ; GFX908-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill ; GFX908-NEXT: s_addk_i32 s32, 0x2c00 ; GFX908-NEXT: s_mov_b64 s[16:17], exec ; GFX908-NEXT: s_mov_b64 exec, 1 ; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:164 ; GFX908-NEXT: v_writelane_b32 v2, s30, 0 ; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:164 ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: s_mov_b64 exec, s[16:17] ; GFX908-NEXT: s_mov_b64 s[16:17], exec ; GFX908-NEXT: s_mov_b64 exec, 1 ; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:164 ; GFX908-NEXT: v_writelane_b32 v2, s31, 0 ; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:164 ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: s_mov_b64 exec, s[16:17] ; GFX908-NEXT: ; implicit-def: $vgpr2 ; GFX908-NEXT: s_mov_b32 s21, s15 ; GFX908-NEXT: v_writelane_b32 v2, s6, 0 ; GFX908-NEXT: v_writelane_b32 v2, s7, 1 ; GFX908-NEXT: v_writelane_b32 v2, s21, 2 ; GFX908-NEXT: s_mov_b32 s22, s14 ; GFX908-NEXT: v_writelane_b32 v2, s22, 3 ; GFX908-NEXT: s_mov_b32 s23, s13 ; GFX908-NEXT: v_writelane_b32 v2, s23, 4 ; GFX908-NEXT: s_mov_b32 s24, s12 ; GFX908-NEXT: v_writelane_b32 v2, s24, 5 ; GFX908-NEXT: s_mov_b64 s[26:27], s[10:11] ; GFX908-NEXT: v_writelane_b32 v2, s26, 6 ; GFX908-NEXT: v_writelane_b32 v2, s27, 7 ; GFX908-NEXT: v_writelane_b32 v2, s8, 8 ; GFX908-NEXT: v_writelane_b32 v2, s9, 9 ; GFX908-NEXT: v_writelane_b32 v2, s4, 10 ; GFX908-NEXT: v_mov_b32_e32 v32, v31 ; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded 
Spill ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill ; GFX908-NEXT: v_writelane_b32 v2, s5, 11 ; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX908-NEXT: v_mov_b32_e32 v33, v2 ; GFX908-NEXT: s_mov_b64 exec, s[34:35] ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def v[0:31] ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill ; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill ; GFX908-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill ; GFX908-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill ; GFX908-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill ; GFX908-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill ; GFX908-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill ; GFX908-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill ; GFX908-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill ; GFX908-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill ; GFX908-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill ; GFX908-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill ; GFX908-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill ; GFX908-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill ; GFX908-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill ; GFX908-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill ; GFX908-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill ; GFX908-NEXT: 
buffer_store_dword v18, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill ; GFX908-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill ; GFX908-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill ; GFX908-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill ; GFX908-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill ; GFX908-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill ; GFX908-NEXT: buffer_store_dword v24, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill ; GFX908-NEXT: buffer_store_dword v25, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill ; GFX908-NEXT: buffer_store_dword v26, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill ; GFX908-NEXT: buffer_store_dword v27, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill ; GFX908-NEXT: buffer_store_dword v28, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill ; GFX908-NEXT: buffer_store_dword v29, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill ; GFX908-NEXT: buffer_store_dword v30, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill ; GFX908-NEXT: buffer_store_dword v31, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def v40 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s11 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX908-NEXT: v_mov_b32_e32 v40, v33 ; GFX908-NEXT: s_mov_b64 exec, s[34:35] ; GFX908-NEXT: v_writelane_b32 v40, s11, 12 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s12 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: v_writelane_b32 v40, s12, 13 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s13 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: v_writelane_b32 v40, s13, 14 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s14 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: v_writelane_b32 v40, s14, 15 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s15 ; GFX908-NEXT: ;;#ASMEND ; 
GFX908-NEXT: v_writelane_b32 v40, s15, 16 ; GFX908-NEXT: s_getpc_b64 s[10:11] ; GFX908-NEXT: s_add_u32 s10, s10, foo@gotpcrel32@lo+4 ; GFX908-NEXT: s_addc_u32 s11, s11, foo@gotpcrel32@hi+12 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s16 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: v_writelane_b32 v40, s16, 17 ; GFX908-NEXT: s_load_dwordx2 s[10:11], s[10:11], 0x0 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s17 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: v_writelane_b32 v40, s17, 18 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s18 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: v_writelane_b32 v40, s18, 19 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s19 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: v_writelane_b32 v40, s19, 20 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s20 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: v_writelane_b32 v40, s20, 21 ; GFX908-NEXT: s_waitcnt lgkmcnt(0) ; GFX908-NEXT: v_writelane_b32 v40, s10, 22 ; GFX908-NEXT: v_writelane_b32 v40, s11, 23 ; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX908-NEXT: s_mov_b64 exec, s[34:35] ; GFX908-NEXT: v_readlane_b32 s16, v40, 22 ; GFX908-NEXT: s_mov_b32 s12, s24 ; GFX908-NEXT: s_mov_b32 s13, s23 ; GFX908-NEXT: s_mov_b32 s14, s22 ; GFX908-NEXT: v_mov_b32_e32 v31, v32 ; GFX908-NEXT: s_mov_b32 s15, s21 ; GFX908-NEXT: s_mov_b64 s[10:11], s[26:27] ; GFX908-NEXT: v_readlane_b32 s17, v40, 23 ; GFX908-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX908-NEXT: s_mov_b64 exec, s[34:35] ; GFX908-NEXT: v_readlane_b32 s11, v40, 12 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use s11 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: v_readlane_b32 s12, v40, 13 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use s12 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: v_readlane_b32 s13, v40, 14 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use s13 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: v_readlane_b32 s14, v40, 15 ; 
GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use s14 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: v_readlane_b32 s15, v40, 16 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use s15 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: v_readlane_b32 s16, v40, 17 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use s16 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: v_readlane_b32 s17, v40, 18 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use s17 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: v_readlane_b32 s18, v40, 19 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use s18 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: v_readlane_b32 s19, v40, 20 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use s19 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: v_readlane_b32 s20, v40, 21 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use s20 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s21 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s22 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s23 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s24 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s25 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s26 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s27 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s28 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s29 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; GFX908-NEXT: v_writelane_b32 v40, s21, 24 ; GFX908-NEXT: v_writelane_b32 v40, s22, 25 ; GFX908-NEXT: v_writelane_b32 v40, s23, 26 ; GFX908-NEXT: v_writelane_b32 v40, s24, 27 ; GFX908-NEXT: v_writelane_b32 v40, s25, 28 ; GFX908-NEXT: v_writelane_b32 v40, s26, 29 ; GFX908-NEXT: v_writelane_b32 v40, s27, 30 ; GFX908-NEXT: v_writelane_b32 v40, s28, 31 ; GFX908-NEXT: v_writelane_b32 v40, s29, 32 ; GFX908-NEXT: 
v_readlane_b32 s4, v40, 10 ; GFX908-NEXT: v_readlane_b32 s6, v40, 0 ; GFX908-NEXT: v_readlane_b32 s8, v40, 8 ; GFX908-NEXT: v_readlane_b32 s10, v40, 6 ; GFX908-NEXT: v_readlane_b32 s16, v40, 22 ; GFX908-NEXT: v_readlane_b32 s12, v40, 5 ; GFX908-NEXT: v_readlane_b32 s13, v40, 4 ; GFX908-NEXT: v_readlane_b32 s14, v40, 3 ; GFX908-NEXT: v_readlane_b32 s15, v40, 2 ; GFX908-NEXT: v_readlane_b32 s5, v40, 11 ; GFX908-NEXT: v_readlane_b32 s7, v40, 1 ; GFX908-NEXT: v_readlane_b32 s9, v40, 9 ; GFX908-NEXT: v_readlane_b32 s11, v40, 7 ; GFX908-NEXT: v_readlane_b32 s17, v40, 23 ; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX908-NEXT: s_mov_b64 exec, s[34:35] ; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX908-NEXT: s_mov_b64 exec, s[34:35] ; GFX908-NEXT: v_readlane_b32 s21, v40, 24 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use s21 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: v_readlane_b32 s22, v40, 25 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use s22 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: v_readlane_b32 s23, v40, 26 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use s23 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: v_readlane_b32 s24, v40, 27 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use s24 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: v_readlane_b32 s25, v40, 28 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use s25 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: v_readlane_b32 s26, v40, 29 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use s26 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: v_readlane_b32 s27, v40, 30 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use s27 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: v_readlane_b32 s28, v40, 31 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use s28 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: v_readlane_b32 s29, v40, 32 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use s29 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; GFX908-NEXT: v_readlane_b32 s4, v40, 10 
; GFX908-NEXT: v_readlane_b32 s6, v40, 0 ; GFX908-NEXT: v_readlane_b32 s8, v40, 8 ; GFX908-NEXT: v_readlane_b32 s10, v40, 6 ; GFX908-NEXT: v_readlane_b32 s16, v40, 22 ; GFX908-NEXT: v_readlane_b32 s5, v40, 11 ; GFX908-NEXT: v_readlane_b32 s7, v40, 1 ; GFX908-NEXT: v_readlane_b32 s9, v40, 9 ; GFX908-NEXT: v_readlane_b32 s11, v40, 7 ; GFX908-NEXT: v_readlane_b32 s12, v40, 5 ; GFX908-NEXT: v_readlane_b32 s13, v40, 4 ; GFX908-NEXT: v_readlane_b32 s14, v40, 3 ; GFX908-NEXT: v_readlane_b32 s15, v40, 2 ; GFX908-NEXT: v_readlane_b32 s17, v40, 23 ; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX908-NEXT: s_mov_b64 exec, s[34:35] ; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload ; GFX908-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload ; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload ; GFX908-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload ; GFX908-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload ; GFX908-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload ; GFX908-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload ; GFX908-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload ; GFX908-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload ; GFX908-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload ; GFX908-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload ; GFX908-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload ; GFX908-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload ; GFX908-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload ; GFX908-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload ; 
GFX908-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload ; GFX908-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload ; GFX908-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload ; GFX908-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload ; GFX908-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload ; GFX908-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload ; GFX908-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload ; GFX908-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload ; GFX908-NEXT: buffer_load_dword v23, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload ; GFX908-NEXT: buffer_load_dword v24, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload ; GFX908-NEXT: buffer_load_dword v25, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload ; GFX908-NEXT: buffer_load_dword v26, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload ; GFX908-NEXT: buffer_load_dword v27, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload ; GFX908-NEXT: buffer_load_dword v28, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload ; GFX908-NEXT: buffer_load_dword v29, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload ; GFX908-NEXT: buffer_load_dword v30, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload ; GFX908-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload ; GFX908-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload ; GFX908-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload ; GFX908-NEXT: s_mov_b64 s[4:5], exec ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[30:33] offset:112 ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[26:29] offset:96 ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[22:25] 
offset:80 ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[18:21] offset:64 ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[14:17] offset:48 ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[10:13] offset:32 ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[6:9] offset:16 ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[2:5] ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: s_mov_b64 exec, 1 ; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:164 ; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: v_readlane_b32 s31, v0, 0 ; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:164 ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: s_mov_b64 exec, s[4:5] ; GFX908-NEXT: s_mov_b64 s[4:5], exec ; GFX908-NEXT: s_mov_b64 exec, 1 ; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:164 ; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: v_readlane_b32 s30, v0, 0 ; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:164 ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: s_mov_b64 exec, s[4:5] ; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:160 ; 4-byte Folded Reload ; GFX908-NEXT: ; kill: killed $vgpr40 ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: v_readfirstlane_b32 s4, v0 ; GFX908-NEXT: s_xor_saveexec_b64 s[6:7], -1 ; GFX908-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload ; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:156 ; 4-byte Folded Reload ; GFX908-NEXT: s_mov_b64 exec, -1 ; GFX908-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:152 ; 4-byte Folded Reload ; GFX908-NEXT: s_mov_b64 exec, s[6:7] ; GFX908-NEXT: s_addk_i32 s32, 0xd400 ; GFX908-NEXT: s_mov_b32 s33, s4 ; GFX908-NEXT: 
s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX908-NEXT: s_setpc_b64 s[30:31] %vreg0 = call <32 x float> asm sideeffect "; def $0", "=v"() %v40 = call i32 asm sideeffect "; def $0","=${v40}"() %s11 = call i32 asm sideeffect "; def $0","=${s11}"() %s12 = call i32 asm sideeffect "; def $0","=${s12}"() %s13 = call i32 asm sideeffect "; def $0","=${s13}"() %s14 = call i32 asm sideeffect "; def $0","=${s14}"() %s15 = call i32 asm sideeffect "; def $0","=${s15}"() %s16 = call i32 asm sideeffect "; def $0","=${s16}"() %s17 = call i32 asm sideeffect "; def $0","=${s17}"() %s18 = call i32 asm sideeffect "; def $0","=${s18}"() %s19 = call i32 asm sideeffect "; def $0","=${s19}"() %s20 = call i32 asm sideeffect "; def $0","=${s20}"() call void @foo() call void asm sideeffect "; use $0","${s11}"(i32 %s11) call void asm sideeffect "; use $0","${s12}"(i32 %s12) call void asm sideeffect "; use $0","${s13}"(i32 %s13) call void asm sideeffect "; use $0","${s14}"(i32 %s14) call void asm sideeffect "; use $0","${s15}"(i32 %s15) call void asm sideeffect "; use $0","${s16}"(i32 %s16) call void asm sideeffect "; use $0","${s17}"(i32 %s17) call void asm sideeffect "; use $0","${s18}"(i32 %s18) call void asm sideeffect "; use $0","${s19}"(i32 %s19) call void asm sideeffect "; use $0","${s20}"(i32 %s20) %s21 = call i32 asm sideeffect "; def $0","=${s21}"() %s22 = call i32 asm sideeffect "; def $0","=${s22}"() %s23 = call i32 asm sideeffect "; def $0","=${s23}"() %s24 = call i32 asm sideeffect "; def $0","=${s24}"() %s25 = call i32 asm sideeffect "; def $0","=${s25}"() %s26 = call i32 asm sideeffect "; def $0","=${s26}"() %s27 = call i32 asm sideeffect "; def $0","=${s27}"() %s28 = call i32 asm sideeffect "; def $0","=${s28}"() %s29 = call i32 asm sideeffect "; def $0","=${s29}"() call void @foo() call void asm sideeffect "; use $0","${s21}"(i32 %s21) call void asm sideeffect "; use $0","${s22}"(i32 %s22) call void asm sideeffect "; use $0","${s23}"(i32 %s23) call void asm sideeffect "; use 
$0","${s24}"(i32 %s24) call void asm sideeffect "; use $0","${s25}"(i32 %s25) call void asm sideeffect "; use $0","${s26}"(i32 %s26) call void asm sideeffect "; use $0","${s27}"(i32 %s27) call void asm sideeffect "; use $0","${s28}"(i32 %s28) call void asm sideeffect "; use $0","${s29}"(i32 %s29) call void @foo() store volatile <32 x float> %vreg0, ptr %parg0 ret void } declare void @foo() attributes #0 = { "amdgpu-num-vgpr"="42" "amdgpu-num-sgpr"="40"} !llvm.module.flags = !{!0} !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
; Notes on the module-level items that follow the test function:
;  - @foo is declared without a body. The test function calls it three times;
;    the first two calls sit between a group of inline-asm SGPR defs and the
;    matching inline-asm uses of those same SGPRs.
;  - Attribute group #0 (attached to the test function) sets
;    "amdgpu-num-vgpr"="42" and "amdgpu-num-sgpr"="40". Presumably this budget
;    is what produces the register pressure described in the file header;
;    confirm before changing either value.
;  - The single module flag sets amdgpu_code_object_version to 500.