819 lines
40 KiB
LLVM
819 lines
40 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX906 %s
|
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX908 %s
|
|
|
|
; Due to high register pressure, regalloc would split the live range of the WWM VGPR used for SGPR spills
|
|
; and introduce a copy. That copy should be a whole-wave copy, with exec mask manipulation around it.
|
|
; FIXME: The destination register of the whole-wave copy should be treated as needing all of its lanes preserved,
|
|
; with a spill/restore at function prologue/epilogue. Otherwise the copy might unintentionally clobber its inactive lanes.
|
|
define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
|
|
; GFX906-LABEL: preserve_wwm_copy_dstreg:
|
|
; GFX906: ; %bb.0:
|
|
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX906-NEXT: s_mov_b32 s16, s33
|
|
; GFX906-NEXT: s_mov_b32 s33, s32
|
|
; GFX906-NEXT: s_xor_saveexec_b64 s[18:19], -1
|
|
; GFX906-NEXT: buffer_store_dword v33, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: s_mov_b64 exec, -1
|
|
; GFX906-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: s_mov_b64 exec, s[18:19]
|
|
; GFX906-NEXT: ; implicit-def: $vgpr2
|
|
; GFX906-NEXT: s_mov_b32 s21, s15
|
|
; GFX906-NEXT: v_writelane_b32 v2, s6, 0
|
|
; GFX906-NEXT: v_writelane_b32 v2, s7, 1
|
|
; GFX906-NEXT: v_writelane_b32 v2, s21, 2
|
|
; GFX906-NEXT: s_mov_b32 s22, s14
|
|
; GFX906-NEXT: v_writelane_b32 v2, s22, 3
|
|
; GFX906-NEXT: s_mov_b32 s23, s13
|
|
; GFX906-NEXT: v_writelane_b32 v2, s23, 4
|
|
; GFX906-NEXT: s_mov_b32 s24, s12
|
|
; GFX906-NEXT: v_writelane_b32 v2, s24, 5
|
|
; GFX906-NEXT: s_mov_b64 s[26:27], s[10:11]
|
|
; GFX906-NEXT: v_writelane_b32 v2, s26, 6
|
|
; GFX906-NEXT: v_writelane_b32 v2, s27, 7
|
|
; GFX906-NEXT: v_writelane_b32 v2, s8, 8
|
|
; GFX906-NEXT: v_writelane_b32 v41, s16, 2
|
|
; GFX906-NEXT: v_writelane_b32 v2, s9, 9
|
|
; GFX906-NEXT: v_writelane_b32 v41, s30, 0
|
|
; GFX906-NEXT: v_writelane_b32 v2, s4, 10
|
|
; GFX906-NEXT: s_addk_i32 s32, 0x2800
|
|
; GFX906-NEXT: v_writelane_b32 v41, s31, 1
|
|
; GFX906-NEXT: v_mov_b32_e32 v32, v31
|
|
; GFX906-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: v_writelane_b32 v2, s5, 11
|
|
; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1
|
|
; GFX906-NEXT: v_mov_b32_e32 v33, v2
|
|
; GFX906-NEXT: s_mov_b64 exec, s[34:35]
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; def v[0:31]
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: buffer_store_dword v24, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: buffer_store_dword v25, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: buffer_store_dword v26, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: buffer_store_dword v27, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: buffer_store_dword v28, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: buffer_store_dword v29, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: buffer_store_dword v30, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: buffer_store_dword v31, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; def v40
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; def s11
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1
|
|
; GFX906-NEXT: v_mov_b32_e32 v40, v33
|
|
; GFX906-NEXT: s_mov_b64 exec, s[34:35]
|
|
; GFX906-NEXT: v_writelane_b32 v40, s11, 12
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; def s12
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: v_writelane_b32 v40, s12, 13
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; def s13
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: v_writelane_b32 v40, s13, 14
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; def s14
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: v_writelane_b32 v40, s14, 15
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; def s15
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: v_writelane_b32 v40, s15, 16
|
|
; GFX906-NEXT: s_getpc_b64 s[10:11]
|
|
; GFX906-NEXT: s_add_u32 s10, s10, foo@gotpcrel32@lo+4
|
|
; GFX906-NEXT: s_addc_u32 s11, s11, foo@gotpcrel32@hi+12
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; def s16
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: v_writelane_b32 v40, s16, 17
|
|
; GFX906-NEXT: s_load_dwordx2 s[10:11], s[10:11], 0x0
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; def s17
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: v_writelane_b32 v40, s17, 18
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; def s18
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: v_writelane_b32 v40, s18, 19
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; def s19
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: v_writelane_b32 v40, s19, 20
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; def s20
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: v_writelane_b32 v40, s20, 21
|
|
; GFX906-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX906-NEXT: v_writelane_b32 v40, s10, 22
|
|
; GFX906-NEXT: v_writelane_b32 v40, s11, 23
|
|
; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1
|
|
; GFX906-NEXT: s_mov_b64 exec, s[34:35]
|
|
; GFX906-NEXT: v_readlane_b32 s16, v40, 22
|
|
; GFX906-NEXT: s_mov_b32 s12, s24
|
|
; GFX906-NEXT: s_mov_b32 s13, s23
|
|
; GFX906-NEXT: s_mov_b32 s14, s22
|
|
; GFX906-NEXT: v_mov_b32_e32 v31, v32
|
|
; GFX906-NEXT: s_mov_b32 s15, s21
|
|
; GFX906-NEXT: s_mov_b64 s[10:11], s[26:27]
|
|
; GFX906-NEXT: v_readlane_b32 s17, v40, 23
|
|
; GFX906-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
|
|
; GFX906-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
|
; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1
|
|
; GFX906-NEXT: s_mov_b64 exec, s[34:35]
|
|
; GFX906-NEXT: v_readlane_b32 s11, v40, 12
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; use s11
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: v_readlane_b32 s12, v40, 13
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; use s12
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: v_readlane_b32 s13, v40, 14
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; use s13
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: v_readlane_b32 s14, v40, 15
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; use s14
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: v_readlane_b32 s15, v40, 16
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; use s15
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: v_readlane_b32 s16, v40, 17
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; use s16
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: v_readlane_b32 s17, v40, 18
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; use s17
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: v_readlane_b32 s18, v40, 19
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; use s18
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: v_readlane_b32 s19, v40, 20
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; use s19
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: v_readlane_b32 s20, v40, 21
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; use s20
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; def s21
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; def s22
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; def s23
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; def s24
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; def s25
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; def s26
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; def s27
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; def s28
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; def s29
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: v_writelane_b32 v40, s21, 24
|
|
; GFX906-NEXT: v_writelane_b32 v40, s22, 25
|
|
; GFX906-NEXT: v_writelane_b32 v40, s23, 26
|
|
; GFX906-NEXT: v_writelane_b32 v40, s24, 27
|
|
; GFX906-NEXT: v_writelane_b32 v40, s25, 28
|
|
; GFX906-NEXT: v_writelane_b32 v40, s26, 29
|
|
; GFX906-NEXT: v_writelane_b32 v40, s27, 30
|
|
; GFX906-NEXT: v_writelane_b32 v40, s28, 31
|
|
; GFX906-NEXT: v_writelane_b32 v40, s29, 32
|
|
; GFX906-NEXT: v_readlane_b32 s4, v40, 10
|
|
; GFX906-NEXT: v_readlane_b32 s6, v40, 0
|
|
; GFX906-NEXT: v_readlane_b32 s8, v40, 8
|
|
; GFX906-NEXT: v_readlane_b32 s10, v40, 6
|
|
; GFX906-NEXT: v_readlane_b32 s16, v40, 22
|
|
; GFX906-NEXT: v_readlane_b32 s12, v40, 5
|
|
; GFX906-NEXT: v_readlane_b32 s13, v40, 4
|
|
; GFX906-NEXT: v_readlane_b32 s14, v40, 3
|
|
; GFX906-NEXT: v_readlane_b32 s15, v40, 2
|
|
; GFX906-NEXT: v_readlane_b32 s5, v40, 11
|
|
; GFX906-NEXT: v_readlane_b32 s7, v40, 1
|
|
; GFX906-NEXT: v_readlane_b32 s9, v40, 9
|
|
; GFX906-NEXT: v_readlane_b32 s11, v40, 7
|
|
; GFX906-NEXT: v_readlane_b32 s17, v40, 23
|
|
; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1
|
|
; GFX906-NEXT: s_mov_b64 exec, s[34:35]
|
|
; GFX906-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
|
; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1
|
|
; GFX906-NEXT: s_mov_b64 exec, s[34:35]
|
|
; GFX906-NEXT: v_readlane_b32 s21, v40, 24
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; use s21
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: v_readlane_b32 s22, v40, 25
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; use s22
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: v_readlane_b32 s23, v40, 26
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; use s23
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: v_readlane_b32 s24, v40, 27
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; use s24
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: v_readlane_b32 s25, v40, 28
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; use s25
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: v_readlane_b32 s26, v40, 29
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; use s26
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: v_readlane_b32 s27, v40, 30
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; use s27
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: v_readlane_b32 s28, v40, 31
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; use s28
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: v_readlane_b32 s29, v40, 32
|
|
; GFX906-NEXT: ;;#ASMSTART
|
|
; GFX906-NEXT: ; use s29
|
|
; GFX906-NEXT: ;;#ASMEND
|
|
; GFX906-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: v_readlane_b32 s4, v40, 10
|
|
; GFX906-NEXT: v_readlane_b32 s6, v40, 0
|
|
; GFX906-NEXT: v_readlane_b32 s8, v40, 8
|
|
; GFX906-NEXT: v_readlane_b32 s10, v40, 6
|
|
; GFX906-NEXT: v_readlane_b32 s16, v40, 22
|
|
; GFX906-NEXT: v_readlane_b32 s5, v40, 11
|
|
; GFX906-NEXT: v_readlane_b32 s7, v40, 1
|
|
; GFX906-NEXT: v_readlane_b32 s9, v40, 9
|
|
; GFX906-NEXT: v_readlane_b32 s11, v40, 7
|
|
; GFX906-NEXT: v_readlane_b32 s12, v40, 5
|
|
; GFX906-NEXT: v_readlane_b32 s13, v40, 4
|
|
; GFX906-NEXT: v_readlane_b32 s14, v40, 3
|
|
; GFX906-NEXT: v_readlane_b32 s15, v40, 2
|
|
; GFX906-NEXT: v_readlane_b32 s17, v40, 23
|
|
; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1
|
|
; GFX906-NEXT: s_mov_b64 exec, s[34:35]
|
|
; GFX906-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
|
; GFX906-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: buffer_load_dword v23, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: buffer_load_dword v24, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: buffer_load_dword v25, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: buffer_load_dword v26, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: buffer_load_dword v27, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: buffer_load_dword v28, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: buffer_load_dword v29, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: buffer_load_dword v30, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: v_readlane_b32 s31, v41, 1
|
|
; GFX906-NEXT: v_readlane_b32 s30, v41, 0
|
|
; GFX906-NEXT: ; kill: killed $vgpr40
|
|
; GFX906-NEXT: v_readlane_b32 s4, v41, 2
|
|
; GFX906-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX906-NEXT: flat_store_dwordx4 v[0:1], v[30:33] offset:112
|
|
; GFX906-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX906-NEXT: flat_store_dwordx4 v[0:1], v[26:29] offset:96
|
|
; GFX906-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX906-NEXT: flat_store_dwordx4 v[0:1], v[22:25] offset:80
|
|
; GFX906-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX906-NEXT: flat_store_dwordx4 v[0:1], v[18:21] offset:64
|
|
; GFX906-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX906-NEXT: flat_store_dwordx4 v[0:1], v[14:17] offset:48
|
|
; GFX906-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX906-NEXT: flat_store_dwordx4 v[0:1], v[10:13] offset:32
|
|
; GFX906-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX906-NEXT: flat_store_dwordx4 v[0:1], v[6:9] offset:16
|
|
; GFX906-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX906-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
|
|
; GFX906-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX906-NEXT: s_xor_saveexec_b64 s[6:7], -1
|
|
; GFX906-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:152 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: s_mov_b64 exec, -1
|
|
; GFX906-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload
|
|
; GFX906-NEXT: s_mov_b64 exec, s[6:7]
|
|
; GFX906-NEXT: s_addk_i32 s32, 0xd800
|
|
; GFX906-NEXT: s_mov_b32 s33, s4
|
|
; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX906-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX908-LABEL: preserve_wwm_copy_dstreg:
|
|
; GFX908: ; %bb.0:
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX908-NEXT: s_mov_b32 s16, s33
|
|
; GFX908-NEXT: s_mov_b32 s33, s32
|
|
; GFX908-NEXT: s_xor_saveexec_b64 s[18:19], -1
|
|
; GFX908-NEXT: buffer_store_dword v33, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: s_mov_b64 exec, -1
|
|
; GFX908-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: s_mov_b64 exec, s[18:19]
|
|
; GFX908-NEXT: v_mov_b32_e32 v3, s16
|
|
; GFX908-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: s_addk_i32 s32, 0x2c00
|
|
; GFX908-NEXT: s_mov_b64 s[16:17], exec
|
|
; GFX908-NEXT: s_mov_b64 exec, 1
|
|
; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:164
|
|
; GFX908-NEXT: v_writelane_b32 v2, s30, 0
|
|
; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:164
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: s_mov_b64 exec, s[16:17]
|
|
; GFX908-NEXT: s_mov_b64 s[16:17], exec
|
|
; GFX908-NEXT: s_mov_b64 exec, 1
|
|
; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:164
|
|
; GFX908-NEXT: v_writelane_b32 v2, s31, 0
|
|
; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:164
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: s_mov_b64 exec, s[16:17]
|
|
; GFX908-NEXT: ; implicit-def: $vgpr2
|
|
; GFX908-NEXT: s_mov_b32 s21, s15
|
|
; GFX908-NEXT: v_writelane_b32 v2, s6, 0
|
|
; GFX908-NEXT: v_writelane_b32 v2, s7, 1
|
|
; GFX908-NEXT: v_writelane_b32 v2, s21, 2
|
|
; GFX908-NEXT: s_mov_b32 s22, s14
|
|
; GFX908-NEXT: v_writelane_b32 v2, s22, 3
|
|
; GFX908-NEXT: s_mov_b32 s23, s13
|
|
; GFX908-NEXT: v_writelane_b32 v2, s23, 4
|
|
; GFX908-NEXT: s_mov_b32 s24, s12
|
|
; GFX908-NEXT: v_writelane_b32 v2, s24, 5
|
|
; GFX908-NEXT: s_mov_b64 s[26:27], s[10:11]
|
|
; GFX908-NEXT: v_writelane_b32 v2, s26, 6
|
|
; GFX908-NEXT: v_writelane_b32 v2, s27, 7
|
|
; GFX908-NEXT: v_writelane_b32 v2, s8, 8
|
|
; GFX908-NEXT: v_writelane_b32 v2, s9, 9
|
|
; GFX908-NEXT: v_writelane_b32 v2, s4, 10
|
|
; GFX908-NEXT: v_mov_b32_e32 v32, v31
|
|
; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: v_writelane_b32 v2, s5, 11
|
|
; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1
|
|
; GFX908-NEXT: v_mov_b32_e32 v33, v2
|
|
; GFX908-NEXT: s_mov_b64 exec, s[34:35]
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; def v[0:31]
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: buffer_store_dword v24, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: buffer_store_dword v25, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: buffer_store_dword v26, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: buffer_store_dword v27, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: buffer_store_dword v28, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: buffer_store_dword v29, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: buffer_store_dword v30, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: buffer_store_dword v31, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; def v40
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; def s11
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1
|
|
; GFX908-NEXT: v_mov_b32_e32 v40, v33
|
|
; GFX908-NEXT: s_mov_b64 exec, s[34:35]
|
|
; GFX908-NEXT: v_writelane_b32 v40, s11, 12
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; def s12
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: v_writelane_b32 v40, s12, 13
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; def s13
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: v_writelane_b32 v40, s13, 14
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; def s14
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: v_writelane_b32 v40, s14, 15
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; def s15
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: v_writelane_b32 v40, s15, 16
|
|
; GFX908-NEXT: s_getpc_b64 s[10:11]
|
|
; GFX908-NEXT: s_add_u32 s10, s10, foo@gotpcrel32@lo+4
|
|
; GFX908-NEXT: s_addc_u32 s11, s11, foo@gotpcrel32@hi+12
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; def s16
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: v_writelane_b32 v40, s16, 17
|
|
; GFX908-NEXT: s_load_dwordx2 s[10:11], s[10:11], 0x0
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; def s17
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: v_writelane_b32 v40, s17, 18
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; def s18
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: v_writelane_b32 v40, s18, 19
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; def s19
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: v_writelane_b32 v40, s19, 20
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; def s20
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: v_writelane_b32 v40, s20, 21
|
|
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX908-NEXT: v_writelane_b32 v40, s10, 22
|
|
; GFX908-NEXT: v_writelane_b32 v40, s11, 23
|
|
; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1
|
|
; GFX908-NEXT: s_mov_b64 exec, s[34:35]
|
|
; GFX908-NEXT: v_readlane_b32 s16, v40, 22
|
|
; GFX908-NEXT: s_mov_b32 s12, s24
|
|
; GFX908-NEXT: s_mov_b32 s13, s23
|
|
; GFX908-NEXT: s_mov_b32 s14, s22
|
|
; GFX908-NEXT: v_mov_b32_e32 v31, v32
|
|
; GFX908-NEXT: s_mov_b32 s15, s21
|
|
; GFX908-NEXT: s_mov_b64 s[10:11], s[26:27]
|
|
; GFX908-NEXT: v_readlane_b32 s17, v40, 23
|
|
; GFX908-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
|
|
; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
|
; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1
|
|
; GFX908-NEXT: s_mov_b64 exec, s[34:35]
|
|
; GFX908-NEXT: v_readlane_b32 s11, v40, 12
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; use s11
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: v_readlane_b32 s12, v40, 13
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; use s12
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: v_readlane_b32 s13, v40, 14
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; use s13
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: v_readlane_b32 s14, v40, 15
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; use s14
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: v_readlane_b32 s15, v40, 16
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; use s15
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: v_readlane_b32 s16, v40, 17
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; use s16
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: v_readlane_b32 s17, v40, 18
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; use s17
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: v_readlane_b32 s18, v40, 19
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; use s18
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: v_readlane_b32 s19, v40, 20
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; use s19
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: v_readlane_b32 s20, v40, 21
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; use s20
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; def s21
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; def s22
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; def s23
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; def s24
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; def s25
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; def s26
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; def s27
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; def s28
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; def s29
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: v_writelane_b32 v40, s21, 24
|
|
; GFX908-NEXT: v_writelane_b32 v40, s22, 25
|
|
; GFX908-NEXT: v_writelane_b32 v40, s23, 26
|
|
; GFX908-NEXT: v_writelane_b32 v40, s24, 27
|
|
; GFX908-NEXT: v_writelane_b32 v40, s25, 28
|
|
; GFX908-NEXT: v_writelane_b32 v40, s26, 29
|
|
; GFX908-NEXT: v_writelane_b32 v40, s27, 30
|
|
; GFX908-NEXT: v_writelane_b32 v40, s28, 31
|
|
; GFX908-NEXT: v_writelane_b32 v40, s29, 32
|
|
; GFX908-NEXT: v_readlane_b32 s4, v40, 10
|
|
; GFX908-NEXT: v_readlane_b32 s6, v40, 0
|
|
; GFX908-NEXT: v_readlane_b32 s8, v40, 8
|
|
; GFX908-NEXT: v_readlane_b32 s10, v40, 6
|
|
; GFX908-NEXT: v_readlane_b32 s16, v40, 22
|
|
; GFX908-NEXT: v_readlane_b32 s12, v40, 5
|
|
; GFX908-NEXT: v_readlane_b32 s13, v40, 4
|
|
; GFX908-NEXT: v_readlane_b32 s14, v40, 3
|
|
; GFX908-NEXT: v_readlane_b32 s15, v40, 2
|
|
; GFX908-NEXT: v_readlane_b32 s5, v40, 11
|
|
; GFX908-NEXT: v_readlane_b32 s7, v40, 1
|
|
; GFX908-NEXT: v_readlane_b32 s9, v40, 9
|
|
; GFX908-NEXT: v_readlane_b32 s11, v40, 7
|
|
; GFX908-NEXT: v_readlane_b32 s17, v40, 23
|
|
; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1
|
|
; GFX908-NEXT: s_mov_b64 exec, s[34:35]
|
|
; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
|
; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1
|
|
; GFX908-NEXT: s_mov_b64 exec, s[34:35]
|
|
; GFX908-NEXT: v_readlane_b32 s21, v40, 24
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; use s21
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: v_readlane_b32 s22, v40, 25
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; use s22
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: v_readlane_b32 s23, v40, 26
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; use s23
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: v_readlane_b32 s24, v40, 27
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; use s24
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: v_readlane_b32 s25, v40, 28
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; use s25
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: v_readlane_b32 s26, v40, 29
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; use s26
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: v_readlane_b32 s27, v40, 30
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; use s27
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: v_readlane_b32 s28, v40, 31
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; use s28
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: v_readlane_b32 s29, v40, 32
|
|
; GFX908-NEXT: ;;#ASMSTART
|
|
; GFX908-NEXT: ; use s29
|
|
; GFX908-NEXT: ;;#ASMEND
|
|
; GFX908-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: v_readlane_b32 s4, v40, 10
|
|
; GFX908-NEXT: v_readlane_b32 s6, v40, 0
|
|
; GFX908-NEXT: v_readlane_b32 s8, v40, 8
|
|
; GFX908-NEXT: v_readlane_b32 s10, v40, 6
|
|
; GFX908-NEXT: v_readlane_b32 s16, v40, 22
|
|
; GFX908-NEXT: v_readlane_b32 s5, v40, 11
|
|
; GFX908-NEXT: v_readlane_b32 s7, v40, 1
|
|
; GFX908-NEXT: v_readlane_b32 s9, v40, 9
|
|
; GFX908-NEXT: v_readlane_b32 s11, v40, 7
|
|
; GFX908-NEXT: v_readlane_b32 s12, v40, 5
|
|
; GFX908-NEXT: v_readlane_b32 s13, v40, 4
|
|
; GFX908-NEXT: v_readlane_b32 s14, v40, 3
|
|
; GFX908-NEXT: v_readlane_b32 s15, v40, 2
|
|
; GFX908-NEXT: v_readlane_b32 s17, v40, 23
|
|
; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1
|
|
; GFX908-NEXT: s_mov_b64 exec, s[34:35]
|
|
; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
|
; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: buffer_load_dword v23, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: buffer_load_dword v24, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: buffer_load_dword v25, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: buffer_load_dword v26, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: buffer_load_dword v27, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: buffer_load_dword v28, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: buffer_load_dword v29, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: buffer_load_dword v30, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: s_mov_b64 s[4:5], exec
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[30:33] offset:112
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[26:29] offset:96
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[22:25] offset:80
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[18:21] offset:64
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[14:17] offset:48
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[10:13] offset:32
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[6:9] offset:16
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: s_mov_b64 exec, 1
|
|
; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:164
|
|
; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: v_readlane_b32 s31, v0, 0
|
|
; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:164
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: s_mov_b64 exec, s[4:5]
|
|
; GFX908-NEXT: s_mov_b64 s[4:5], exec
|
|
; GFX908-NEXT: s_mov_b64 exec, 1
|
|
; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:164
|
|
; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: v_readlane_b32 s30, v0, 0
|
|
; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:164
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: s_mov_b64 exec, s[4:5]
|
|
; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:160 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: ; kill: killed $vgpr40
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX908-NEXT: v_readfirstlane_b32 s4, v0
|
|
; GFX908-NEXT: s_xor_saveexec_b64 s[6:7], -1
|
|
; GFX908-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:156 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: s_mov_b64 exec, -1
|
|
; GFX908-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:152 ; 4-byte Folded Reload
|
|
; GFX908-NEXT: s_mov_b64 exec, s[6:7]
|
|
; GFX908-NEXT: s_addk_i32 s32, 0xd400
|
|
; GFX908-NEXT: s_mov_b32 s33, s4
|
|
; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GFX908-NEXT: s_setpc_b64 s[30:31]
|
|
%vreg0 = call <32 x float> asm sideeffect "; def $0", "=v"()
|
|
%v40 = call i32 asm sideeffect "; def $0","=${v40}"()
|
|
|
|
%s11 = call i32 asm sideeffect "; def $0","=${s11}"()
|
|
%s12 = call i32 asm sideeffect "; def $0","=${s12}"()
|
|
%s13 = call i32 asm sideeffect "; def $0","=${s13}"()
|
|
%s14 = call i32 asm sideeffect "; def $0","=${s14}"()
|
|
%s15 = call i32 asm sideeffect "; def $0","=${s15}"()
|
|
%s16 = call i32 asm sideeffect "; def $0","=${s16}"()
|
|
%s17 = call i32 asm sideeffect "; def $0","=${s17}"()
|
|
%s18 = call i32 asm sideeffect "; def $0","=${s18}"()
|
|
%s19 = call i32 asm sideeffect "; def $0","=${s19}"()
|
|
%s20 = call i32 asm sideeffect "; def $0","=${s20}"()
|
|
call void @foo()
|
|
call void asm sideeffect "; use $0","${s11}"(i32 %s11)
|
|
call void asm sideeffect "; use $0","${s12}"(i32 %s12)
|
|
call void asm sideeffect "; use $0","${s13}"(i32 %s13)
|
|
call void asm sideeffect "; use $0","${s14}"(i32 %s14)
|
|
call void asm sideeffect "; use $0","${s15}"(i32 %s15)
|
|
call void asm sideeffect "; use $0","${s16}"(i32 %s16)
|
|
call void asm sideeffect "; use $0","${s17}"(i32 %s17)
|
|
call void asm sideeffect "; use $0","${s18}"(i32 %s18)
|
|
call void asm sideeffect "; use $0","${s19}"(i32 %s19)
|
|
call void asm sideeffect "; use $0","${s20}"(i32 %s20)
|
|
|
|
%s21 = call i32 asm sideeffect "; def $0","=${s21}"()
|
|
%s22 = call i32 asm sideeffect "; def $0","=${s22}"()
|
|
%s23 = call i32 asm sideeffect "; def $0","=${s23}"()
|
|
%s24 = call i32 asm sideeffect "; def $0","=${s24}"()
|
|
%s25 = call i32 asm sideeffect "; def $0","=${s25}"()
|
|
%s26 = call i32 asm sideeffect "; def $0","=${s26}"()
|
|
%s27 = call i32 asm sideeffect "; def $0","=${s27}"()
|
|
%s28 = call i32 asm sideeffect "; def $0","=${s28}"()
|
|
%s29 = call i32 asm sideeffect "; def $0","=${s29}"()
|
|
call void @foo()
|
|
call void asm sideeffect "; use $0","${s21}"(i32 %s21)
|
|
call void asm sideeffect "; use $0","${s22}"(i32 %s22)
|
|
call void asm sideeffect "; use $0","${s23}"(i32 %s23)
|
|
call void asm sideeffect "; use $0","${s24}"(i32 %s24)
|
|
call void asm sideeffect "; use $0","${s25}"(i32 %s25)
|
|
call void asm sideeffect "; use $0","${s26}"(i32 %s26)
|
|
call void asm sideeffect "; use $0","${s27}"(i32 %s27)
|
|
call void asm sideeffect "; use $0","${s28}"(i32 %s28)
|
|
call void asm sideeffect "; use $0","${s29}"(i32 %s29)
|
|
|
|
call void @foo()
|
|
|
|
store volatile <32 x float> %vreg0, ptr %parg0
|
|
|
|
ret void
|
|
}
|
|
|
|
; External function taking no arguments and returning nothing; it is only
; declared here, and the test IR in this file reaches it via `call void @foo()`.
declare void @foo()
|
|
|
|
; Attribute group #0: sets the string attributes "amdgpu-num-vgpr"="42" and
; "amdgpu-num-sgpr"="40" on every function that references #0.
; NOTE(review): presumably these restrict the VGPR/SGPR budget to create the
; high register pressure described in the comment at the top of the file --
; confirm against the AMDGPU backend documentation.
attributes #0 = { "amdgpu-num-vgpr"="42" "amdgpu-num-sgpr"="40"}
|
|
|
|
; Module-level flags: a single entry, !0, with behavior 1, key
; "amdgpu_code_object_version" and value 500.
; NOTE(review): presumably this pins the AMDGPU code object version used when
; compiling this test -- confirm the meaning of 500 in the AMDGPU usage guide.
!llvm.module.flags = !{!0}
|
|
!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
|