# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck %s -check-prefixes=CHECK,GFX10 # RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck %s -check-prefixes=CHECK,GFX12 # CHECK-LABEL: name: merge_s_buffer_load_x2 # CHECK: S_BUFFER_LOAD_DWORDX2_IMM %0, 0, 0 :: (dereferenceable invariant load (s64), align 4) name: merge_s_buffer_load_x2 tracksRegLiveness: true body: | bb.0: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32)) %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s32)) S_ENDPGM 0 ... --- # CHECK-LABEL: name: merge_s_buffer_load_x1_x2 # CHECK: S_BUFFER_LOAD_DWORD_IMM %0, 0, 0 :: (dereferenceable invariant load (s32)) # CHECK: S_BUFFER_LOAD_DWORDX2_IMM %0, 4, 0 :: (dereferenceable invariant load (s64)) name: merge_s_buffer_load_x1_x2 tracksRegLiveness: true body: | bb.0: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32)) %2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s64)) S_ENDPGM 0 ... --- # CHECK-LABEL: name: merge_s_buffer_load_x2_x1 # GFX10: S_BUFFER_LOAD_DWORDX2_IMM %0, 0, 0 :: (dereferenceable invariant load (s64)) # GFX10: S_BUFFER_LOAD_DWORD_IMM %0, 8, 0 :: (dereferenceable invariant load (s32)) # GFX12: S_BUFFER_LOAD_DWORDX3_IMM %0, 0, 0 :: (dereferenceable invariant load (s96), align 8) name: merge_s_buffer_load_x2_x1 tracksRegLiveness: true body: | bb.0: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s64)) %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 8, 0 :: (dereferenceable invariant load (s32)) S_ENDPGM 0 ... --- # CHECK-LABEL: name: merge_s_buffer_load_x4 # CHECK: S_BUFFER_LOAD_DWORDX4_IMM %0, 0, 0 :: (dereferenceable invariant load (s128), align 4) name: merge_s_buffer_load_x4 tracksRegLiveness: true body: | bb.0: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32)) %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s32)) %3:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 8, 0 :: (dereferenceable invariant load (s32)) %4:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 12, 0 :: (dereferenceable invariant load (s32)) S_ENDPGM 0 ... --- # CHECK-LABEL: name: merge_s_buffer_load_x1_x3 # CHECK: S_BUFFER_LOAD_DWORD_IMM %0, 0, 0 :: (dereferenceable invariant load (s32)) # CHECK: S_BUFFER_LOAD_DWORDX3_IMM %0, 4, 0 :: (dereferenceable invariant load (s96), align 16) name: merge_s_buffer_load_x1_x3 tracksRegLiveness: true body: | bb.0: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32)) %2:sgpr_96 = S_BUFFER_LOAD_DWORDX3_IMM %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s96)) S_ENDPGM 0 ... --- # CHECK-LABEL: name: merge_s_buffer_load_x3_x1 # CHECK: S_BUFFER_LOAD_DWORDX4_IMM %0, 0, 0 :: (dereferenceable invariant load (s128)) name: merge_s_buffer_load_x3_x1 tracksRegLiveness: true body: | bb.0: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr_96 = S_BUFFER_LOAD_DWORDX3_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s96)) %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 12, 0 :: (dereferenceable invariant load (s32)) S_ENDPGM 0 ... --- # CHECK-LABEL: name: merge_s_buffer_load_x8 # CHECK: S_BUFFER_LOAD_DWORDX8_IMM %0, 0, 0 :: (dereferenceable invariant load (s256), align 4) name: merge_s_buffer_load_x8 tracksRegLiveness: true body: | bb.0: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32)) %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s32)) %3:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 8, 0 :: (dereferenceable invariant load (s32)) %4:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 12, 0 :: (dereferenceable invariant load (s32)) %5:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32)) %6:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 20, 0 :: (dereferenceable invariant load (s32)) %7:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 24, 0 :: (dereferenceable invariant load (s32)) %8:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 28, 0 :: (dereferenceable invariant load (s32)) S_ENDPGM 0 ... --- # CHECK-LABEL: name: merge_s_buffer_load_x8_reordered # CHECK: S_BUFFER_LOAD_DWORDX8_IMM %0, 0, 0 :: (dereferenceable invariant load (s256), align 4) name: merge_s_buffer_load_x8_reordered tracksRegLiveness: true body: | bb.0: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 20, 0 :: (dereferenceable invariant load (s32)) %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s32)) %3:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32)) %4:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 28, 0 :: (dereferenceable invariant load (s32)) %5:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 12, 0 :: (dereferenceable invariant load (s32)) %6:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32)) %7:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 8, 0 :: (dereferenceable invariant load (s32)) %8:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 24, 0 :: (dereferenceable invariant load (s32)) S_ENDPGM 0 ... --- # CHECK-LABEL: name: merge_s_buffer_load_x8_out_of_x2 # CHECK: S_BUFFER_LOAD_DWORDX8_IMM %0, 0, 0 :: (dereferenceable invariant load (s256), align 8) name: merge_s_buffer_load_x8_out_of_x2 tracksRegLiveness: true body: | bb.0: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s64)) %2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 8, 0 :: (dereferenceable invariant load (s64)) %3:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s64)) %4:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 24, 0 :: (dereferenceable invariant load (s64)) S_ENDPGM 0 ... --- # CHECK-LABEL: name: merge_s_buffer_load_x8_out_of_x4 # CHECK: S_BUFFER_LOAD_DWORDX8_IMM %0, 0, 0 :: (dereferenceable invariant load (s256), align 16) name: merge_s_buffer_load_x8_out_of_x4 tracksRegLiveness: true body: | bb.0: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s128)) %2:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s128)) S_ENDPGM 0 ... --- # CHECK-LABEL: name: merge_s_buffer_load_x8_mixed # CHECK: S_BUFFER_LOAD_DWORDX8_IMM %0, 0, 0 :: (dereferenceable invariant load (s256), align 16) name: merge_s_buffer_load_x8_mixed tracksRegLiveness: true body: | bb.0: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s128)) %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32)) %3:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 24, 0 :: (dereferenceable invariant load (s64)) %4:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 20, 0 :: (dereferenceable invariant load (s32)) S_ENDPGM 0 ... --- # CHECK-LABEL: name: merge_s_buffer_load_sgpr_imm # CHECK: S_BUFFER_LOAD_DWORDX4_SGPR_IMM %0, %1, 0, 0 :: (dereferenceable invariant load (s128), align 4) name: merge_s_buffer_load_sgpr_imm tracksRegLiveness: true body: | bb.0: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sreg_32 = COPY $sgpr4 %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %0:sgpr_128, %1:sreg_32, 0, 0 :: (dereferenceable invariant load (s32)) %3:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %0:sgpr_128, %1:sreg_32, 4, 0 :: (dereferenceable invariant load (s32)) %4:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %0:sgpr_128, %1:sreg_32, 8, 0 :: (dereferenceable invariant load (s32)) %5:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %0:sgpr_128, %1:sreg_32, 12, 0 :: (dereferenceable invariant load (s32)) S_ENDPGM 0 ... --- # CHECK-LABEL: name: no_merge_for_different_soffsets # CHECK: S_BUFFER_LOAD_DWORD_SGPR_IMM %0, %1, 4, 0 :: (dereferenceable invariant load (s32)) # CHECK: S_BUFFER_LOAD_DWORD_SGPR_IMM %0, %2, 8, 0 :: (dereferenceable invariant load (s32)) name: no_merge_for_different_soffsets tracksRegLiveness: true body: | bb.0: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sreg_32 = COPY $sgpr4 %2:sreg_32 = COPY $sgpr5 %3:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %0:sgpr_128, %1:sreg_32, 4, 0 :: (dereferenceable invariant load (s32)) %4:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %0:sgpr_128, %2:sreg_32, 8, 0 :: (dereferenceable invariant load (s32)) S_ENDPGM 0 ... --- # CHECK-LABEL: name: no_merge_for_non_adjacent_offsets # CHECK: S_BUFFER_LOAD_DWORD_SGPR_IMM %0, %1, 4, 0 :: (dereferenceable invariant load (s32)) # CHECK: S_BUFFER_LOAD_DWORD_SGPR_IMM %0, %1, 12, 0 :: (dereferenceable invariant load (s32)) name: no_merge_for_non_adjacent_offsets tracksRegLiveness: true body: | bb.0: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sreg_32 = COPY $sgpr4 %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %0:sgpr_128, %1:sreg_32, 4, 0 :: (dereferenceable invariant load (s32)) %3:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %0:sgpr_128, %1:sreg_32, 12, 0 :: (dereferenceable invariant load (s32)) S_ENDPGM 0 ... ---