bolt/deps/llvm-18.1.8/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,GFX11 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,GFX12 %s

define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {
; GFX11-LABEL: load_2dmsaa:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    image_msaa_load v[0:3], v[0:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm ; encoding: [0x98,0x01,0x60,0xf0,0x00,0x00,0x00,0x00]
; GFX11-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: load_2dmsaa:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    image_msaa_load v[0:3], [v0, v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm ; encoding: [0x06,0x20,0x46,0xe4,0x00,0x00,0x00,0x00,0x00,0x01,0x02,0x00]
; GFX12-NEXT:    s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @load_2dmsaa_both(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %fragid) {
; GFX11-LABEL: load_2dmsaa_both:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    image_msaa_load v[0:4], v[0:2], s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_2D_MSAA unorm tfe lwe ; encoding: [0x98,0x02,0x60,0xf0,0x00,0x00,0x60,0x00]
; GFX11-NEXT:    v_mov_b32_e32 v5, 0 ; encoding: [0x80,0x02,0x0a,0x7e]
; GFX11-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
; GFX11-NEXT:    global_store_b32 v5, v4, s[8:9] ; encoding: [0x00,0x00,0x6a,0xdc,0x05,0x04,0x08,0x00]
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: load_2dmsaa_both:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    image_msaa_load v[0:4], [v0, v1, v2], s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_2D_MSAA unorm tfe lwe ; encoding: [0x0e,0x20,0x86,0xe4,0x00,0x01,0x00,0x00,0x00,0x01,0x02,0x00]
; GFX12-NEXT:    v_mov_b32_e32 v5, 0 ; encoding: [0x80,0x02,0x0a,0x7e]
; GFX12-NEXT:    s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
; GFX12-NEXT:    global_store_b32 v5, v4, s[8:9] ; encoding: [0x08,0x80,0x06,0xee,0x00,0x00,0x00,0x02,0x05,0x00,0x00,0x00]
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32i32.i32(i32 2, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 3, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
  store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
; GFX11-LABEL: load_2darraymsaa:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    image_msaa_load v[0:3], v[0:3], s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm ; encoding: [0x9c,0x04,0x60,0xf0,0x00,0x00,0x00,0x00]
; GFX11-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: load_2darraymsaa:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    image_msaa_load v[0:3], [v0, v1, v2, v3], s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm ; encoding: [0x07,0x20,0x06,0xe5,0x00,0x00,0x00,0x00,0x00,0x01,0x02,0x03]
; GFX12-NEXT:    s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32.i32(i32 4, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
; GFX11-LABEL: load_2darraymsaa_tfe:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    image_msaa_load v[0:4], v[0:3], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe ; encoding: [0x9c,0x08,0x60,0xf0,0x00,0x00,0x20,0x00]
; GFX11-NEXT:    v_mov_b32_e32 v5, 0 ; encoding: [0x80,0x02,0x0a,0x7e]
; GFX11-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
; GFX11-NEXT:    global_store_b32 v5, v4, s[8:9] ; encoding: [0x00,0x00,0x6a,0xdc,0x05,0x04,0x08,0x00]
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: load_2darraymsaa_tfe:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    image_msaa_load v[0:4], [v0, v1, v2, v3], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe ; encoding: [0x0f,0x20,0x06,0xe6,0x00,0x00,0x00,0x00,0x00,0x01,0x02,0x03]
; GFX12-NEXT:    v_mov_b32_e32 v5, 0 ; encoding: [0x80,0x02,0x0a,0x7e]
; GFX12-NEXT:    s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
; GFX12-NEXT:    global_store_b32 v5, v4, s[8:9] ; encoding: [0x08,0x80,0x06,0xee,0x00,0x00,0x00,0x02,0x05,0x00,0x00,0x00]
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32i32.i32(i32 8, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
  store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

define amdgpu_ps <4 x float> @load_2dmsaa_glc(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {
; GFX11-LABEL: load_2dmsaa_glc:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    image_msaa_load v[0:3], v[0:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm glc ; encoding: [0x98,0x41,0x60,0xf0,0x00,0x00,0x00,0x00]
; GFX11-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: load_2dmsaa_glc:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    image_msaa_load v[0:3], [v0, v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm th:TH_LOAD_NT ; encoding: [0x06,0x20,0x46,0xe4,0x00,0x00,0x10,0x00,0x00,0x01,0x02,0x00]
; GFX12-NEXT:    s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 1)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @load_2dmsaa_slc(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {
; GFX11-LABEL: load_2dmsaa_slc:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    image_msaa_load v[0:3], v[0:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm slc ; encoding: [0x98,0x11,0x60,0xf0,0x00,0x00,0x00,0x00]
; GFX11-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: load_2dmsaa_slc:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    image_msaa_load v[0:3], [v0, v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm th:TH_LOAD_HT ; encoding: [0x06,0x20,0x46,0xe4,0x00,0x00,0x20,0x00,0x00,0x01,0x02,0x00]
; GFX12-NEXT:    s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 2)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @load_2dmsaa_glc_slc(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {
; GFX11-LABEL: load_2dmsaa_glc_slc:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    image_msaa_load v[0:3], v[0:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm glc slc ; encoding: [0x98,0x51,0x60,0xf0,0x00,0x00,0x00,0x00]
; GFX11-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: load_2dmsaa_glc_slc:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    image_msaa_load v[0:3], [v0, v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm th:TH_LOAD_LU ; encoding: [0x06,0x20,0x46,0xe4,0x00,0x00,0x30,0x00,0x00,0x01,0x02,0x00]
; GFX12-NEXT:    s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 3)
  ret <4 x float> %v
}

define amdgpu_ps <4 x half> @load_2dmsaa_d16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {
; GFX11-LABEL: load_2dmsaa_d16:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    image_msaa_load v[0:1], v[0:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm d16 ; encoding: [0x98,0x01,0x62,0xf0,0x00,0x00,0x00,0x00]
; GFX11-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: load_2dmsaa_d16:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    image_msaa_load v[0:1], [v0, v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm d16 ; encoding: [0x26,0x20,0x46,0xe4,0x00,0x00,0x00,0x00,0x00,0x01,0x02,0x00]
; GFX12-NEXT:    s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x half> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f16.i32(i32 1, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x half> %v
}

define amdgpu_ps <4 x half> @load_2dmsaa_tfe_d16(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %fragid) {
; GFX11-LABEL: load_2dmsaa_tfe_d16:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    image_msaa_load v[0:2], v[0:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm tfe d16 ; encoding: [0x98,0x01,0x62,0xf0,0x00,0x00,0x20,0x00]
; GFX11-NEXT:    v_mov_b32_e32 v3, 0 ; encoding: [0x80,0x02,0x06,0x7e]
; GFX11-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
; GFX11-NEXT:    global_store_b32 v3, v2, s[8:9] ; encoding: [0x00,0x00,0x6a,0xdc,0x03,0x02,0x08,0x00]
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: load_2dmsaa_tfe_d16:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    image_msaa_load v[0:2], [v0, v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm tfe d16 ; encoding: [0x2e,0x20,0x46,0xe4,0x00,0x00,0x00,0x00,0x00,0x01,0x02,0x00]
; GFX12-NEXT:    v_mov_b32_e32 v3, 0 ; encoding: [0x80,0x02,0x06,0x7e]
; GFX12-NEXT:    s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
; GFX12-NEXT:    global_store_b32 v3, v2, s[8:9] ; encoding: [0x08,0x80,0x06,0xee,0x00,0x00,0x00,0x01,0x03,0x00,0x00,0x00]
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call {<4 x half>,i32} @llvm.amdgcn.image.msaa.load.2dmsaa.v4f16i32.i32(i32 1, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue {<4 x half>, i32} %v, 0
  %v.err = extractvalue {<4 x half>, i32} %v, 1
  store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x half> %v.vec
}

define amdgpu_ps <4 x half> @load_2darraymsaa_d16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
; GFX11-LABEL: load_2darraymsaa_d16:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    image_msaa_load v[0:1], v[0:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm d16 ; encoding: [0x9c,0x01,0x62,0xf0,0x00,0x00,0x00,0x00]
; GFX11-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: load_2darraymsaa_d16:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    image_msaa_load v[0:1], [v0, v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm d16 ; encoding: [0x27,0x20,0x46,0xe4,0x00,0x00,0x00,0x00,0x00,0x01,0x02,0x03]
; GFX12-NEXT:    s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x half> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f16.i32(i32 1, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x half> %v
}

define amdgpu_ps <4 x half> @load_2darraymsaa_tfe_d16(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
; GFX11-LABEL: load_2darraymsaa_tfe_d16:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    image_msaa_load v[0:2], v[0:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe d16 ; encoding: [0x9c,0x01,0x62,0xf0,0x00,0x00,0x20,0x00]
; GFX11-NEXT:    v_mov_b32_e32 v3, 0 ; encoding: [0x80,0x02,0x06,0x7e]
; GFX11-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
; GFX11-NEXT:    global_store_b32 v3, v2, s[8:9] ; encoding: [0x00,0x00,0x6a,0xdc,0x03,0x02,0x08,0x00]
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: load_2darraymsaa_tfe_d16:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    image_msaa_load v[0:2], [v0, v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe d16 ; encoding: [0x2f,0x20,0x46,0xe4,0x00,0x00,0x00,0x00,0x00,0x01,0x02,0x03]
; GFX12-NEXT:    v_mov_b32_e32 v3, 0 ; encoding: [0x80,0x02,0x06,0x7e]
; GFX12-NEXT:    s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
; GFX12-NEXT:    global_store_b32 v3, v2, s[8:9] ; encoding: [0x08,0x80,0x06,0xee,0x00,0x00,0x00,0x01,0x03,0x00,0x00,0x00]
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call {<4 x half>,i32} @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f16i32.i32(i32 1, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue {<4 x half>, i32} %v, 0
  %v.err = extractvalue {<4 x half>, i32} %v, 1
  store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x half> %v.vec
}

define amdgpu_ps <4 x float> @load_2dmsaa_a16(<8 x i32> inreg %rsrc, i16 %s, i16 %t, i16 %fragid) {
; GFX11-LABEL: load_2dmsaa_a16:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_perm_b32 v1, v1, v0, 0x5040100 ; encoding: [0x01,0x00,0x44,0xd6,0x01,0x01,0xfe,0x03,0x00,0x01,0x04,0x05]
; GFX11-NEXT:    image_msaa_load v[0:3], v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16 ; encoding: [0x98,0x01,0x61,0xf0,0x01,0x00,0x00,0x00]
; GFX11-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: load_2dmsaa_a16:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100 ; encoding: [0x00,0x00,0x44,0xd6,0x01,0x01,0xfe,0x03,0x00,0x01,0x04,0x05]
; GFX12-NEXT:    image_msaa_load v[0:3], [v0, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16 ; encoding: [0x46,0x20,0x46,0xe4,0x00,0x00,0x00,0x00,0x00,0x02,0x00,0x00]
; GFX12-NEXT:    s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i16(i32 1, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @load_2darraymsaa_a16(<8 x i32> inreg %rsrc, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
; GFX11-LABEL: load_2darraymsaa_a16:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100 ; encoding: [0x02,0x00,0x44,0xd6,0x03,0x05,0xfe,0x03,0x00,0x01,0x04,0x05]
; GFX11-NEXT:    v_perm_b32 v1, v1, v0, 0x5040100 ; encoding: [0x01,0x00,0x44,0xd6,0x01,0x01,0xfe,0x03,0x00,0x01,0x04,0x05]
; GFX11-NEXT:    image_msaa_load v[0:3], v[1:2], s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 ; encoding: [0x9c,0x04,0x61,0xf0,0x01,0x00,0x00,0x00]
; GFX11-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: load_2darraymsaa_a16:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100 ; encoding: [0x02,0x00,0x44,0xd6,0x03,0x05,0xfe,0x03,0x00,0x01,0x04,0x05]
; GFX12-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100 ; encoding: [0x00,0x00,0x44,0xd6,0x01,0x01,0xfe,0x03,0x00,0x01,0x04,0x05]
; GFX12-NEXT:    image_msaa_load v[0:3], [v0, v2], s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 ; encoding: [0x47,0x20,0x06,0xe5,0x00,0x00,0x00,0x00,0x00,0x02,0x00,0x00]
; GFX12-NEXT:    s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32.i16(i32 4, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

declare <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1

declare <4 x half> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare {<4 x half>,i32} @llvm.amdgcn.image.msaa.load.2dmsaa.v4f16i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x half> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f16.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare {<4 x half>,i32} @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f16i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1

declare <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}
Embed LLVM 18.1.8 2025-02-14 19:21:04 +01:00			`; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4`
			`; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -show-mc-encoding < %s \| FileCheck -check-prefixes=GCN,GFX11 %s`
			`; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -show-mc-encoding < %s \| FileCheck -check-prefixes=GCN,GFX12 %s`

			`define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {`
			`; GFX11-LABEL: load_2dmsaa:`
			`; GFX11: ; %bb.0: ; %main_body`
			`; GFX11-NEXT: image_msaa_load v[0:3], v[0:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm ; encoding: [0x98,0x01,0x60,0xf0,0x00,0x00,0x00,0x00]`
			`; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]`
			`; GFX11-NEXT: ; return to shader part epilog`
			`;`
			`; GFX12-LABEL: load_2dmsaa:`
			`; GFX12: ; %bb.0: ; %main_body`
			`; GFX12-NEXT: image_msaa_load v[0:3], [v0, v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm ; encoding: [0x06,0x20,0x46,0xe4,0x00,0x00,0x00,0x00,0x00,0x01,0x02,0x00]`
			`; GFX12-NEXT: s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]`
			`; GFX12-NEXT: ; return to shader part epilog`
			`main_body:`
			`%v = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)`
			`ret <4 x float> %v`
			`}`

			`define amdgpu_ps <4 x float> @load_2dmsaa_both(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %fragid) {`
			`; GFX11-LABEL: load_2dmsaa_both:`
			`; GFX11: ; %bb.0: ; %main_body`
			`; GFX11-NEXT: image_msaa_load v[0:4], v[0:2], s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_2D_MSAA unorm tfe lwe ; encoding: [0x98,0x02,0x60,0xf0,0x00,0x00,0x60,0x00]`
			`; GFX11-NEXT: v_mov_b32_e32 v5, 0 ; encoding: [0x80,0x02,0x0a,0x7e]`
			`; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]`
			`; GFX11-NEXT: global_store_b32 v5, v4, s[8:9] ; encoding: [0x00,0x00,0x6a,0xdc,0x05,0x04,0x08,0x00]`
			`; GFX11-NEXT: ; return to shader part epilog`
			`;`
			`; GFX12-LABEL: load_2dmsaa_both:`
			`; GFX12: ; %bb.0: ; %main_body`
			`; GFX12-NEXT: image_msaa_load v[0:4], [v0, v1, v2], s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_2D_MSAA unorm tfe lwe ; encoding: [0x0e,0x20,0x86,0xe4,0x00,0x01,0x00,0x00,0x00,0x01,0x02,0x00]`
			`; GFX12-NEXT: v_mov_b32_e32 v5, 0 ; encoding: [0x80,0x02,0x0a,0x7e]`
			`; GFX12-NEXT: s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]`
			`; GFX12-NEXT: global_store_b32 v5, v4, s[8:9] ; encoding: [0x08,0x80,0x06,0xee,0x00,0x00,0x00,0x02,0x05,0x00,0x00,0x00]`
			`; GFX12-NEXT: ; return to shader part epilog`
			`main_body:`
			`%v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32i32.i32(i32 2, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 3, i32 0)`
			`%v.vec = extractvalue {<4 x float>, i32} %v, 0`
			`%v.err = extractvalue {<4 x float>, i32} %v, 1`
			`store i32 %v.err, ptr addrspace(1) %out, align 4`
			`ret <4 x float> %v.vec`
			`}`

			`define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %fragid) {`
			`; GFX11-LABEL: load_2darraymsaa:`
			`; GFX11: ; %bb.0: ; %main_body`
			`; GFX11-NEXT: image_msaa_load v[0:3], v[0:3], s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm ; encoding: [0x9c,0x04,0x60,0xf0,0x00,0x00,0x00,0x00]`
			`; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]`
			`; GFX11-NEXT: ; return to shader part epilog`
			`;`
			`; GFX12-LABEL: load_2darraymsaa:`
			`; GFX12: ; %bb.0: ; %main_body`
			`; GFX12-NEXT: image_msaa_load v[0:3], [v0, v1, v2, v3], s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm ; encoding: [0x07,0x20,0x06,0xe5,0x00,0x00,0x00,0x00,0x00,0x01,0x02,0x03]`
			`; GFX12-NEXT: s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]`
			`; GFX12-NEXT: ; return to shader part epilog`
			`main_body:`
			`%v = call <4 x float> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32.i32(i32 4, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)`
			`ret <4 x float> %v`
			`}`

			`define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {`
			`; GFX11-LABEL: load_2darraymsaa_tfe:`
			`; GFX11: ; %bb.0: ; %main_body`
			`; GFX11-NEXT: image_msaa_load v[0:4], v[0:3], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe ; encoding: [0x9c,0x08,0x60,0xf0,0x00,0x00,0x20,0x00]`
			`; GFX11-NEXT: v_mov_b32_e32 v5, 0 ; encoding: [0x80,0x02,0x0a,0x7e]`
			`; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]`
			`; GFX11-NEXT: global_store_b32 v5, v4, s[8:9] ; encoding: [0x00,0x00,0x6a,0xdc,0x05,0x04,0x08,0x00]`
			`; GFX11-NEXT: ; return to shader part epilog`
			`;`
			`; GFX12-LABEL: load_2darraymsaa_tfe:`
			`; GFX12: ; %bb.0: ; %main_body`
			`; GFX12-NEXT: image_msaa_load v[0:4], [v0, v1, v2, v3], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe ; encoding: [0x0f,0x20,0x06,0xe6,0x00,0x00,0x00,0x00,0x00,0x01,0x02,0x03]`
			`; GFX12-NEXT: v_mov_b32_e32 v5, 0 ; encoding: [0x80,0x02,0x0a,0x7e]`
			`; GFX12-NEXT: s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]`
			`; GFX12-NEXT: global_store_b32 v5, v4, s[8:9] ; encoding: [0x08,0x80,0x06,0xee,0x00,0x00,0x00,0x02,0x05,0x00,0x00,0x00]`
			`; GFX12-NEXT: ; return to shader part epilog`
			`main_body:`
			`%v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32i32.i32(i32 8, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)`
			`%v.vec = extractvalue {<4 x float>, i32} %v, 0`
			`%v.err = extractvalue {<4 x float>, i32} %v, 1`
			`store i32 %v.err, ptr addrspace(1) %out, align 4`
			`ret <4 x float> %v.vec`
			`}`

			`define amdgpu_ps <4 x float> @load_2dmsaa_glc(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {`
			`; GFX11-LABEL: load_2dmsaa_glc:`
			`; GFX11: ; %bb.0: ; %main_body`
			`; GFX11-NEXT: image_msaa_load v[0:3], v[0:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm glc ; encoding: [0x98,0x41,0x60,0xf0,0x00,0x00,0x00,0x00]`
			`; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]`
			`; GFX11-NEXT: ; return to shader part epilog`
			`;`
			`; GFX12-LABEL: load_2dmsaa_glc:`
			`; GFX12: ; %bb.0: ; %main_body`
			`; GFX12-NEXT: image_msaa_load v[0:3], [v0, v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm th:TH_LOAD_NT ; encoding: [0x06,0x20,0x46,0xe4,0x00,0x00,0x10,0x00,0x00,0x01,0x02,0x00]`
			`; GFX12-NEXT: s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]`
			`; GFX12-NEXT: ; return to shader part epilog`
			`main_body:`
			`%v = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 1)`
			`ret <4 x float> %v`
			`}`

			`define amdgpu_ps <4 x float> @load_2dmsaa_slc(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {`
			`; GFX11-LABEL: load_2dmsaa_slc:`
			`; GFX11: ; %bb.0: ; %main_body`
			`; GFX11-NEXT: image_msaa_load v[0:3], v[0:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm slc ; encoding: [0x98,0x11,0x60,0xf0,0x00,0x00,0x00,0x00]`
			`; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]`
			`; GFX11-NEXT: ; return to shader part epilog`
			`;`
			`; GFX12-LABEL: load_2dmsaa_slc:`
			`; GFX12: ; %bb.0: ; %main_body`
			`; GFX12-NEXT: image_msaa_load v[0:3], [v0, v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm th:TH_LOAD_HT ; encoding: [0x06,0x20,0x46,0xe4,0x00,0x00,0x20,0x00,0x00,0x01,0x02,0x00]`
			`; GFX12-NEXT: s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]`
			`; GFX12-NEXT: ; return to shader part epilog`
			`main_body:`
			`%v = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 2)`
			`ret <4 x float> %v`
			`}`

			`define amdgpu_ps <4 x float> @load_2dmsaa_glc_slc(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {`
			`; GFX11-LABEL: load_2dmsaa_glc_slc:`
			`; GFX11: ; %bb.0: ; %main_body`
			`; GFX11-NEXT: image_msaa_load v[0:3], v[0:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm glc slc ; encoding: [0x98,0x51,0x60,0xf0,0x00,0x00,0x00,0x00]`
			`; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]`
			`; GFX11-NEXT: ; return to shader part epilog`
			`;`
			`; GFX12-LABEL: load_2dmsaa_glc_slc:`
			`; GFX12: ; %bb.0: ; %main_body`
			`; GFX12-NEXT: image_msaa_load v[0:3], [v0, v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm th:TH_LOAD_LU ; encoding: [0x06,0x20,0x46,0xe4,0x00,0x00,0x30,0x00,0x00,0x01,0x02,0x00]`
			`; GFX12-NEXT: s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]`
			`; GFX12-NEXT: ; return to shader part epilog`
			`main_body:`
			`%v = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 3)`
			`ret <4 x float> %v`
			`}`

			`define amdgpu_ps <4 x half> @load_2dmsaa_d16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {`
			`; GFX11-LABEL: load_2dmsaa_d16:`
			`; GFX11: ; %bb.0: ; %main_body`
			`; GFX11-NEXT: image_msaa_load v[0:1], v[0:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm d16 ; encoding: [0x98,0x01,0x62,0xf0,0x00,0x00,0x00,0x00]`
			`; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]`
			`; GFX11-NEXT: ; return to shader part epilog`
			`;`
			`; GFX12-LABEL: load_2dmsaa_d16:`
			`; GFX12: ; %bb.0: ; %main_body`
			`; GFX12-NEXT: image_msaa_load v[0:1], [v0, v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm d16 ; encoding: [0x26,0x20,0x46,0xe4,0x00,0x00,0x00,0x00,0x00,0x01,0x02,0x00]`
			`; GFX12-NEXT: s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]`
			`; GFX12-NEXT: ; return to shader part epilog`
			`main_body:`
			`%v = call <4 x half> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f16.i32(i32 1, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)`
			`ret <4 x half> %v`
			`}`

			`define amdgpu_ps <4 x half> @load_2dmsaa_tfe_d16(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %fragid) {`
			`; GFX11-LABEL: load_2dmsaa_tfe_d16:`
			`; GFX11: ; %bb.0: ; %main_body`
			`; GFX11-NEXT: image_msaa_load v[0:2], v[0:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm tfe d16 ; encoding: [0x98,0x01,0x62,0xf0,0x00,0x00,0x20,0x00]`
			`; GFX11-NEXT: v_mov_b32_e32 v3, 0 ; encoding: [0x80,0x02,0x06,0x7e]`
			`; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]`
			`; GFX11-NEXT: global_store_b32 v3, v2, s[8:9] ; encoding: [0x00,0x00,0x6a,0xdc,0x03,0x02,0x08,0x00]`
			`; GFX11-NEXT: ; return to shader part epilog`
			`;`
			`; GFX12-LABEL: load_2dmsaa_tfe_d16:`
			`; GFX12: ; %bb.0: ; %main_body`
			`; GFX12-NEXT: image_msaa_load v[0:2], [v0, v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm tfe d16 ; encoding: [0x2e,0x20,0x46,0xe4,0x00,0x00,0x00,0x00,0x00,0x01,0x02,0x00]`
			`; GFX12-NEXT: v_mov_b32_e32 v3, 0 ; encoding: [0x80,0x02,0x06,0x7e]`
			`; GFX12-NEXT: s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]`
			`; GFX12-NEXT: global_store_b32 v3, v2, s[8:9] ; encoding: [0x08,0x80,0x06,0xee,0x00,0x00,0x00,0x01,0x03,0x00,0x00,0x00]`
			`; GFX12-NEXT: ; return to shader part epilog`
			`main_body:`
			`%v = call {<4 x half>,i32} @llvm.amdgcn.image.msaa.load.2dmsaa.v4f16i32.i32(i32 1, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)`
			`%v.vec = extractvalue {<4 x half>, i32} %v, 0`
			`%v.err = extractvalue {<4 x half>, i32} %v, 1`
			`store i32 %v.err, ptr addrspace(1) %out, align 4`
			`ret <4 x half> %v.vec`
			`}`

			`define amdgpu_ps <4 x half> @load_2darraymsaa_d16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %fragid) {`
			`; GFX11-LABEL: load_2darraymsaa_d16:`
			`; GFX11: ; %bb.0: ; %main_body`
			`; GFX11-NEXT: image_msaa_load v[0:1], v[0:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm d16 ; encoding: [0x9c,0x01,0x62,0xf0,0x00,0x00,0x00,0x00]`
			`; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]`
			`; GFX11-NEXT: ; return to shader part epilog`
			`;`
			`; GFX12-LABEL: load_2darraymsaa_d16:`
			`; GFX12: ; %bb.0: ; %main_body`
			`; GFX12-NEXT: image_msaa_load v[0:1], [v0, v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm d16 ; encoding: [0x27,0x20,0x46,0xe4,0x00,0x00,0x00,0x00,0x00,0x01,0x02,0x03]`
			`; GFX12-NEXT: s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]`
			`; GFX12-NEXT: ; return to shader part epilog`
			`main_body:`
			`%v = call <4 x half> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f16.i32(i32 1, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)`
			`ret <4 x half> %v`
			`}`

			`define amdgpu_ps <4 x half> @load_2darraymsaa_tfe_d16(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {`
			`; GFX11-LABEL: load_2darraymsaa_tfe_d16:`
			`; GFX11: ; %bb.0: ; %main_body`
			`; GFX11-NEXT: image_msaa_load v[0:2], v[0:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe d16 ; encoding: [0x9c,0x01,0x62,0xf0,0x00,0x00,0x20,0x00]`
			`; GFX11-NEXT: v_mov_b32_e32 v3, 0 ; encoding: [0x80,0x02,0x06,0x7e]`
			`; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]`
			`; GFX11-NEXT: global_store_b32 v3, v2, s[8:9] ; encoding: [0x00,0x00,0x6a,0xdc,0x03,0x02,0x08,0x00]`
			`; GFX11-NEXT: ; return to shader part epilog`
			`;`
			`; GFX12-LABEL: load_2darraymsaa_tfe_d16:`
			`; GFX12: ; %bb.0: ; %main_body`
			`; GFX12-NEXT: image_msaa_load v[0:2], [v0, v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe d16 ; encoding: [0x2f,0x20,0x46,0xe4,0x00,0x00,0x00,0x00,0x00,0x01,0x02,0x03]`
			`; GFX12-NEXT: v_mov_b32_e32 v3, 0 ; encoding: [0x80,0x02,0x06,0x7e]`
			`; GFX12-NEXT: s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]`
			`; GFX12-NEXT: global_store_b32 v3, v2, s[8:9] ; encoding: [0x08,0x80,0x06,0xee,0x00,0x00,0x00,0x01,0x03,0x00,0x00,0x00]`
			`; GFX12-NEXT: ; return to shader part epilog`
			`main_body:`
			`%v = call {<4 x half>,i32} @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f16i32.i32(i32 1, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)`
			`%v.vec = extractvalue {<4 x half>, i32} %v, 0`
			`%v.err = extractvalue {<4 x half>, i32} %v, 1`
			`store i32 %v.err, ptr addrspace(1) %out, align 4`
			`ret <4 x half> %v.vec`
			`}`

			`define amdgpu_ps <4 x float> @load_2dmsaa_a16(<8 x i32> inreg %rsrc, i16 %s, i16 %t, i16 %fragid) {`
			`; GFX11-LABEL: load_2dmsaa_a16:`
			`; GFX11: ; %bb.0: ; %main_body`
			`; GFX11-NEXT: v_perm_b32 v1, v1, v0, 0x5040100 ; encoding: [0x01,0x00,0x44,0xd6,0x01,0x01,0xfe,0x03,0x00,0x01,0x04,0x05]`
			`; GFX11-NEXT: image_msaa_load v[0:3], v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16 ; encoding: [0x98,0x01,0x61,0xf0,0x01,0x00,0x00,0x00]`
			`; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]`
			`; GFX11-NEXT: ; return to shader part epilog`
			`;`
			`; GFX12-LABEL: load_2dmsaa_a16:`
			`; GFX12: ; %bb.0: ; %main_body`
			`; GFX12-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 ; encoding: [0x00,0x00,0x44,0xd6,0x01,0x01,0xfe,0x03,0x00,0x01,0x04,0x05]`
			`; GFX12-NEXT: image_msaa_load v[0:3], [v0, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16 ; encoding: [0x46,0x20,0x46,0xe4,0x00,0x00,0x00,0x00,0x00,0x02,0x00,0x00]`
			`; GFX12-NEXT: s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]`
			`; GFX12-NEXT: ; return to shader part epilog`
			`main_body:`
			`%v = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i16(i32 1, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)`
			`ret <4 x float> %v`
			`}`

			`define amdgpu_ps <4 x float> @load_2darraymsaa_a16(<8 x i32> inreg %rsrc, i16 %s, i16 %t, i16 %slice, i16 %fragid) {`
			`; GFX11-LABEL: load_2darraymsaa_a16:`
			`; GFX11: ; %bb.0: ; %main_body`
			`; GFX11-NEXT: v_perm_b32 v2, v3, v2, 0x5040100 ; encoding: [0x02,0x00,0x44,0xd6,0x03,0x05,0xfe,0x03,0x00,0x01,0x04,0x05]`
			`; GFX11-NEXT: v_perm_b32 v1, v1, v0, 0x5040100 ; encoding: [0x01,0x00,0x44,0xd6,0x01,0x01,0xfe,0x03,0x00,0x01,0x04,0x05]`
			`; GFX11-NEXT: image_msaa_load v[0:3], v[1:2], s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 ; encoding: [0x9c,0x04,0x61,0xf0,0x01,0x00,0x00,0x00]`
			`; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]`
			`; GFX11-NEXT: ; return to shader part epilog`
			`;`
			`; GFX12-LABEL: load_2darraymsaa_a16:`
			`; GFX12: ; %bb.0: ; %main_body`
			`; GFX12-NEXT: v_perm_b32 v2, v3, v2, 0x5040100 ; encoding: [0x02,0x00,0x44,0xd6,0x03,0x05,0xfe,0x03,0x00,0x01,0x04,0x05]`
			`; GFX12-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 ; encoding: [0x00,0x00,0x44,0xd6,0x01,0x01,0xfe,0x03,0x00,0x01,0x04,0x05]`
			`; GFX12-NEXT: image_msaa_load v[0:3], [v0, v2], s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 ; encoding: [0x47,0x20,0x06,0xe5,0x00,0x00,0x00,0x00,0x00,0x02,0x00,0x00]`
			`; GFX12-NEXT: s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]`
			`; GFX12-NEXT: ; return to shader part epilog`
			`main_body:`
			`%v = call <4 x float> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32.i16(i32 4, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)`
			`ret <4 x float> %v`
			`}`

			`declare <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1`
			`declare {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1`
			`declare <4 x float> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1`
			`declare {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1`

			`declare <4 x half> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1`
			`declare {<4 x half>,i32} @llvm.amdgcn.image.msaa.load.2dmsaa.v4f16i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1`
			`declare <4 x half> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f16.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1`
			`declare {<4 x half>,i32} @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f16i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1`

			`declare <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1`
			`declare <4 x float> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #1`

			`attributes #0 = { nounwind }`
			`attributes #1 = { nounwind readonly }`
			`;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:`
			`; GCN: {{.*}}`