diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 3d2a74adab96..8ecbd62903a2 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -966,6 +966,15 @@ defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_s defm SCRATCH_LOAD_LDS_DWORD : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_dword">; let SubtargetPredicate = isGFX12Plus in { + let Uses = [EXEC, M0] in { + defm GLOBAL_LOAD_BLOCK : FLAT_Global_Load_Pseudo <"global_load_block", VReg_1024>; + defm GLOBAL_STORE_BLOCK : FLAT_Global_Store_Pseudo <"global_store_block", VReg_1024>; + } + let Uses = [EXEC, FLAT_SCR, M0] in { + defm SCRATCH_LOAD_BLOCK : FLAT_Scratch_Load_Pseudo <"scratch_load_block", VReg_1024>; + defm SCRATCH_STORE_BLOCK : FLAT_Scratch_Store_Pseudo <"scratch_store_block", VReg_1024>; + } + let WaveSizePredicate = isWave32 in { let Mnemonic = "global_load_tr_b128" in defm GLOBAL_LOAD_TR_B128_w32 : FLAT_Global_Load_Pseudo <"global_load_tr_b128_w32", VReg_128>; @@ -2658,6 +2667,8 @@ defm GLOBAL_STORE_BYTE_D16_HI : VGLOBAL_Real_AllAddr_gfx12<0x024, "global_s defm GLOBAL_STORE_SHORT_D16_HI : VGLOBAL_Real_AllAddr_gfx12<0x025, "global_store_d16_hi_b16">; defm GLOBAL_LOAD_DWORD_ADDTID : VGLOBAL_Real_AllAddr_gfx12<0x028, "global_load_addtid_b32">; defm GLOBAL_STORE_DWORD_ADDTID : VGLOBAL_Real_AllAddr_gfx12<0x029, "global_store_addtid_b32">; +defm GLOBAL_LOAD_BLOCK : VGLOBAL_Real_AllAddr_gfx12<0x053>; +defm GLOBAL_STORE_BLOCK : VGLOBAL_Real_AllAddr_gfx12<0x054>; defm GLOBAL_ATOMIC_SWAP : VGLOBAL_Real_Atomics_gfx12<0x033, "global_atomic_swap_b32">; defm GLOBAL_ATOMIC_CMPSWAP : VGLOBAL_Real_Atomics_gfx12<0x034, "global_atomic_cmpswap_b32">; @@ -2728,3 +2739,6 @@ defm SCRATCH_LOAD_SBYTE_D16_HI : VSCRATCH_Real_AllAddr_gfx12<0x22, "scratch_ defm SCRATCH_LOAD_SHORT_D16_HI : VSCRATCH_Real_AllAddr_gfx12<0x23, "scratch_load_d16_hi_b16">; defm SCRATCH_STORE_BYTE_D16_HI : 
VSCRATCH_Real_AllAddr_gfx12<0x24, "scratch_store_d16_hi_b8">; defm SCRATCH_STORE_SHORT_D16_HI : VSCRATCH_Real_AllAddr_gfx12<0x25, "scratch_store_d16_hi_b16">; + +defm SCRATCH_LOAD_BLOCK : VSCRATCH_Real_AllAddr_gfx12<0x53>; +defm SCRATCH_STORE_BLOCK : VSCRATCH_Real_AllAddr_gfx12<0x54>; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 80c623514bda..c64b3a7c356f 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2230,7 +2230,8 @@ class getLdStRegisterOperand<RegisterClass RC> { !eq(RC.Size, 64) : AVLdSt_64, !eq(RC.Size, 96) : AVLdSt_96, !eq(RC.Size, 128) : AVLdSt_128, - !eq(RC.Size, 160) : AVLdSt_160); + !eq(RC.Size, 160) : AVLdSt_160, + !eq(RC.Size, 1024) : AVLdSt_1024); } class getHasVOP3DPP ; def AVLdSt_96 : AVLdStOperand<AV_96>; def AVLdSt_128 : AVLdStOperand<AV_128>; def AVLdSt_160 : AVLdStOperand<AV_160>; +def AVLdSt_1024 : AVLdStOperand<AV_1024>; //===----------------------------------------------------------------------===// // ACSrc_* Operands with an AGPR or an inline constant diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vflat.s b/llvm/test/MC/AMDGPU/gfx12_asm_vflat.s index 575bc1a8255a..30bfaff8f17a 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vflat.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vflat.s @@ -1896,6 +1896,27 @@ global_load_u8 v1, v[0:1], off offset:64 global_load_u8 v1, v[3:4], off // GFX12: encoding: [0x7c,0x00,0x04,0xee,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00] +global_load_block v[9:40], v0, s[0:1] offset:-64 +// GFX12: encoding: [0x00,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] + +global_load_block v[9:40], v0, s[0:1] offset:64 +// GFX12: encoding: [0x00,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0x40,0x00,0x00] + +global_load_block v[9:40], v5, s[2:3] +// GFX12: encoding: [0x02,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x05,0x00,0x00,0x00] + +global_load_block v[9:40], v[0:1], off offset:-64 +// GFX12: encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] + +global_load_block v[9:40], 
v[0:1], off offset:64 +// GFX12: encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0x40,0x00,0x00] + +global_load_block v[9:40], v[5:6], off +// GFX12: encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x05,0x00,0x00,0x00] + +global_load_block v[9:40], v[5:6], off th:TH_LOAD_HT scope:SCOPE_SE +// GFX12: encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x24,0x00,0x05,0x00,0x00,0x00] + global_store_addtid_b32 v2, off offset:-64 // GFX12: encoding: [0x7c,0x40,0x0a,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] @@ -2058,6 +2079,27 @@ global_store_d16_hi_b8 v[0:1], v2, off offset:64 global_store_d16_hi_b8 v[3:4], v1, off // GFX12: encoding: [0x7c,0x00,0x09,0xee,0x00,0x00,0x80,0x00,0x03,0x00,0x00,0x00] +global_store_block v0, v[2:33], s[0:1] offset:-64 +// GFX12: encoding: [0x00,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] + +global_store_block v0, v[2:33], s[0:1] offset:64 +// GFX12: encoding: [0x00,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] + +global_store_block v1, v[3:34], s[2:3] +// GFX12: encoding: [0x02,0x00,0x15,0xee,0x00,0x00,0x80,0x01,0x01,0x00,0x00,0x00] + +global_store_block v[0:1], v[2:33], off offset:-64 +// GFX12: encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] + +global_store_block v[0:1], v[2:33], off offset:64 +// GFX12: encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] + +global_store_block v[1:2], v[3:34], off +// GFX12: encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0x80,0x01,0x01,0x00,0x00,0x00] + +global_store_block v[1:2], v[3:34], off th:TH_STORE_HT scope:SCOPE_SE +// GFX12: encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0xa4,0x01,0x01,0x00,0x00,0x00] + global_inv // GFX12: encoding: [0x7c,0xc0,0x0a,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00] @@ -2490,6 +2532,36 @@ scratch_load_u8 v1, v0, s0 offset:64 scratch_load_u8 v1, v2, s1 // GFX12: encoding: [0x01,0x00,0x04,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x00,0x00] +scratch_load_block v[3:34], off, off offset:-64 +// GFX12: encoding: 
[0x7c,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] + +scratch_load_block v[3:34], off, off offset:64 +// GFX12: encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0x40,0x00,0x00] + +scratch_load_block v[3:34], off, s0 offset:-64 +// GFX12: encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] + +scratch_load_block v[3:34], off, s0 offset:64 +// GFX12: encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0x40,0x00,0x00] + +scratch_load_block v[3:34], v0, off offset:-64 +// GFX12: encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0xc0,0xff,0xff] + +scratch_load_block v[3:34], v0, off offset:64 +// GFX12: encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0x40,0x00,0x00] + +scratch_load_block v[3:34], v0, s0 offset:-64 +// GFX12: encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0xc0,0xff,0xff] + +scratch_load_block v[3:34], v0, s0 offset:64 +// GFX12: encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0x40,0x00,0x00] + +scratch_load_block v[3:34], v2, s1 +// GFX12: encoding: [0x01,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x02,0x00,0x00,0x00] + +scratch_load_block v[3:34], v2, s1 th:TH_LOAD_HT scope:SCOPE_SE +// GFX12: encoding: [0x01,0xc0,0x14,0xed,0x03,0x00,0x26,0x00,0x02,0x00,0x00,0x00] + scratch_store_b128 off, v[2:5], off offset:-64 // GFX12: encoding: [0x7c,0x40,0x07,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] @@ -2732,3 +2804,33 @@ scratch_store_d16_hi_b8 v0, v2, s0 offset:64 scratch_store_d16_hi_b8 v1, v2, s3 // GFX12: encoding: [0x03,0x00,0x09,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00] + +scratch_store_block off, v[2:33], off offset:-64 +// GFX12: encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] + +scratch_store_block off, v[2:33], off offset:64 +// GFX12: encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] + +scratch_store_block off, v[2:33], s0 offset:-64 +// GFX12: encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] + 
+scratch_store_block off, v[2:33], s0 offset:64 +// GFX12: encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] + +scratch_store_block v0, v[2:33], off offset:-64 +// GFX12: encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff] + +scratch_store_block v0, v[2:33], off offset:64 +// GFX12: encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00] + +scratch_store_block v0, v[2:33], s0 offset:-64 +// GFX12: encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff] + +scratch_store_block v0, v[2:33], s0 offset:64 +// GFX12: encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00] + +scratch_store_block v1, v[2:33], s3 +// GFX12: encoding: [0x03,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00] + +scratch_store_block v1, v[2:33], s3 th:TH_STORE_HT scope:SCOPE_SE +// GFX12: encoding: [0x03,0x00,0x15,0xed,0x00,0x00,0x26,0x01,0x01,0x00,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vflat.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vflat.txt index e0b658b1fda3..7953e0eb67c1 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vflat.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vflat.txt @@ -1155,6 +1155,27 @@ # GFX12: global_load_u8 v1, v[3:4], off ; encoding: [0x7c,0x00,0x04,0xee,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00] 0x7c,0x00,0x04,0xee,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00 +# GFX12: global_load_block v[9:40], v0, s[0:1] offset:-64 ; encoding: [0x00,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x00,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX12: global_load_block v[9:40], v0, s[0:1] offset:64 ; encoding: [0x00,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x00,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX12: global_load_block v[9:40], v5, s[2:3] ; encoding: [0x02,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x05,0x00,0x00,0x00] +0x02,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x05,0x00,0x00,0x00 + +# 
GFX12: global_load_block v[9:40], v[0:1], off offset:-64 ; encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX12: global_load_block v[9:40], v[0:1], off offset:64 ; encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX12: global_load_block v[9:40], v[5:6], off ; encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x05,0x00,0x00,0x00] +0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x05,0x00,0x00,0x00 + +# GFX12: global_load_block v[9:40], v[5:6], off th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x24,0x00,0x05,0x00,0x00,0x00] +0x7c,0xc0,0x14,0xee,0x09,0x00,0x24,0x00,0x05,0x00,0x00,0x00 + # GFX12: global_store_addtid_b32 v2, off offset:64 ; encoding: [0x7c,0x40,0x0a,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] 0x7c,0x40,0x0a,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 @@ -1257,6 +1278,27 @@ # GFX12: global_store_d16_hi_b8 v[3:4], v1, off ; encoding: [0x7c,0x00,0x09,0xee,0x00,0x00,0x80,0x00,0x03,0x00,0x00,0x00] 0x7c,0x00,0x09,0xee,0x00,0x00,0x80,0x00,0x03,0x00,0x00,0x00 +# GFX12: global_store_block v0, v[2:33], s[0:1] offset:-64 ; encoding: [0x00,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX12: global_store_block v0, v[2:33], s[0:1] offset:64 ; encoding: [0x00,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX12: global_store_block v1, v[3:34], s[2:3] ; encoding: [0x02,0x00,0x15,0xee,0x00,0x00,0x80,0x01,0x01,0x00,0x00,0x00] +0x02,0x00,0x15,0xee,0x00,0x00,0x80,0x01,0x01,0x00,0x00,0x00 + +# GFX12: global_store_block v[0:1], v[2:33], off offset:-64 ; encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX12: global_store_block v[0:1], 
v[2:33], off offset:64 ; encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX12: global_store_block v[1:2], v[3:34], off ; encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0x80,0x01,0x01,0x00,0x00,0x00] +0x7c,0x00,0x15,0xee,0x00,0x00,0x80,0x01,0x01,0x00,0x00,0x00 + +# GFX12: global_store_block v[1:2], v[3:34], off th:TH_STORE_HT scope:SCOPE_SE ; encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0xa4,0x01,0x01,0x00,0x00,0x00] +0x7c,0x00,0x15,0xee,0x00,0x00,0xa4,0x01,0x01,0x00,0x00,0x00 + # GFX12: global_inv ; encoding: [0x7c,0xc0,0x0a,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00] 0x7c,0xc0,0x0a,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 @@ -1518,6 +1560,36 @@ # GFX12: scratch_load_u8 v1, v2, s1 ; encoding: [0x01,0x00,0x04,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x00,0x00] 0x01,0x00,0x04,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x00,0x00 +# GFX12: scratch_load_block v[3:34], off, off offset:-64 ; encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX12: scratch_load_block v[3:34], off, off offset:64 ; encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX12: scratch_load_block v[3:34], off, s0 offset:-64 ; encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x00,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX12: scratch_load_block v[3:34], off, s0 offset:64 ; encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x00,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX12: scratch_load_block v[3:34], v0, off offset:-64 ; encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0xc0,0xff,0xff + +# GFX12: scratch_load_block v[3:34], v0, off offset:64 ; encoding: 
[0x7c,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0x40,0x00,0x00 + +# GFX12: scratch_load_block v[3:34], v0, s0 offset:-64 ; encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0xc0,0xff,0xff] +0x00,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0xc0,0xff,0xff + +# GFX12: scratch_load_block v[3:34], v0, s0 offset:64 ; encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0x40,0x00,0x00] +0x00,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0x40,0x00,0x00 + +# GFX12: scratch_load_block v[3:34], v2, s1 ; encoding: [0x01,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x02,0x00,0x00,0x00] +0x01,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x02,0x00,0x00,0x00 + +# GFX12: scratch_load_block v[3:34], v2, s1 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x01,0xc0,0x14,0xed,0x03,0x00,0x26,0x00,0x02,0x00,0x00,0x00] +0x01,0xc0,0x14,0xed,0x03,0x00,0x26,0x00,0x02,0x00,0x00,0x00 + # GFX12: scratch_store_b128 off, v[2:5], off offset:64 ; encoding: [0x7c,0x40,0x07,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] 0x7c,0x40,0x07,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 @@ -1658,3 +1730,33 @@ # GFX12: scratch_store_d16_hi_b8 v1, v2, s3 ; encoding: [0x03,0x00,0x09,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00] 0x03,0x00,0x09,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00 + +# GFX12: scratch_store_block off, v[2:33], off offset:-64 ; encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX12: scratch_store_block off, v[2:33], off offset:64 ; encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX12: scratch_store_block off, v[2:33], s0 offset:-64 ; encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX12: scratch_store_block off, v[2:33], s0 offset:64 ; encoding: 
[0x00,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX12: scratch_store_block v0, v[2:33], off offset:-64 ; encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff + +# GFX12: scratch_store_block v0, v[2:33], off offset:64 ; encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00 + +# GFX12: scratch_store_block v0, v[2:33], s0 offset:-64 ; encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff] +0x00,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff + +# GFX12: scratch_store_block v0, v[2:33], s0 offset:64 ; encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00] +0x00,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00 + +# GFX12: scratch_store_block v1, v[2:33], s3 ; encoding: [0x03,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00] +0x03,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00 + +# GFX12: scratch_store_block v1, v[2:33], s3 th:TH_STORE_HT scope:SCOPE_SE ; encoding: [0x03,0x00,0x15,0xed,0x00,0x00,0x26,0x01,0x01,0x00,0x00,0x00] +0x03,0x00,0x15,0xed,0x00,0x00,0x26,0x01,0x01,0x00,0x00,0x00