From 39b2e35f3da7bfe3acc67f637edfdfd383f9bb03 Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Tue, 1 Oct 2024 10:49:50 +0100 Subject: [PATCH] [RISCV][test] Precommit tests showing codegen for unaligned load/store with zbkb We have missed opportunities for selecting pack* instructions, which will be addressed in future patches. --- .../CodeGen/RISCV/unaligned-load-store.ll | 168 +++++++++++++++--- 1 file changed, 143 insertions(+), 25 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll index 10497db6edc4..9af18428adf1 100644 --- a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll +++ b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll @@ -1,8 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ -; RUN: | FileCheck -check-prefixes=ALL,SLOW,RV32I %s +; RUN: | FileCheck -check-prefixes=ALL,SLOW,SLOWBASE,RV32I %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ -; RUN: | FileCheck -check-prefixes=ALL,SLOW,RV64I %s +; RUN: | FileCheck -check-prefixes=ALL,SLOW,SLOWBASE,RV64I %s +; RUN: llc -mtriple=riscv32 -mattr=+zbkb -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=ALL,SLOW,SLOWZBKB,RV32IZBKB %s +; RUN: llc -mtriple=riscv64 -mattr=+zbkb -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=ALL,SLOW,SLOWZBKB,RV64IZBKB %s ; RUN: llc -mtriple=riscv32 -mattr=+unaligned-scalar-mem -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=ALL,FAST,RV32I-FAST %s ; RUN: llc -mtriple=riscv64 -mattr=+unaligned-scalar-mem -verify-machineinstrs < %s \ @@ -37,16 +41,35 @@ define i16 @load_i16(ptr %p) { } define i24 @load_i24(ptr %p) { -; SLOW-LABEL: load_i24: -; SLOW: # %bb.0: -; SLOW-NEXT: lbu a1, 1(a0) -; SLOW-NEXT: lbu a2, 0(a0) -; SLOW-NEXT: lbu a0, 2(a0) -; SLOW-NEXT: slli a1, a1, 8 -; SLOW-NEXT: or a1, a1, a2 -; SLOW-NEXT: slli a0, a0, 16 -; SLOW-NEXT: or a0, a1, a0 -; 
SLOW-NEXT: ret +; SLOWBASE-LABEL: load_i24: +; SLOWBASE: # %bb.0: +; SLOWBASE-NEXT: lbu a1, 1(a0) +; SLOWBASE-NEXT: lbu a2, 0(a0) +; SLOWBASE-NEXT: lbu a0, 2(a0) +; SLOWBASE-NEXT: slli a1, a1, 8 +; SLOWBASE-NEXT: or a1, a1, a2 +; SLOWBASE-NEXT: slli a0, a0, 16 +; SLOWBASE-NEXT: or a0, a1, a0 +; SLOWBASE-NEXT: ret +; +; RV32IZBKB-LABEL: load_i24: +; RV32IZBKB: # %bb.0: +; RV32IZBKB-NEXT: lbu a1, 1(a0) +; RV32IZBKB-NEXT: lbu a2, 0(a0) +; RV32IZBKB-NEXT: lbu a0, 2(a0) +; RV32IZBKB-NEXT: packh a1, a2, a1 +; RV32IZBKB-NEXT: pack a0, a1, a0 +; RV32IZBKB-NEXT: ret +; +; RV64IZBKB-LABEL: load_i24: +; RV64IZBKB: # %bb.0: +; RV64IZBKB-NEXT: lbu a1, 1(a0) +; RV64IZBKB-NEXT: lbu a2, 0(a0) +; RV64IZBKB-NEXT: lbu a0, 2(a0) +; RV64IZBKB-NEXT: packh a1, a2, a1 +; RV64IZBKB-NEXT: slli a0, a0, 16 +; RV64IZBKB-NEXT: or a0, a1, a0 +; RV64IZBKB-NEXT: ret ; ; FAST-LABEL: load_i24: ; FAST: # %bb.0: @@ -60,19 +83,32 @@ define i24 @load_i24(ptr %p) { } define i32 @load_i32(ptr %p) { -; SLOW-LABEL: load_i32: -; SLOW: # %bb.0: -; SLOW-NEXT: lbu a1, 1(a0) -; SLOW-NEXT: lbu a2, 0(a0) -; SLOW-NEXT: lbu a3, 2(a0) -; SLOW-NEXT: lbu a0, 3(a0) -; SLOW-NEXT: slli a1, a1, 8 -; SLOW-NEXT: or a1, a1, a2 -; SLOW-NEXT: slli a3, a3, 16 -; SLOW-NEXT: slli a0, a0, 24 -; SLOW-NEXT: or a0, a0, a3 -; SLOW-NEXT: or a0, a0, a1 -; SLOW-NEXT: ret +; SLOWBASE-LABEL: load_i32: +; SLOWBASE: # %bb.0: +; SLOWBASE-NEXT: lbu a1, 1(a0) +; SLOWBASE-NEXT: lbu a2, 0(a0) +; SLOWBASE-NEXT: lbu a3, 2(a0) +; SLOWBASE-NEXT: lbu a0, 3(a0) +; SLOWBASE-NEXT: slli a1, a1, 8 +; SLOWBASE-NEXT: or a1, a1, a2 +; SLOWBASE-NEXT: slli a3, a3, 16 +; SLOWBASE-NEXT: slli a0, a0, 24 +; SLOWBASE-NEXT: or a0, a0, a3 +; SLOWBASE-NEXT: or a0, a0, a1 +; SLOWBASE-NEXT: ret +; +; SLOWZBKB-LABEL: load_i32: +; SLOWZBKB: # %bb.0: +; SLOWZBKB-NEXT: lbu a1, 1(a0) +; SLOWZBKB-NEXT: lbu a2, 0(a0) +; SLOWZBKB-NEXT: lbu a3, 2(a0) +; SLOWZBKB-NEXT: lbu a0, 3(a0) +; SLOWZBKB-NEXT: packh a1, a2, a1 +; SLOWZBKB-NEXT: slli a3, a3, 16 +; SLOWZBKB-NEXT: slli a0, a0, 
24 +; SLOWZBKB-NEXT: or a0, a0, a3 +; SLOWZBKB-NEXT: or a0, a0, a1 +; SLOWZBKB-NEXT: ret ; ; FAST-LABEL: load_i32: ; FAST: # %bb.0: @@ -134,6 +170,51 @@ define i64 @load_i64(ptr %p) { ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret ; +; RV32IZBKB-LABEL: load_i64: +; RV32IZBKB: # %bb.0: +; RV32IZBKB-NEXT: lbu a1, 1(a0) +; RV32IZBKB-NEXT: lbu a2, 0(a0) +; RV32IZBKB-NEXT: lbu a3, 2(a0) +; RV32IZBKB-NEXT: lbu a4, 3(a0) +; RV32IZBKB-NEXT: packh a1, a2, a1 +; RV32IZBKB-NEXT: slli a3, a3, 16 +; RV32IZBKB-NEXT: slli a4, a4, 24 +; RV32IZBKB-NEXT: or a3, a4, a3 +; RV32IZBKB-NEXT: lbu a2, 5(a0) +; RV32IZBKB-NEXT: lbu a4, 4(a0) +; RV32IZBKB-NEXT: lbu a5, 6(a0) +; RV32IZBKB-NEXT: lbu a6, 7(a0) +; RV32IZBKB-NEXT: or a0, a3, a1 +; RV32IZBKB-NEXT: packh a1, a4, a2 +; RV32IZBKB-NEXT: slli a5, a5, 16 +; RV32IZBKB-NEXT: slli a6, a6, 24 +; RV32IZBKB-NEXT: or a2, a6, a5 +; RV32IZBKB-NEXT: or a1, a2, a1 +; RV32IZBKB-NEXT: ret +; +; RV64IZBKB-LABEL: load_i64: +; RV64IZBKB: # %bb.0: +; RV64IZBKB-NEXT: lbu a1, 5(a0) +; RV64IZBKB-NEXT: lbu a2, 4(a0) +; RV64IZBKB-NEXT: lbu a3, 6(a0) +; RV64IZBKB-NEXT: lbu a4, 7(a0) +; RV64IZBKB-NEXT: packh a1, a2, a1 +; RV64IZBKB-NEXT: slli a3, a3, 16 +; RV64IZBKB-NEXT: slli a4, a4, 24 +; RV64IZBKB-NEXT: or a3, a4, a3 +; RV64IZBKB-NEXT: lbu a2, 1(a0) +; RV64IZBKB-NEXT: lbu a4, 0(a0) +; RV64IZBKB-NEXT: lbu a5, 2(a0) +; RV64IZBKB-NEXT: lbu a0, 3(a0) +; RV64IZBKB-NEXT: or a1, a3, a1 +; RV64IZBKB-NEXT: packh a2, a4, a2 +; RV64IZBKB-NEXT: slli a5, a5, 16 +; RV64IZBKB-NEXT: slli a0, a0, 24 +; RV64IZBKB-NEXT: or a0, a0, a5 +; RV64IZBKB-NEXT: or a0, a0, a2 +; RV64IZBKB-NEXT: pack a0, a0, a1 +; RV64IZBKB-NEXT: ret +; ; RV32I-FAST-LABEL: load_i64: ; RV32I-FAST: # %bb.0: ; RV32I-FAST-NEXT: lw a2, 0(a0) @@ -252,6 +333,43 @@ define void @store_i64(ptr %p, i64 %v) { ; RV64I-NEXT: sb a1, 1(a0) ; RV64I-NEXT: ret ; +; RV32IZBKB-LABEL: store_i64: +; RV32IZBKB: # %bb.0: +; RV32IZBKB-NEXT: sb a2, 4(a0) +; RV32IZBKB-NEXT: sb a1, 0(a0) +; RV32IZBKB-NEXT: srli a3, a2, 24 +; 
RV32IZBKB-NEXT: sb a3, 7(a0) +; RV32IZBKB-NEXT: srli a3, a2, 16 +; RV32IZBKB-NEXT: sb a3, 6(a0) +; RV32IZBKB-NEXT: srli a2, a2, 8 +; RV32IZBKB-NEXT: sb a2, 5(a0) +; RV32IZBKB-NEXT: srli a2, a1, 24 +; RV32IZBKB-NEXT: sb a2, 3(a0) +; RV32IZBKB-NEXT: srli a2, a1, 16 +; RV32IZBKB-NEXT: sb a2, 2(a0) +; RV32IZBKB-NEXT: srli a1, a1, 8 +; RV32IZBKB-NEXT: sb a1, 1(a0) +; RV32IZBKB-NEXT: ret +; +; RV64IZBKB-LABEL: store_i64: +; RV64IZBKB: # %bb.0: +; RV64IZBKB-NEXT: sb a1, 0(a0) +; RV64IZBKB-NEXT: srli a2, a1, 56 +; RV64IZBKB-NEXT: sb a2, 7(a0) +; RV64IZBKB-NEXT: srli a2, a1, 48 +; RV64IZBKB-NEXT: sb a2, 6(a0) +; RV64IZBKB-NEXT: srli a2, a1, 40 +; RV64IZBKB-NEXT: sb a2, 5(a0) +; RV64IZBKB-NEXT: srli a2, a1, 32 +; RV64IZBKB-NEXT: sb a2, 4(a0) +; RV64IZBKB-NEXT: srli a2, a1, 24 +; RV64IZBKB-NEXT: sb a2, 3(a0) +; RV64IZBKB-NEXT: srli a2, a1, 16 +; RV64IZBKB-NEXT: sb a2, 2(a0) +; RV64IZBKB-NEXT: srli a1, a1, 8 +; RV64IZBKB-NEXT: sb a1, 1(a0) +; RV64IZBKB-NEXT: ret +; ; RV32I-FAST-LABEL: store_i64: ; RV32I-FAST: # %bb.0: ; RV32I-FAST-NEXT: sw a2, 4(a0)