clang-p2996/llvm/test/CodeGen/LoongArch/unaligned-memcpy-inline.ll
Weining Lu 47601815ec [LoongArch] Define ual feature and override allowsMisalignedMemoryAccesses
Some CPUs do not allow unaligned memory accesses, e.g. the 2k1000la,
which uses the la264 core; on that core a misaligned access triggers
an exception.

In this patch, a backend feature called `ual` is defined to describe
whether the CPU supports unaligned memory accesses. This feature can
be toggled by the clang options `-m[no-]unaligned-access` or the
aliases `-m[no-]strict-align`. When this feature is on,
`allowsMisalignedMemoryAccesses` sets the speed number to 1 and returns
true, which allows codegen to generate unaligned memory access
instructions.
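
For reference, a minimal sketch of what the override boils down to
(not the verbatim patch; the subtarget accessor name `hasUAL()` is an
assumption here):

  bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
      EVT VT, unsigned AddrSpace, Align Alignment,
      MachineMemOperand::Flags Flags, unsigned *Fast) const {
    if (!Subtarget.hasUAL())
      return false;
    // Report a nonzero speed so callers (e.g. the memcpy lowering)
    // prefer wide unaligned loads/stores over byte-by-byte copies.
    if (Fast)
      *Fast = 1;
    return true;
  }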

The clang options `-m[no-]unaligned-access` are moved from
`m_arm_Features_Group` to `m_Group` because more than one target now
uses them, and a test is added to show that they remain unused on a
target that does not support them. In addition, to stay compatible
with gcc, a new alias `-mno-strict-align` is added that is equivalent
to `-munaligned-access`.
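
For example, when targeting LoongArch the following invocations are
equivalent ways to forbid (or allow) unaligned accesses (illustrative
command lines, not taken from the patch; `foo.c` is a placeholder):

  clang --target=loongarch64 -mno-unaligned-access -c foo.c  # strict alignment
  clang --target=loongarch64 -mstrict-align -c foo.c         # same as above
  clang --target=loongarch64 -munaligned-access -c foo.c     # allow unaligned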

The feature name `ual` is consistent with the Linux kernel [1] and
with the output of `lscpu` and `/proc/cpuinfo` [2].

There is an `LLT` variant of `allowsMisalignedMemoryAccesses`, but it
currently appears to be used only by GlobalISel, which LoongArch does
not support yet, so that variant is not implemented in this patch.

[1]: https://github.com/torvalds/linux/blob/master/arch/loongarch/include/asm/cpu.h#L77
[2]: https://github.com/torvalds/linux/blob/master/arch/loongarch/kernel/proc.c#L75

Reviewed By: xen0n

Differential Revision: https://reviews.llvm.org/D149946
2023-06-07 13:40:58 +08:00

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
;; Test how memcpy is optimized when ual is turned off. This is similar to
;; AArch64/arm64-misaligned-memcpy-inline.ll.
; RUN: llc --mtriple=loongarch32 --mattr=-ual < %s | FileCheck %s --check-prefix=LA32
; RUN: llc --mtriple=loongarch64 --mattr=-ual < %s | FileCheck %s --check-prefix=LA64

;; Small (16 bytes here) unaligned memcpy() should be a function call if
;; ual is turned off.
define void @t0(ptr %out, ptr %in) {
; LA32-LABEL: t0:
; LA32: # %bb.0: # %entry
; LA32-NEXT: addi.w $sp, $sp, -16
; LA32-NEXT: .cfi_def_cfa_offset 16
; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
; LA32-NEXT: .cfi_offset 1, -4
; LA32-NEXT: ori $a2, $zero, 16
; LA32-NEXT: bl %plt(memcpy)
; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
; LA32-NEXT: addi.w $sp, $sp, 16
; LA32-NEXT: ret
;
; LA64-LABEL: t0:
; LA64: # %bb.0: # %entry
; LA64-NEXT: addi.d $sp, $sp, -16
; LA64-NEXT: .cfi_def_cfa_offset 16
; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
; LA64-NEXT: .cfi_offset 1, -8
; LA64-NEXT: ori $a2, $zero, 16
; LA64-NEXT: bl %plt(memcpy)
; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
; LA64-NEXT: addi.d $sp, $sp, 16
; LA64-NEXT: ret
entry:
call void @llvm.memcpy.p0.p0.i64(ptr %out, ptr %in, i64 16, i1 false)
ret void
}

;; Small (16 bytes here) aligned memcpy() should be inlined even if
;; ual is turned off.
define void @t1(ptr align 8 %out, ptr align 8 %in) {
; LA32-LABEL: t1:
; LA32: # %bb.0: # %entry
; LA32-NEXT: ld.w $a2, $a1, 12
; LA32-NEXT: st.w $a2, $a0, 12
; LA32-NEXT: ld.w $a2, $a1, 8
; LA32-NEXT: st.w $a2, $a0, 8
; LA32-NEXT: ld.w $a2, $a1, 4
; LA32-NEXT: st.w $a2, $a0, 4
; LA32-NEXT: ld.w $a1, $a1, 0
; LA32-NEXT: st.w $a1, $a0, 0
; LA32-NEXT: ret
;
; LA64-LABEL: t1:
; LA64: # %bb.0: # %entry
; LA64-NEXT: ld.d $a2, $a1, 8
; LA64-NEXT: st.d $a2, $a0, 8
; LA64-NEXT: ld.d $a1, $a1, 0
; LA64-NEXT: st.d $a1, $a0, 0
; LA64-NEXT: ret
entry:
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %out, ptr align 8 %in, i64 16, i1 false)
ret void
}

;; Tiny (4 bytes here) unaligned memcpy() should be inlined with byte-sized
;; loads and stores if ual is turned off.
define void @t2(ptr %out, ptr %in) {
; LA32-LABEL: t2:
; LA32: # %bb.0: # %entry
; LA32-NEXT: ld.b $a2, $a1, 3
; LA32-NEXT: st.b $a2, $a0, 3
; LA32-NEXT: ld.b $a2, $a1, 2
; LA32-NEXT: st.b $a2, $a0, 2
; LA32-NEXT: ld.b $a2, $a1, 1
; LA32-NEXT: st.b $a2, $a0, 1
; LA32-NEXT: ld.b $a1, $a1, 0
; LA32-NEXT: st.b $a1, $a0, 0
; LA32-NEXT: ret
;
; LA64-LABEL: t2:
; LA64: # %bb.0: # %entry
; LA64-NEXT: ld.b $a2, $a1, 3
; LA64-NEXT: st.b $a2, $a0, 3
; LA64-NEXT: ld.b $a2, $a1, 2
; LA64-NEXT: st.b $a2, $a0, 2
; LA64-NEXT: ld.b $a2, $a1, 1
; LA64-NEXT: st.b $a2, $a0, 1
; LA64-NEXT: ld.b $a1, $a1, 0
; LA64-NEXT: st.b $a1, $a0, 0
; LA64-NEXT: ret
entry:
call void @llvm.memcpy.p0.p0.i64(ptr %out, ptr %in, i64 4, i1 false)
ret void
}

declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1)