Some CPUs do not allow unaligned memory accesses, e.g. the 2k1000la, which uses the la264 core on which a misaligned access triggers an exception. In this patch, a backend feature called `ual` is defined to describe whether the CPU supports unaligned memory accesses, and the feature can be toggled by the clang options `-m[no-]unaligned-access` or their aliases `-m[no-]strict-align`. When this feature is on, `allowsMisalignedMemoryAccesses` sets the speed number to 1 and returns true, which allows codegen to generate unaligned memory access instructions.

The clang options `-m[no-]unaligned-access` are moved from `m_arm_Features_Group` to `m_Group` because more than one target now uses them, and a test is added to show that they remain unused on a target that does not support them. In addition, to stay compatible with gcc, a new alias `-mno-strict-align` is added, which is equivalent to `-munaligned-access`.

The feature name `ual` is consistent with the linux kernel [1] and the output of `lscpu` or `/proc/cpuinfo` [2].

There is an `LLT` variant of `allowsMisalignedMemoryAccesses`, but it currently seems to be used only in GlobalISel, which LoongArch does not support yet, so that variant is not implemented in this patch.

[1]: https://github.com/torvalds/linux/blob/master/arch/loongarch/include/asm/cpu.h#L77
[2]: https://github.com/torvalds/linux/blob/master/arch/loongarch/kernel/proc.c#L75

Reviewed By: xen0n

Differential Revision: https://reviews.llvm.org/D149946
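For reference, below is a minimal sketch of how the hook described above could be wired up in the LoongArch backend. It is an illustration under assumptions rather than the verbatim patch: `Subtarget.hasUAL()` is an assumed accessor name for the `ual` feature bit, and the surrounding class boilerplate is omitted.

```cpp
// Sketch for LoongArchISelLowering.cpp: allow misaligned loads/stores only
// when the `ual` feature is enabled, and report a neutral speed number of 1.
bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned AddrSpace, Align Alignment,
    MachineMemOperand::Flags Flags, unsigned *Fast) const {
  if (!Subtarget.hasUAL()) // assumed accessor for the `ual` subtarget feature
    return false;          // misaligned accesses would fault; keep them split

  if (Fast)
    *Fast = 1; // legal, but assumed no faster than an aligned access
  return true;
}
```

With `+ual` this lets SelectionDAG emit wide, possibly misaligned loads and stores for small memcpys; with `-ual` (as in the test below) the same memcpy either stays a libcall or is broken into naturally aligned pieces.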
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2

;; Test how memcpy is optimized when ual is turned off which is similar to AArch64/arm64-misaligned-memcpy-inline.ll.

; RUN: llc --mtriple=loongarch32 --mattr=-ual < %s | FileCheck %s --check-prefix=LA32
; RUN: llc --mtriple=loongarch64 --mattr=-ual < %s | FileCheck %s --check-prefix=LA64

;; Small (16 bytes here) unaligned memcpy() should be a function call if
;; ual is turned off.
define void @t0(ptr %out, ptr %in) {
; LA32-LABEL: t0:
; LA32: # %bb.0: # %entry
; LA32-NEXT: addi.w $sp, $sp, -16
; LA32-NEXT: .cfi_def_cfa_offset 16
; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
; LA32-NEXT: .cfi_offset 1, -4
; LA32-NEXT: ori $a2, $zero, 16
; LA32-NEXT: bl %plt(memcpy)
; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
; LA32-NEXT: addi.w $sp, $sp, 16
; LA32-NEXT: ret
;
; LA64-LABEL: t0:
; LA64: # %bb.0: # %entry
; LA64-NEXT: addi.d $sp, $sp, -16
; LA64-NEXT: .cfi_def_cfa_offset 16
; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
; LA64-NEXT: .cfi_offset 1, -8
; LA64-NEXT: ori $a2, $zero, 16
; LA64-NEXT: bl %plt(memcpy)
; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
; LA64-NEXT: addi.d $sp, $sp, 16
; LA64-NEXT: ret
entry:
  call void @llvm.memcpy.p0.p0.i64(ptr %out, ptr %in, i64 16, i1 false)
  ret void
}

;; Small (16 bytes here) aligned memcpy() should be inlined even if
;; ual is turned off.
define void @t1(ptr align 8 %out, ptr align 8 %in) {
; LA32-LABEL: t1:
; LA32: # %bb.0: # %entry
; LA32-NEXT: ld.w $a2, $a1, 12
; LA32-NEXT: st.w $a2, $a0, 12
; LA32-NEXT: ld.w $a2, $a1, 8
; LA32-NEXT: st.w $a2, $a0, 8
; LA32-NEXT: ld.w $a2, $a1, 4
; LA32-NEXT: st.w $a2, $a0, 4
; LA32-NEXT: ld.w $a1, $a1, 0
; LA32-NEXT: st.w $a1, $a0, 0
; LA32-NEXT: ret
;
; LA64-LABEL: t1:
; LA64: # %bb.0: # %entry
; LA64-NEXT: ld.d $a2, $a1, 8
; LA64-NEXT: st.d $a2, $a0, 8
; LA64-NEXT: ld.d $a1, $a1, 0
; LA64-NEXT: st.d $a1, $a0, 0
; LA64-NEXT: ret
entry:
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %out, ptr align 8 %in, i64 16, i1 false)
  ret void
}

;; Tiny (4 bytes here) unaligned memcpy() should be inlined with byte sized
;; loads and stores if ual is turned off.
define void @t2(ptr %out, ptr %in) {
; LA32-LABEL: t2:
; LA32: # %bb.0: # %entry
; LA32-NEXT: ld.b $a2, $a1, 3
; LA32-NEXT: st.b $a2, $a0, 3
; LA32-NEXT: ld.b $a2, $a1, 2
; LA32-NEXT: st.b $a2, $a0, 2
; LA32-NEXT: ld.b $a2, $a1, 1
; LA32-NEXT: st.b $a2, $a0, 1
; LA32-NEXT: ld.b $a1, $a1, 0
; LA32-NEXT: st.b $a1, $a0, 0
; LA32-NEXT: ret
;
; LA64-LABEL: t2:
; LA64: # %bb.0: # %entry
; LA64-NEXT: ld.b $a2, $a1, 3
; LA64-NEXT: st.b $a2, $a0, 3
; LA64-NEXT: ld.b $a2, $a1, 2
; LA64-NEXT: st.b $a2, $a0, 2
; LA64-NEXT: ld.b $a2, $a1, 1
; LA64-NEXT: st.b $a2, $a0, 1
; LA64-NEXT: ld.b $a1, $a1, 0
; LA64-NEXT: st.b $a1, $a0, 0
; LA64-NEXT: ret
entry:
  call void @llvm.memcpy.p0.p0.i64(ptr %out, ptr %in, i64 4, i1 false)
  ret void
}

declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1)