Files
clang-p2996/llvm/test/CodeGen/AArch64/arm64-inline-asm.ll
David Green adec922361 [AArch64] Make -mcpu=generic schedule for an in-order core
We would like to start pushing -mcpu=generic towards enabling the set of
features that improves performance for some CPUs, without hurting any
others. A blend of the performance options hopefully beneficial to all
CPUs. The largest part of that is enabling in-order scheduling using the
Cortex-A55 schedule model. This is similar to the Arm backend change
from eecb353d0e which made -mcpu=generic perform in-order scheduling
using the cortex-a8 schedule model.

The idea is that in-order cpus require the most help in instruction
scheduling, whereas out-of-order cpus can for the most part out-of-order
schedule around different codegen. Our benchmarking suggests that
hypothesis holds. When running on an in-order core this improved
performance by 3.8% geomean on a set of DSP workloads, 2% geomean on
some other embedded benchmark and between 1% and 1.8% on a set of
singlecore and multicore workloads, all running on a Cortex-A55 cluster.

On an out-of-order cpu the results are a lot more noisy but show flat
performance or an improvement. On the set of DSP and embedded
benchmarks, run on a Cortex-A78 there was a very noisy 1% speed
improvement. Using the most detailed results I could find, SPEC2006 runs
on a Neoverse N1 show a small increase in instruction count (+0.127%),
but a decrease in cycle counts (-0.155%, on average). The instruction
count is very low noise, the cycle count is more noisy with a 0.15%
decrease not being significant. SPEC2k17 shows a small decrease (-0.2%)
in instruction count leading to a -0.296% decrease in cycle count. These
results are within noise margins but tend to show a small improvement in
general.

When specifying an Apple target, clang will set "-target-cpu apple-a7"
on the command line, so should not be affected by this change when
running from clang. This also doesn't enable more runtime unrolling like
-mcpu=cortex-a55 does, only changing the schedule used.

A lot of existing tests have been updated. This is a summary of the important
differences:
 - Most changes are the same instructions in a different order.
 - Sometimes this leads to very minor inefficiencies, such as requiring
   an extra mov to move variables into r0/v0 for the return value of a test
   function.
 - misched-fusion.ll was no longer fusing the pairs of instructions it
   should, as per D110561. I've changed the schedule used in the test
   for now.
 - neon-mla-mls.ll now uses "mul; sub" as opposed to "neg; mla" due to
   the different latencies. This seems fine to me.
 - Some SVE tests do not always remove movprfx where they did before due
   to different register allocation giving different destructive forms.
 - The tests argument-blocks-array-of-struct.ll and arm64-windows-calls.ll
   produce two LDR where they previously produced an LDP due to
   store-pair-suppress kicking in.
 - arm64-ldp.ll and arm64-neon-copy.ll are missing pre/postinc on LDP.
 - Some tests such as arm64-neon-mul-div.ll and
   ragreedy-local-interval-cost.ll have more, less or just different
   spilling.
 - In aarch64_generated_funcs.ll.generated.expected one part of the
   function is no longer outlined. Interestingly if I switch this to use
   any other schedule even less is outlined.

Some of these are expected to happen, such as differences in outlining
or register spilling. There will be places where these result in worse
codegen, places where they are better, with the SPEC instruction counts
suggesting it is not a decrease overall, on average.

Differential Revision: https://reviews.llvm.org/D110830
2021-10-09 15:58:31 +01:00

521 lines
16 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64-apple-ios -aarch64-neon-syntax=apple -no-integrated-as | FileCheck %s
; rdar://9167275
; t1: an i32 "=r" output printed with the 'w' operand modifier. The expected
; output shows the operand as the 32-bit register name (w0), which is also the
; return register, so no copy follows the asm.
define i32 @t1() nounwind ssp {
; CHECK-LABEL: t1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: mov w0, 7
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ret
entry:
%0 = tail call i32 asm "mov ${0:w}, 7", "=r"() nounwind
ret i32 %0
}
; t2: an i64 "=r" output printed without any operand modifier. The expected
; output shows the operand as the 64-bit register name (x0).
define i64 @t2() nounwind ssp {
; CHECK-LABEL: t2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: mov x0, 7
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ret
entry:
%0 = tail call i64 asm "mov $0, 7", "=r"() nounwind
ret i64 %0
}
; t3: same as t2 but the i64 output is printed with the 'w' modifier. The
; expected output shows the 32-bit name (w0) even though the IR type is i64.
define i64 @t3() nounwind ssp {
; CHECK-LABEL: t3:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: mov w0, 7
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ret
entry:
%0 = tail call i64 asm "mov ${0:w}, 7", "=r"() nounwind
ret i64 %0
}
; rdar://9281206
; t4: side-effecting asm with one "=r" output, two "r" inputs (%op and undef)
; and an explicit ~{x0} clobber. In the expected output %op is first copied
; from x0 into x8, and x8 is what gets substituted for $1 inside the asm text.
; The asm result (%0) is unused by the function.
define void @t4(i64 %op) nounwind {
; CHECK-LABEL: t4:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: mov x8, x0
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: mov x0, x8; svc #0;
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ret
entry:
%0 = tail call i64 asm sideeffect "mov x0, $1; svc #0;", "=r,r,r,~{x0}"(i64 %op, i64 undef) nounwind
ret void
}
; rdar://9394290
; t5: float operand using the "=w" output constraint with the input tied to
; it via "0". All three uses are printed with the 's' modifier; the expected
; output shows s0 for every operand.
define float @t5(float %x) nounwind {
; CHECK-LABEL: t5:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: fadd s0, s0, s0
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ret
entry:
%0 = tail call float asm "fadd ${0:s}, ${0:s}, ${0:s}", "=w,0"(float %x) nounwind
ret float %0
}
; rdar://9553599
; t6: i8 "=r" output (printed with 'w') plus a pointer "r" input. Because the
; function returns zeroext i8, the expected output masks the asm result with
; #0xff after the asm block before returning it in w0.
define zeroext i8 @t6(i8* %src) nounwind {
; CHECK-LABEL: t6:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: ldtrb w8, [x0]
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: and w0, w8, #0xff
; CHECK-NEXT: ret
entry:
%0 = tail call i8 asm "ldtrb ${0:w}, [$1]", "=r,r"(i8* %src) nounwind
ret i8 %0
}
; t7: indirect memory output using the "=*Q" constraint on a stack slot, plus
; an i32 "r" input printed with 'w'. In the expected output the slot address
; (sp + 8) is computed into x8 and the memory operand prints as [x8].
define void @t7(i8* %f, i32 %g) nounwind {
; CHECK-LABEL: t7:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: add x8, sp, #8
; CHECK-NEXT: str x0, [sp, #8]
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: str w1, [x8]
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
entry:
%f.addr = alloca i8*, align 8
store i8* %f, i8** %f.addr, align 8
call void asm "str ${1:w}, $0", "=*Q,r"(i8** %f.addr, i32 %g) nounwind
ret void
}
; rdar://10258229
; ARM64TargetLowering::getRegForInlineAsmConstraint() should recognize 'v'
; registers.
; t8: asm whose only constraint is a ~{v8} clobber. The expected output saves
; and restores d8 (paired with d9) around the asm block.
define void @t8() nounwind ssp {
; CHECK-LABEL: t8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: stp d9, d8, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: nop
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ldp d9, d8, [sp], #16 ; 16-byte Folded Reload
; CHECK-NEXT: ret
entry:
tail call void asm sideeffect "nop", "~{v8}"() nounwind
ret void
}
; constraint_I: the 'I' immediate constraint with the constants 16773120 and
; 4096 as the third operand of an add. Both are printed as plain decimal
; immediates in the expected output. Only the second asm result is returned.
define i32 @constraint_I(i32 %i, i32 %j) nounwind {
; CHECK-LABEL: constraint_I:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: add w8, w0, 16773120
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: add w0, w0, 4096
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ret
entry:
%0 = tail call i32 asm sideeffect "add ${0:w}, ${1:w}, $2", "=r,r,I"(i32 %i, i32 16773120) nounwind
%1 = tail call i32 asm sideeffect "add ${0:w}, ${1:w}, $2", "=r,r,I"(i32 %i, i32 4096) nounwind
ret i32 %1
}
; constraint_J: the 'J' immediate constraint with negative constants
; (-16773120 and -1). The last two calls use 64-bit register operands (':x')
; with the immediate typed once as i32 and once as i64; both print as -1 in
; the expected output. Only the second asm result is returned.
define i32 @constraint_J(i32 %i, i32 %j, i64 %k) nounwind {
; CHECK-LABEL: constraint_J:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: sub w8, w0, -16773120
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: sub w0, w0, -1
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: sub x8, x2, -1
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: sub x8, x2, -1
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ret
entry:
%0 = tail call i32 asm sideeffect "sub ${0:w}, ${1:w}, $2", "=r,r,J"(i32 %i, i32 -16773120) nounwind
%1 = tail call i32 asm sideeffect "sub ${0:w}, ${1:w}, $2", "=r,r,J"(i32 %i, i32 -1) nounwind
%2 = tail call i64 asm sideeffect "sub ${0:x}, ${1:x}, $2", "=r,r,J"(i64 %k, i32 -1) nounwind
%3 = tail call i64 asm sideeffect "sub ${0:x}, ${1:x}, $2", "=r,r,J"(i64 %k, i64 -1) nounwind
ret i32 %1
}
; constraint_KL: the 'K' constraint with an i32 immediate (255) and the 'L'
; constraint with an i64 immediate (16711680), each used as the third operand
; of an eor. Both print as decimal immediates in the expected output.
define i32 @constraint_KL(i32 %i, i32 %j) nounwind {
; CHECK-LABEL: constraint_KL:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: eor w8, w0, 255
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: eor w0, w0, 16711680
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ret
entry:
%0 = tail call i32 asm sideeffect "eor ${0:w}, ${1:w}, $2", "=r,r,K"(i32 %i, i32 255) nounwind
%1 = tail call i32 asm sideeffect "eor ${0:w}, ${1:w}, $2", "=r,r,L"(i32 %i, i64 16711680) nounwind
ret i32 %1
}
; constraint_MN: the 'M' constraint with an i32 immediate (65535, used by
; movk) and the 'N' constraint with an i64 immediate (0, used by movz). Both
; print as decimal immediates in the expected output.
define i32 @constraint_MN(i32 %i, i32 %j) nounwind {
; CHECK-LABEL: constraint_MN:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: movk w8, 65535
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: movz w0, 0
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ret
entry:
%0 = tail call i32 asm sideeffect "movk ${0:w}, $1", "=r,M"(i32 65535) nounwind
%1 = tail call i32 asm sideeffect "movz ${0:w}, $1", "=r,N"(i64 0) nounwind
ret i32 %1
}
; t9: a <2 x double> value passed through the 'w' constraint with a ~{v4}
; clobber and no operand modifier; the expected output prints it as v0.
; The asm string ends in a newline (\0A), which is why a blank line is
; expected before the "InlineAsm End" marker.
define void @t9() nounwind {
; CHECK-LABEL: t9:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: ldr q0, [sp], #16
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: mov.2d v4, v0
; CHECK-EMPTY:
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ret
entry:
%data = alloca <2 x double>, align 16
%0 = load <2 x double>, <2 x double>* %data, align 16
call void asm sideeffect "mov.2d v4, $0\0A", "w,~{v4}"(<2 x double> %0) nounwind
ret void
}
; t10: exercises the register-size operand modifiers z, q, d, s, h and b on
; the same 'w'-constrained <2 x float> operand. The expected output prints
; the operand as z0, q0, d0, s0, h0 and b0 respectively, while the 'r'
; pointer operand prints as x8 each time. Each asm string ends in a newline,
; hence the blank line expected before every "InlineAsm End" marker.
define void @t10() nounwind {
; CHECK-LABEL: t10:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: ldr d0, [sp, #8]
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: ldr z0, [x8]
; CHECK-EMPTY:
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: ldr q0, [x8]
; CHECK-EMPTY:
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: ldr d0, [x8]
; CHECK-EMPTY:
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: ldr s0, [x8]
; CHECK-EMPTY:
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: ldr h0, [x8]
; CHECK-EMPTY:
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: ldr b0, [x8]
; CHECK-EMPTY:
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
entry:
%data = alloca <2 x float>, align 8
%a = alloca [2 x float], align 4
%arraydecay = getelementptr inbounds [2 x float], [2 x float]* %a, i32 0, i32 0
%0 = load <2 x float>, <2 x float>* %data, align 8
call void asm sideeffect "ldr ${1:z}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind
call void asm sideeffect "ldr ${1:q}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind
call void asm sideeffect "ldr ${1:d}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind
call void asm sideeffect "ldr ${1:s}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind
call void asm sideeffect "ldr ${1:h}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind
call void asm sideeffect "ldr ${1:b}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind
ret void
}
; t11: an 'i' immediate operand with value 0 printed through the 'x' and 'w'
; register modifiers. The expected output prints it as xzr and wzr
; respectively, next to an 'r' operand loaded from the stack (x8 / w8).
define void @t11() nounwind {
; CHECK-LABEL: t11:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: ldr w8, [sp, #12]
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: mov xzr, x8
; CHECK-EMPTY:
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ldr w8, [sp, #12]
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: mov wzr, w8
; CHECK-EMPTY:
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
entry:
%a = alloca i32, align 4
%0 = load i32, i32* %a, align 4
call void asm sideeffect "mov ${1:x}, ${0:x}\0A", "r,i"(i32 %0, i32 0) nounwind
%1 = load i32, i32* %a, align 4
call void asm sideeffect "mov ${1:w}, ${0:w}\0A", "r,i"(i32 %1, i32 0) nounwind
ret void
}
; t12: same shape as t9 but with a <4 x float> value passed through the 'x'
; constraint instead of 'w'. The expected output again prints the operand
; as v0.
define void @t12() nounwind {
; CHECK-LABEL: t12:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: ldr q0, [sp], #16
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: mov.2d v4, v0
; CHECK-EMPTY:
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ret
entry:
%data = alloca <4 x float>, align 16
%0 = load <4 x float>, <4 x float>* %data, align 16
call void asm sideeffect "mov.2d v4, $0\0A", "x,~{v4}"(<4 x float> %0) nounwind
ret void
}
; t13: four i64 constants, positive and negative, passed through the 'N'
; immediate constraint. The expected output prints each one as the same
; signed decimal value that appears in the IR.
define void @t13() nounwind {
; CHECK-LABEL: t13:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: mov x4, 1311673391471656960
; CHECK-EMPTY:
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: mov x4, -4662
; CHECK-EMPTY:
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: mov x4, 4660
; CHECK-EMPTY:
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: mov x4, -71777214294589696
; CHECK-EMPTY:
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ret
entry:
tail call void asm sideeffect "mov x4, $0\0A", "N"(i64 1311673391471656960) nounwind
tail call void asm sideeffect "mov x4, $0\0A", "N"(i64 -4662) nounwind
tail call void asm sideeffect "mov x4, $0\0A", "N"(i64 4660) nounwind
call void asm sideeffect "mov x4, $0\0A", "N"(i64 -71777214294589696) nounwind
ret void
}
; t14: 32-bit counterpart of t13 using the 'M' immediate constraint. Note
; that the negative i32 constants are printed as their unsigned 32-bit
; values in the expected output (-4662 -> 4294962634,
; -16711936 -> 4278255360).
define void @t14() nounwind {
; CHECK-LABEL: t14:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: mov w4, 305397760
; CHECK-EMPTY:
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: mov w4, 4294962634
; CHECK-EMPTY:
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: mov w4, 4660
; CHECK-EMPTY:
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: mov w4, 4278255360
; CHECK-EMPTY:
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ret
entry:
tail call void asm sideeffect "mov w4, $0\0A", "M"(i32 305397760) nounwind
tail call void asm sideeffect "mov w4, $0\0A", "M"(i32 -4662) nounwind
tail call void asm sideeffect "mov w4, $0\0A", "M"(i32 4660) nounwind
call void asm sideeffect "mov w4, $0\0A", "M"(i32 -16711936) nounwind
ret void
}
; t15: a double-typed result bound to the general-purpose "=r" constraint.
; The expected output places it in a 64-bit integer register (x8), so the
; asm text becomes an fmov from d8 into x8. The result is unused.
define void @t15() nounwind {
; CHECK-LABEL: t15:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: fmov x8, d8
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ret
entry:
%0 = tail call double asm sideeffect "fmov $0, d8", "=r"() nounwind
ret void
}
; rdar://problem/14285178
; test_zero_reg: covers the 'z' constraint and the "zr" constraint string.
; Per the expected output:
;   - constant 0 with 'z' and no modifier prints as xzr;
;   - constant 0 with "zr" and the 'w' modifier prints as wzr;
;   - constant 1 with "zr" is materialised into a register (mov w8, #1) and
;     prints as w8;
;   - two 'z' operands in one asm both print as zero registers, with the 'w'
;     modifier selecting wzr for the operand it is applied to.
; %addr is not used by the body.
define void @test_zero_reg(i32* %addr) {
; CHECK-LABEL: test_zero_reg:
; CHECK: ; %bb.0:
; CHECK-NEXT: mov w8, #1
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: USE(xzr)
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: USE(wzr)
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: USE(w8)
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: USE(xzr), USE(xzr)
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: USE(xzr), USE(wzr)
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ret
tail call void asm sideeffect "USE($0)", "z"(i32 0) nounwind
tail call void asm sideeffect "USE(${0:w})", "zr"(i32 0)
tail call void asm sideeffect "USE(${0:w})", "zr"(i32 1)
tail call void asm sideeffect "USE($0), USE($1)", "z,z"(i32 0, i32 0) nounwind
tail call void asm sideeffect "USE($0), USE(${1:w})", "z,z"(i32 0, i32 0) nounwind
ret void
}
; test_vreg_64bit: a <2 x float> output pinned to the explicit register v14
; via "={v14}". In the expected output d14 (paired with d15) is saved and
; restored around the asm, and the 64-bit result is copied to the return
; register with "fmov d0, d14".
define <2 x float> @test_vreg_64bit(<2 x float> %in) nounwind {
; CHECK-LABEL: test_vreg_64bit:
; CHECK: ; %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: fadd v14.2s, v0.2s, v0.2s
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: fmov d0, d14
; CHECK-NEXT: ldp d15, d14, [sp], #16 ; 16-byte Folded Reload
; CHECK-NEXT: ret
%1 = tail call <2 x float> asm sideeffect "fadd ${0}.2s, ${1}.2s, ${1}.2s", "={v14},w"(<2 x float> %in) nounwind
ret <2 x float> %1
}
; test_vreg_128bit: 128-bit variant of test_vreg_64bit with a <4 x float>
; output pinned to v14. The expected output still saves/restores only the
; d15/d14 pair, and copies the full vector result with "mov.16b v0, v14".
define <4 x float> @test_vreg_128bit(<4 x float> %in) nounwind {
; CHECK-LABEL: test_vreg_128bit:
; CHECK: ; %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: fadd v14.4s, v0.4s, v0.4s
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: mov.16b v0, v14
; CHECK-NEXT: ldp d15, d14, [sp], #16 ; 16-byte Folded Reload
; CHECK-NEXT: ret
%1 = tail call <4 x float> asm sideeffect "fadd ${0}.4s, ${1}.4s, ${1}.4s", "={v14},w"(<4 x float> %in) nounwind
ret <4 x float> %1
}
; test_constraint_w: an integer (i32) argument bound to the 'w' constraint.
; The expected output moves it from w0 into s0 with fmov before the asm, and
; the 's' modifier prints the operand as s0.
define void @test_constraint_w(i32 %a) {
; CHECK-LABEL: test_constraint_w:
; CHECK: ; %bb.0:
; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: sqxtn h0, s0
; CHECK-EMPTY:
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ret
tail call void asm sideeffect "sqxtn h0, ${0:s}\0A", "w"(i32 %a)
ret void
}
; test_inline_modifier_a: the 'a' operand modifier applied to a pointer held
; in an 'r' register. The expected output prints the operand in address
; form, [x0].
define void @test_inline_modifier_a(i8* %ptr) nounwind {
; CHECK-LABEL: test_inline_modifier_a:
; CHECK: ; %bb.0:
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: prfm pldl1keep, [x0]
; CHECK-EMPTY:
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ret
tail call void asm sideeffect "prfm pldl1keep, ${0:a}\0A", "r"(i8* %ptr)
ret void
}
; PR33134
; test_zero_address: an indirect "*Q" memory input whose address is the null
; pointer. The expected output materialises the address into a real register
; (mov x8, xzr) and prints the memory operand as [x8].
define void @test_zero_address() {
; CHECK-LABEL: test_zero_address:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: ldr x8, [x8]
; CHECK-EMPTY:
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ret
entry:
tail call i32 asm sideeffect "ldr $0, $1 \0A", "=r,*Q"(i32* null)
ret void
}
; No '#' in lane specifier
; test_no_hash_in_lane_specifier: an 'I' immediate (1) substituted inside a
; vector lane index. The expected output prints it as a bare "1" so the lane
; reads v1.s[1].
; NOTE(review): the call references attribute group #1, whose definition is
; not visible in this part of the file.
define void @test_no_hash_in_lane_specifier() {
; CHECK-LABEL: test_no_hash_in_lane_specifier:
; CHECK: ; %bb.0:
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: fmla v2.4s, v0.4s, v1.s[1]
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: ret
tail call void asm sideeffect "fmla v2.4s, v0.4s, v1.s[$0]", "I"(i32 1) #1
ret void
}
; test_vector_too_large_r_m: a <9 x float> value passed to an empty asm
; through the multi-alternative output constraint "=*r|m" with a tied input
; ("0") and a memory clobber. In the expected output the vector is loaded
; from the argument pointer and stored to a 64-byte stack slot (two q
; registers plus one s register) before the empty asm block.
define void @test_vector_too_large_r_m(<9 x float>* nocapture readonly %0) {
; CHECK-LABEL: test_vector_too_large_r_m:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: sub sp, sp, #64
; CHECK-NEXT: .cfi_def_cfa_offset 64
; CHECK-NEXT: ldp q2, q1, [x0]
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: ldr s0, [x0, #32]
; CHECK-NEXT: stp q2, q1, [sp]
; CHECK-NEXT: str s0, [sp, #32]
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: add sp, sp, #64
; CHECK-NEXT: ret
entry:
%m.addr = alloca <9 x float>, align 16
%m = load <9 x float>, <9 x float>* %0, align 16
store <9 x float> %m, <9 x float>* %m.addr, align 16
call void asm sideeffect "", "=*r|m,0,~{memory}"(<9 x float>* nonnull %m.addr, <9 x float> %m)
ret void
}
; test_o_output_constraint: an indirect memory output using the "=*o"
; constraint on a one-byte stack slot. In the expected output the slot
; address (sp + 15) is computed into x8 and the operand prints as [x8].
define void @test_o_output_constraint() {
; CHECK-LABEL: test_o_output_constraint:
; CHECK: ; %bb.0:
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: add x8, sp, #15
; CHECK-NEXT: ; InlineAsm Start
; CHECK-NEXT: mov [x8], 7
; CHECK-NEXT: ; InlineAsm End
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%b = alloca i8, align 1
call void asm "mov $0, 7", "=*o"(i8* %b)
ret void
}