[MC] Honour alignment directive fill value for non-intel (#100136)
As reported in https://llvm.org/PR30955, `.balign` with a fill-value of 0 did not actually align using zeroes, on non-x86 targets. This is because the check of whether to use the code alignment routines or whether to just use the fill value was checking whether the fill value was equal to `TextAlignFillValue`, which has not been changed from its default of 0 on most targets (it has been changed for x86). However, most targets do not set the fill value because it doesn't entirely make sense -- i.e. on AArch64 there's no reasonable byte value to use for alignment, as instructions are word-sized and have to be well-aligned. I think the check at the end `AsmParser::parseDirectiveAlign` is suspicious even on x86 - if you use `.balign <align>, 0x90` in a code section, you don't end up with a block of `0x90` repeated, you end up with a block of NOPs of various widths. This functionality is never tested. The fix here is to modify the check to ignore the default text align fill value when choosing to do code alignment or not. Fixes #30303
This commit is contained in:
@@ -14,7 +14,7 @@ target triple = "x86_64-pc-windows-msvc19.14.26433"
|
||||
|
||||
define dllexport void @foo() #0 {
|
||||
entry:
|
||||
call void asm sideeffect ".p2align 4, 0x90", "~{dirflag},~{fpsr},~{flags}"()
|
||||
call void asm sideeffect ".p2align 4", "~{dirflag},~{fpsr},~{flags}"()
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @foo() #0 {
|
||||
entry:
|
||||
call void asm sideeffect ".p2align 4, 0x90", "~{dirflag},~{fpsr},~{flags}"()
|
||||
call void asm sideeffect ".p2align 4", "~{dirflag},~{fpsr},~{flags}"()
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16
|
||||
|
||||
define void @_start() #0 {
|
||||
entry:
|
||||
call void asm sideeffect ".p2align 4, 0x90", "~{dirflag},~{fpsr},~{flags}"()
|
||||
call void asm sideeffect ".p2align 4", "~{dirflag},~{fpsr},~{flags}"()
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16
|
||||
|
||||
define void @foo() #0 {
|
||||
entry:
|
||||
call void asm sideeffect ".p2align 4, 0x90", "~{dirflag},~{fpsr},~{flags}"()
|
||||
call void asm sideeffect ".p2align 4", "~{dirflag},~{fpsr},~{flags}"()
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
@@ -65,12 +65,20 @@ Changes to Interprocedural Optimizations
|
||||
Changes to the AArch64 Backend
|
||||
------------------------------
|
||||
|
||||
* `.balign N, 0`, `.p2align N, 0`, `.align N, 0` in code sections will now fill
|
||||
the required alignment space with a sequence of `0x0` bytes (the requested
|
||||
fill value) rather than NOPs.
|
||||
|
||||
Changes to the AMDGPU Backend
|
||||
-----------------------------
|
||||
|
||||
Changes to the ARM Backend
|
||||
--------------------------
|
||||
|
||||
* `.balign N, 0`, `.p2align N, 0`, `.align N, 0` in code sections will now fill
|
||||
the required alignment space with a sequence of `0x0` bytes (the requested
|
||||
fill value) rather than NOPs.
|
||||
|
||||
Changes to the AVR Backend
|
||||
--------------------------
|
||||
|
||||
@@ -92,6 +100,10 @@ Changes to the PowerPC Backend
|
||||
Changes to the RISC-V Backend
|
||||
-----------------------------
|
||||
|
||||
* `.balign N, 0`, `.p2align N, 0`, `.align N, 0` in code sections will now fill
|
||||
the required alignment space with a sequence of `0x0` bytes (the requested
|
||||
fill value) rather than NOPs.
|
||||
|
||||
Changes to the WebAssembly Backend
|
||||
----------------------------------
|
||||
|
||||
@@ -101,6 +113,13 @@ Changes to the Windows Target
|
||||
Changes to the X86 Backend
|
||||
--------------------------
|
||||
|
||||
* `.balign N, 0x90`, `.p2align N, 0x90`, and `.align N, 0x90` in code sections
|
||||
now fill the required alignment space with repeating `0x90` bytes, rather than
|
||||
using optimised NOP filling. Optimised NOP filling fills the space with NOP
|
||||
instructions of various widths, not just those that use the `0x90` byte
|
||||
encoding. To use optimised NOP filling in a code section, leave off the
|
||||
"fillval" argument, i.e. `.balign N`, `.p2align N` or `.align N` respectively.
|
||||
|
||||
Changes to the OCaml bindings
|
||||
-----------------------------
|
||||
|
||||
|
||||
@@ -3462,17 +3462,6 @@ bool AsmParser::parseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
|
||||
}
|
||||
}
|
||||
|
||||
if (HasFillExpr && FillExpr != 0) {
|
||||
MCSection *Sec = getStreamer().getCurrentSectionOnly();
|
||||
if (Sec && Sec->isVirtualSection()) {
|
||||
ReturnVal |=
|
||||
Warning(FillExprLoc, "ignoring non-zero fill value in " +
|
||||
Sec->getVirtualSectionKind() + " section '" +
|
||||
Sec->getName() + "'");
|
||||
FillExpr = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Diagnose non-sensical max bytes to align.
|
||||
if (MaxBytesLoc.isValid()) {
|
||||
if (MaxBytesToFill < 1) {
|
||||
@@ -3489,13 +3478,20 @@ bool AsmParser::parseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
|
||||
}
|
||||
}
|
||||
|
||||
// Check whether we should use optimal code alignment for this .align
|
||||
// directive.
|
||||
const MCSection *Section = getStreamer().getCurrentSectionOnly();
|
||||
assert(Section && "must have section to emit alignment");
|
||||
bool useCodeAlign = Section->useCodeAlign();
|
||||
if ((!HasFillExpr || Lexer.getMAI().getTextAlignFillValue() == FillExpr) &&
|
||||
ValueSize == 1 && useCodeAlign) {
|
||||
|
||||
if (HasFillExpr && FillExpr != 0 && Section->isVirtualSection()) {
|
||||
ReturnVal |=
|
||||
Warning(FillExprLoc, "ignoring non-zero fill value in " +
|
||||
Section->getVirtualSectionKind() +
|
||||
" section '" + Section->getName() + "'");
|
||||
FillExpr = 0;
|
||||
}
|
||||
|
||||
// Check whether we should use optimal code alignment for this .align
|
||||
// directive.
|
||||
if (Section->useCodeAlign() && !HasFillExpr) {
|
||||
getStreamer().emitCodeAlignment(
|
||||
Align(Alignment), &getTargetParser().getSTI(), MaxBytesToFill);
|
||||
} else {
|
||||
|
||||
22
llvm/test/MC/AArch64/align-fill-byte-zero.s
Normal file
22
llvm/test/MC/AArch64/align-fill-byte-zero.s
Normal file
@@ -0,0 +1,22 @@
|
||||
// RUN: llvm-mc -triple aarch64 %s -o - | FileCheck %s --check-prefix=ASM
|
||||
// RUN: llvm-mc -triple aarch64 -filetype obj %s -o - | \
|
||||
// RUN: llvm-objdump -dz - | FileCheck %s --check-prefix=OBJ
|
||||
|
||||
// llvm.org/pr30955 - LLVM was handling `.balign <alignment>, 0` strangely on
|
||||
// non-x86 targets.
|
||||
|
||||
.text
|
||||
|
||||
// ASM: add x14, x14, #1
|
||||
// OBJ: 910005ce add x14, x14, #0x1
|
||||
add x14, x14, 0x1
|
||||
|
||||
// ASM: .p2align 4, 0x0
|
||||
// OBJ-NEXT: 00000000 udf #0x0
|
||||
// OBJ-NEXT: 00000000 udf #0x0
|
||||
// OBJ-NEXT: 00000000 udf #0x0
|
||||
.balign 0x10, 0
|
||||
|
||||
// ASM: add x14, x14, #1
|
||||
// OBJ-NEXT: 910005ce add x14, x14, #0x1
|
||||
add x14, x14, 0x1
|
||||
40
llvm/test/MC/ARM/align-fill-byte-zero.s
Normal file
40
llvm/test/MC/ARM/align-fill-byte-zero.s
Normal file
@@ -0,0 +1,40 @@
|
||||
// RUN: llvm-mc -triple armv7a %s -o - | FileCheck %s --check-prefix=ASM-ARM
|
||||
// RUN: llvm-mc -triple armv7a -filetype obj %s -o - | \
|
||||
// RUN: llvm-objdump --triple=armv7a -dz - | FileCheck %s --check-prefix=OBJ-ARM
|
||||
|
||||
// RUN: llvm-mc -triple thumbv7a %s -o - | FileCheck %s --check-prefix=ASM-THUMB
|
||||
// RUN: llvm-mc -triple thumbv7a -filetype obj %s -o - | \
|
||||
// RUN: llvm-objdump --triple=thumbv7a -dz - | FileCheck %s --check-prefix=OBJ-THUMB
|
||||
|
||||
// llvm.org/pr30955 - LLVM was handling `.balign <alignment>, 0` strangely on
|
||||
// non-x86 targets.
|
||||
|
||||
.text
|
||||
|
||||
// ASM-ARM: add r0, r0, #1
|
||||
// OBJ-ARM: e2800001 add r0, r0, #1
|
||||
|
||||
// ASM-THUMB: add.w r0, r0, #1
|
||||
// OBJ-THUMB: f100 0001 add.w r0, r0, #0x1
|
||||
add r0, r0, 0x1
|
||||
|
||||
// ASM-ARM: .p2align 4, 0x0
|
||||
// OBJ-ARM-NEXT: 00000000 andeq r0, r0, r0
|
||||
// OBJ-ARM-NEXT: 00000000 andeq r0, r0, r0
|
||||
// OBJ-ARM-NEXT: 00000000 andeq r0, r0, r0
|
||||
|
||||
// ASM-THUMB: .p2align 4, 0x0
|
||||
// OBJ-THUMB-NEXT: 0000 movs r0, r0
|
||||
// OBJ-THUMB-NEXT: 0000 movs r0, r0
|
||||
// OBJ-THUMB-NEXT: 0000 movs r0, r0
|
||||
// OBJ-THUMB-NEXT: 0000 movs r0, r0
|
||||
// OBJ-THUMB-NEXT: 0000 movs r0, r0
|
||||
// OBJ-THUMB-NEXT: 0000 movs r0, r0
|
||||
.balign 0x10, 0
|
||||
|
||||
// ASM-ARM: add r0, r0, #1
|
||||
// OBJ-ARM-NEXT: e2800001 add r0, r0, #1
|
||||
|
||||
// ASM-THUMB: add.w r0, r0, #1
|
||||
// OBJ-THUMB-NEXT: f100 0001 add.w r0, r0, #0x1
|
||||
add r0, r0, 0x1
|
||||
@@ -1,6 +1,8 @@
|
||||
# RUN: not llvm-mc -triple i386-apple-darwin9 %s 2> %t.err | FileCheck %s
|
||||
# RUN: FileCheck < %t.err %s --check-prefix=CHECK-WARN
|
||||
|
||||
.data
|
||||
|
||||
# CHECK: TEST0:
|
||||
# CHECK: .p2align 1
|
||||
TEST0:
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
.text
|
||||
f0:
|
||||
.long 0
|
||||
.align 8, 0x90
|
||||
.align 8
|
||||
.long 0
|
||||
.align 8
|
||||
|
||||
|
||||
@@ -4,14 +4,14 @@
|
||||
.text
|
||||
f0:
|
||||
.long 0
|
||||
.align 8, 0x00000090
|
||||
.align 8
|
||||
.long 0
|
||||
.align 8
|
||||
|
||||
// But not in another section
|
||||
.data
|
||||
.long 0
|
||||
.align 8, 0x00000090
|
||||
.align 8, 0x90
|
||||
.long 0
|
||||
.align 8
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
ret
|
||||
# nop
|
||||
# 0x90
|
||||
.align 1, 0x90
|
||||
.align 1
|
||||
ret
|
||||
# 2 byte nop test
|
||||
.align 4, 0 # start with 16 byte alignment filled with zeros
|
||||
@@ -13,14 +13,14 @@
|
||||
ret
|
||||
# xchg %ax,%ax
|
||||
# 0x66, 0x90
|
||||
.align 2, 0x90
|
||||
.align 2
|
||||
ret
|
||||
# 3 byte nop test
|
||||
.align 4, 0 # start with 16 byte alignment filled with zeros
|
||||
ret
|
||||
# nopl (%[re]ax)
|
||||
# 0x0f, 0x1f, 0x00
|
||||
.align 2, 0x90
|
||||
.align 2
|
||||
ret
|
||||
# 4 byte nop test
|
||||
.align 4, 0 # start with 16 byte alignment filled with zeros
|
||||
@@ -30,7 +30,7 @@
|
||||
ret
|
||||
# nopl 0(%[re]ax)
|
||||
# 0x0f, 0x1f, 0x40, 0x00
|
||||
.align 3, 0x90
|
||||
.align 3
|
||||
ret
|
||||
# 5 byte nop test
|
||||
.align 4, 0 # start with 16 byte alignment filled with zeros
|
||||
@@ -39,7 +39,7 @@
|
||||
ret
|
||||
# nopl 0(%[re]ax,%[re]ax,1)
|
||||
# 0x0f, 0x1f, 0x44, 0x00, 0x00
|
||||
.align 3, 0x90
|
||||
.align 3
|
||||
ret
|
||||
# 6 byte nop test
|
||||
.align 4, 0 # start with 16 byte alignment filled with zeros
|
||||
@@ -47,14 +47,14 @@
|
||||
ret
|
||||
# nopw 0(%[re]ax,%[re]ax,1)
|
||||
# 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00
|
||||
.align 3, 0x90
|
||||
.align 3
|
||||
ret
|
||||
# 7 byte nop test
|
||||
.align 4, 0 # start with 16 byte alignment filled with zeros
|
||||
ret
|
||||
# nopl 0L(%[re]ax)
|
||||
# 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00
|
||||
.align 3, 0x90
|
||||
.align 3
|
||||
ret
|
||||
# 8 byte nop test
|
||||
.align 4, 0 # start with 16 byte alignment filled with zeros
|
||||
@@ -68,7 +68,7 @@
|
||||
ret
|
||||
# nopl 0L(%[re]ax,%[re]ax,1)
|
||||
# 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00
|
||||
.align 3, 0x90
|
||||
.align 3
|
||||
ret
|
||||
# 9 byte nop test
|
||||
.align 4, 0 # start with 16 byte alignment filled with zeros
|
||||
@@ -81,7 +81,7 @@
|
||||
ret
|
||||
# nopw 0L(%[re]ax,%[re]ax,1)
|
||||
# 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00
|
||||
.align 4, 0x90
|
||||
.align 4
|
||||
ret
|
||||
# 10 byte nop test
|
||||
.align 4, 0 # start with 16 byte alignment filled with zeros
|
||||
@@ -94,7 +94,7 @@
|
||||
ret
|
||||
# nopw %cs:0L(%[re]ax,%[re]ax,1)
|
||||
# 0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00
|
||||
.align 4, 0x90
|
||||
.align 4
|
||||
ret
|
||||
# 11 byte nop test
|
||||
.align 4, 0 # start with 16 byte alignment filled with zeros
|
||||
@@ -105,7 +105,7 @@
|
||||
ret
|
||||
# nopw %cs:0L(%[re]ax,%[re]ax,1)
|
||||
# 0x66, 0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00
|
||||
.align 4, 0x90
|
||||
.align 4
|
||||
ret
|
||||
# 12 byte nop test
|
||||
.align 4, 0 # start with 16 byte alignment filled with zeros
|
||||
@@ -117,7 +117,7 @@
|
||||
# nopw 0(%[re]ax,%[re]ax,1)
|
||||
# 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00,
|
||||
# 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00
|
||||
.align 4, 0x90
|
||||
.align 4
|
||||
ret
|
||||
# 13 byte nop test
|
||||
.align 4, 0 # start with 16 byte alignment filled with zeros
|
||||
@@ -128,7 +128,7 @@
|
||||
# nopl 0L(%[re]ax)
|
||||
# 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00,
|
||||
# 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00
|
||||
.align 4, 0x90
|
||||
.align 4
|
||||
ret
|
||||
# 14 byte nop test
|
||||
.align 4, 0 # start with 16 byte alignment filled with zeros
|
||||
@@ -138,7 +138,7 @@
|
||||
# nopl 0L(%[re]ax)
|
||||
# 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00,
|
||||
# 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00
|
||||
.align 4, 0x90
|
||||
.align 4
|
||||
ret
|
||||
# 15 byte nop test
|
||||
.align 4, 0 # start with 16 byte alignment filled with zeros
|
||||
@@ -147,7 +147,7 @@
|
||||
# nopl 0L(%[re]ax,%[re]ax,1)
|
||||
# 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00,
|
||||
# 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00
|
||||
.align 4, 0x90
|
||||
.align 4
|
||||
ret
|
||||
|
||||
# Only the .text sections gets optimal nops.
|
||||
|
||||
29
llvm/test/MC/RISCV/align-fill-byte-zero.s
Normal file
29
llvm/test/MC/RISCV/align-fill-byte-zero.s
Normal file
@@ -0,0 +1,29 @@
|
||||
// RUN: llvm-mc -triple riscv32 %s -o - | FileCheck %s --check-prefix=ASM
|
||||
// RUN: llvm-mc -triple riscv32 -filetype obj %s -o - | \
|
||||
// RUN: llvm-objdump -dz - | FileCheck %s --check-prefix=OBJ
|
||||
|
||||
// RUN: llvm-mc -triple riscv64 %s -o - | FileCheck %s --check-prefix=ASM
|
||||
// RUN: llvm-mc -triple riscv64 -filetype obj %s -o - | \
|
||||
// RUN: llvm-objdump -dz - | FileCheck %s --check-prefix=OBJ
|
||||
|
||||
// llvm.org/pr30955 - LLVM was handling `.balign <alignment>, 0` strangely on
|
||||
// non-x86 targets.
|
||||
|
||||
.text
|
||||
|
||||
// ASM: addi a0, a0, 1
|
||||
// OBJ: 00150513 addi a0, a0, 0x1
|
||||
addi a0, a0, 0x1
|
||||
|
||||
// ASM: .p2align 4, 0x0
|
||||
// OBJ-NEXT: 0000 <unknown>
|
||||
// OBJ-NEXT: 0000 <unknown>
|
||||
// OBJ-NEXT: 0000 <unknown>
|
||||
// OBJ-NEXT: 0000 <unknown>
|
||||
// OBJ-NEXT: 0000 <unknown>
|
||||
// OBJ-NEXT: 0000 <unknown>
|
||||
.balign 0x10, 0
|
||||
|
||||
// ASM: addi a0, a0, 1
|
||||
// OBJ-NEXT: 00150513 addi a0, a0, 0x1
|
||||
addi a0, a0, 0x1
|
||||
@@ -21,19 +21,19 @@
|
||||
.text
|
||||
.code16gcc
|
||||
.globl test
|
||||
.p2align 4, 0x90
|
||||
.p2align 4
|
||||
.type test,@function
|
||||
test:
|
||||
.nops 34
|
||||
movl %eax, %edi
|
||||
xorl %ebx, %ebx
|
||||
.p2align 4, 0x90
|
||||
.p2align 4
|
||||
movzbl (%esi,%ebx), %ecx
|
||||
calll called
|
||||
.nops 3
|
||||
retl
|
||||
|
||||
.p2align 4, 0x90
|
||||
.p2align 4
|
||||
.type called,@function
|
||||
called:
|
||||
.nops 1
|
||||
|
||||
Reference in New Issue
Block a user