Files
clang-p2996/llvm/test/CodeGen/X86/pr53842.ll
Jeremy Morse e6bf48d110 [X86] Don't request 0x90 nop filling in p2align directives (#110134)
As of rev ea222be0d, LLVMs assembler will actually try to honour the
"fill value" part of p2align directives. X86 printed these as 0x90, which
isn't actually what it wanted: we want multi-byte nops for .text
padding. Compiling via a textual assembly file produces single-byte
nop padding since ea222be0d but the built-in assembler will produce
multi-byte nops. This divergent behaviour is undesirable.

To fix: don't set the byte padding field for x86, which allows the
assembler to pick multi-byte nops. Test that we get the same multi-byte
padding when compiled via textual assembly or directly to object file.
Added same-align-bytes-with-llasm-llobj.ll to that effect, updated
numerous other tests to not contain check-lines for the explicit padding.
2024-10-02 11:14:05 +01:00

39 lines
1.8 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512dq | FileCheck %s
define void @PR53842() {
; CHECK-LABEL: PR53842:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vpmovzxbq {{.*#+}} zmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vpcmpeqq %ymm3, %ymm2, %ymm2
; CHECK-NEXT: vpcmpeqq %ymm3, %ymm1, %ymm1
; CHECK-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: jmp .LBB0_1
entry:
br label %vector.body
vector.body:
%index = phi i64 [ 0, %entry ], [ 0, %vector.body ]
%vec.phi = phi <8 x i64> [ zeroinitializer, %entry ], [ %i2, %vector.body ]
%wide.load23 = load <8 x i8>, ptr undef, align 1
%i = icmp eq <8 x i8> zeroinitializer, %wide.load23
%i1 = zext <8 x i1> %i to <8 x i64>
%i2 = add <8 x i64> %vec.phi, %i1
br i1 false, label %middle.block, label %vector.body
middle.block:
%bin.rdx = add <8 x i64> undef, %i2
unreachable
}