Files
clang-p2996/llvm/test/Transforms/LoopStrengthReduce/X86/macro-fuse-cmp.ll
Jeremy Morse e6bf48d110 [X86] Don't request 0x90 nop filling in p2align directives (#110134)
As of rev ea222be0d, LLVM's assembler will actually try to honour the
"fill value" part of p2align directives. X86 printed these as 0x90, which
isn't actually what it wanted: we want multi-byte nops for .text
padding. Compiling via a textual assembly file produces single-byte
nop padding since ea222be0d, but the built-in assembler will produce
multi-byte nops. This divergent behaviour is undesirable.

To fix: don't set the byte padding field for x86, which allows the
assembler to pick multi-byte nops. Test that we get the same multi-byte
padding when compiled via textual assembly or directly to object file.
Added same-align-bytes-with-llasm-llobj.ll to that effect, updated
numerous other tests to not contain check-lines for the explicit padding.
2024-10-02 11:14:05 +01:00

128 lines
5.9 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; The three opt invocations check the IR produced by the -loop-reduce pass for
; three different -mcpu settings. The three llc invocations check the generated
; assembly with the default feature set and with each of the two fusion
; attributes enabled; both fusion invocations share a single check prefix, so
; they are expected to produce identical output.
; RUN: opt < %s -loop-reduce -mcpu=btver2 -S | FileCheck %s --check-prefix=JAG
; RUN: opt < %s -loop-reduce -mcpu=bdver2 -S | FileCheck %s --check-prefix=BUL
; RUN: opt < %s -loop-reduce -mcpu=haswell -S | FileCheck %s --check-prefix=HSW
; RUN: llc < %s | FileCheck %s --check-prefix=BASE
; RUN: llc < %s -mattr=macrofusion | FileCheck %s --check-prefix=FUSE
; RUN: llc < %s -mattr=branchfusion | FileCheck %s --check-prefix=FUSE
; No -mtriple is passed on the command lines above; the target comes from the
; module-level settings below.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"
; PR35681 - https://bugs.llvm.org/show_bug.cgi?id=35681
; If a CPU can macro-fuse a compare and branch, then LSR discounts the cost of
; that compare and avoids generating large offsets in each memory access.
; This reduces code size and may improve decode throughput.
; maxArray computes x[i] = (y[i] > x[i]) ? y[i] : x[i] over 65536 doubles,
; two lanes per iteration. The loop's exit test is `index.next == 65536`, which
; is the compare-and-branch pair this test is about.
;
; Expected results, as recorded in the autogenerated assertions below
;  * opt with -mcpu=btver2 - LSR rewrites the induction variable to start at
;    -524288 (minus the array size in bytes) and step by 16, so the exit test
;    compares against zero; every memory access then adds 524288 back.
;  * opt with -mcpu=bdver2 or -mcpu=haswell - the 0-based index, its step of 2
;    and the compare against 65536 are kept; addresses use index << 3.
;  * llc with default features - the loop ends in addq/jne with no cmp, and
;    each memory operand carries a 524288 displacement.
;  * llc with -mattr=macrofusion or -mattr=branchfusion - the loop uses scaled
;    addressing (%rax,8) with no displacement and ends in addq/cmpq/jne.
define void @maxArray(ptr noalias nocapture %x, ptr noalias nocapture readonly %y) {
; JAG-LABEL: @maxArray(
; JAG-NEXT: entry:
; JAG-NEXT: br label [[VECTOR_BODY:%.*]]
; JAG: vector.body:
; JAG-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[VECTOR_BODY]] ], [ -524288, [[ENTRY:%.*]] ]
; JAG-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[X:%.*]], i64 [[LSR_IV]]
; JAG-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[SCEVGEP2]], i64 524288
; JAG-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[Y:%.*]], i64 [[LSR_IV]]
; JAG-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 524288
; JAG-NEXT: [[XVAL:%.*]] = load <2 x double>, ptr [[SCEVGEP3]], align 8
; JAG-NEXT: [[YVAL:%.*]] = load <2 x double>, ptr [[SCEVGEP1]], align 8
; JAG-NEXT: [[CMP:%.*]] = fcmp ogt <2 x double> [[YVAL]], [[XVAL]]
; JAG-NEXT: [[MAX:%.*]] = select <2 x i1> [[CMP]], <2 x double> [[YVAL]], <2 x double> [[XVAL]]
; JAG-NEXT: store <2 x double> [[MAX]], ptr [[SCEVGEP3]], align 8
; JAG-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], 16
; JAG-NEXT: [[DONE:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
; JAG-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
; JAG: exit:
; JAG-NEXT: ret void
;
; BUL-LABEL: @maxArray(
; BUL-NEXT: entry:
; BUL-NEXT: br label [[VECTOR_BODY:%.*]]
; BUL: vector.body:
; BUL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; BUL-NEXT: [[TMP0:%.*]] = shl nuw nsw i64 [[INDEX]], 3
; BUL-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[X:%.*]], i64 [[TMP0]]
; BUL-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[INDEX]], 3
; BUL-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[Y:%.*]], i64 [[TMP1]]
; BUL-NEXT: [[XVAL:%.*]] = load <2 x double>, ptr [[SCEVGEP1]], align 8
; BUL-NEXT: [[YVAL:%.*]] = load <2 x double>, ptr [[SCEVGEP]], align 8
; BUL-NEXT: [[CMP:%.*]] = fcmp ogt <2 x double> [[YVAL]], [[XVAL]]
; BUL-NEXT: [[MAX:%.*]] = select <2 x i1> [[CMP]], <2 x double> [[YVAL]], <2 x double> [[XVAL]]
; BUL-NEXT: store <2 x double> [[MAX]], ptr [[SCEVGEP1]], align 8
; BUL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
; BUL-NEXT: [[DONE:%.*]] = icmp eq i64 [[INDEX_NEXT]], 65536
; BUL-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
; BUL: exit:
; BUL-NEXT: ret void
;
; HSW-LABEL: @maxArray(
; HSW-NEXT: entry:
; HSW-NEXT: br label [[VECTOR_BODY:%.*]]
; HSW: vector.body:
; HSW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; HSW-NEXT: [[TMP0:%.*]] = shl nuw nsw i64 [[INDEX]], 3
; HSW-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[X:%.*]], i64 [[TMP0]]
; HSW-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[INDEX]], 3
; HSW-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[Y:%.*]], i64 [[TMP1]]
; HSW-NEXT: [[XVAL:%.*]] = load <2 x double>, ptr [[SCEVGEP1]], align 8
; HSW-NEXT: [[YVAL:%.*]] = load <2 x double>, ptr [[SCEVGEP]], align 8
; HSW-NEXT: [[CMP:%.*]] = fcmp ogt <2 x double> [[YVAL]], [[XVAL]]
; HSW-NEXT: [[MAX:%.*]] = select <2 x i1> [[CMP]], <2 x double> [[YVAL]], <2 x double> [[XVAL]]
; HSW-NEXT: store <2 x double> [[MAX]], ptr [[SCEVGEP1]], align 8
; HSW-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
; HSW-NEXT: [[DONE:%.*]] = icmp eq i64 [[INDEX_NEXT]], 65536
; HSW-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
; HSW: exit:
; HSW-NEXT: ret void
;
; BASE-LABEL: maxArray:
; BASE: # %bb.0: # %entry
; BASE-NEXT: movq $-524288, %rax # imm = 0xFFF80000
; BASE-NEXT: .p2align 4
; BASE-NEXT: .LBB0_1: # %vector.body
; BASE-NEXT: # =>This Inner Loop Header: Depth=1
; BASE-NEXT: movupd 524288(%rdi,%rax), %xmm0
; BASE-NEXT: movupd 524288(%rsi,%rax), %xmm1
; BASE-NEXT: maxpd %xmm0, %xmm1
; BASE-NEXT: movupd %xmm1, 524288(%rdi,%rax)
; BASE-NEXT: addq $16, %rax
; BASE-NEXT: jne .LBB0_1
; BASE-NEXT: # %bb.2: # %exit
; BASE-NEXT: retq
; FUSE-LABEL: maxArray:
; FUSE: # %bb.0: # %entry
; FUSE-NEXT: xorl %eax, %eax
; FUSE-NEXT: .p2align 4
; FUSE-NEXT: .LBB0_1: # %vector.body
; FUSE-NEXT: # =>This Inner Loop Header: Depth=1
; FUSE-NEXT: movupd (%rdi,%rax,8), %xmm0
; FUSE-NEXT: movupd (%rsi,%rax,8), %xmm1
; FUSE-NEXT: maxpd %xmm0, %xmm1
; FUSE-NEXT: movupd %xmm1, (%rdi,%rax,8)
; FUSE-NEXT: addq $2, %rax
; FUSE-NEXT: cmpq $65536, %rax # imm = 0x10000
; FUSE-NEXT: jne .LBB0_1
; FUSE-NEXT: # %bb.2: # %exit
; FUSE-NEXT: retq
entry:
br label %vector.body
vector.body:
; %index counts doubles, starting at 0 on entry from %entry.
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
; Addresses of x[index] and y[index].
%gepx = getelementptr inbounds double, ptr %x, i64 %index
%gepy = getelementptr inbounds double, ptr %y, i64 %index
; 16-byte vector loads that are only guaranteed 8-byte alignment.
%xval = load <2 x double>, ptr %gepx, align 8
%yval = load <2 x double>, ptr %gepy, align 8
; Lane-wise max: take y where y > x (ordered compare), otherwise keep x.
%cmp = fcmp ogt <2 x double> %yval, %xval
%max = select <2 x i1> %cmp, <2 x double> %yval, <2 x double> %xval
; The result overwrites the x elements that were just loaded.
store <2 x double> %max, ptr %gepx, align 8
; Advance by one vector (2 doubles); leave the loop once 65536 doubles
; (65536 * 8 = 524288 bytes) have been processed.
%index.next = add i64 %index, 2
%done = icmp eq i64 %index.next, 65536
br i1 %done, label %exit, label %vector.body
exit:
ret void
}