Don't overwrite existing target-cpu attributes. I've often found the replacement behavior annoying, and this is inconsistent with how the fast math command line flags interact with the function attributes. Does not yet change target-features, since I think that should behave as a concatenation.
521 lines
21 KiB
LLVM
521 lines
21 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=x86-64 -verify-machineinstrs | FileCheck %s -check-prefix=CHECK
|
|
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=x86-64 --x86-disable-avoid-SFB -verify-machineinstrs | FileCheck %s --check-prefix=DISABLED
|
|
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core-avx2 -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-AVX2
|
|
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=skx -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-AVX512
|
|
|
|
; The llc invocations above cover four configurations, each verified under its
; own FileCheck prefix:
;   -mcpu=x86-64                               (prefix CHECK)
;   -mcpu=x86-64 with --x86-disable-avoid-SFB  (prefix DISABLED)
;   -mcpu=core-avx2                            (prefix CHECK-AVX2)
;   -mcpu=skx                                  (prefix CHECK-AVX512)
; NOTE(review): "SFB" presumably stands for store-forwarding block -- confirm
; against the X86 backend sources.
; ModuleID = '../testSFB/testOverlapBlocks.c'
|
|
source_filename = "../testSFB/testOverlapBlocks.c"
|
|
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
|
target triple = "x86_64-unknown-linux-gnu"
|
|
|
|
; test_overlap_1(A, x): two 16-byte memcpy calls read the window [A-16, A)
; after narrower stores have written into parts of it.
;   Step 1 - store i32 7 at A-8, then copy [A-16, A) to A.
;   Step 2 - store sext(x) as i64 at A-9 and at A-16, store i8 0 at A-1, then
;            copy [A-16, A) to A+16.
; Expected code for the three default configurations (x86-64, core-avx2, skx)
; performs the first copy as 8+4+4 byte moves and the second as 8+4+2+1+1 byte
; moves. With --x86-disable-avoid-SFB (prefix DISABLED) each copy is a single
; movups load/store pair.
; Function Attrs: nounwind uwtable
|
|
define dso_local void @test_overlap_1(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
|
|
; CHECK-LABEL: test_overlap_1:
|
|
; CHECK: # %bb.0: # %entry
|
|
; CHECK-NEXT: movl $7, -8(%rdi)
|
|
; CHECK-NEXT: movq -16(%rdi), %rax
|
|
; CHECK-NEXT: movq %rax, (%rdi)
|
|
; CHECK-NEXT: movl -8(%rdi), %eax
|
|
; CHECK-NEXT: movl %eax, 8(%rdi)
|
|
; CHECK-NEXT: movl -4(%rdi), %eax
|
|
; CHECK-NEXT: movl %eax, 12(%rdi)
|
|
; CHECK-NEXT: movslq %esi, %rax
|
|
; CHECK-NEXT: movq %rax, -9(%rdi)
|
|
; CHECK-NEXT: movq %rax, -16(%rdi)
|
|
; CHECK-NEXT: movb $0, -1(%rdi)
|
|
; CHECK-NEXT: movq -16(%rdi), %rax
|
|
; CHECK-NEXT: movq %rax, 16(%rdi)
|
|
; CHECK-NEXT: movl -8(%rdi), %eax
|
|
; CHECK-NEXT: movl %eax, 24(%rdi)
|
|
; CHECK-NEXT: movzwl -4(%rdi), %eax
|
|
; CHECK-NEXT: movw %ax, 28(%rdi)
|
|
; CHECK-NEXT: movb -2(%rdi), %al
|
|
; CHECK-NEXT: movb %al, 30(%rdi)
|
|
; CHECK-NEXT: movb -1(%rdi), %al
|
|
; CHECK-NEXT: movb %al, 31(%rdi)
|
|
; CHECK-NEXT: retq
|
|
;
|
|
; DISABLED-LABEL: test_overlap_1:
|
|
; DISABLED: # %bb.0: # %entry
|
|
; DISABLED-NEXT: movl $7, -8(%rdi)
|
|
; DISABLED-NEXT: movups -16(%rdi), %xmm0
|
|
; DISABLED-NEXT: movups %xmm0, (%rdi)
|
|
; DISABLED-NEXT: movslq %esi, %rax
|
|
; DISABLED-NEXT: movq %rax, -9(%rdi)
|
|
; DISABLED-NEXT: movq %rax, -16(%rdi)
|
|
; DISABLED-NEXT: movb $0, -1(%rdi)
|
|
; DISABLED-NEXT: movups -16(%rdi), %xmm0
|
|
; DISABLED-NEXT: movups %xmm0, 16(%rdi)
|
|
; DISABLED-NEXT: retq
|
|
;
|
|
; CHECK-AVX2-LABEL: test_overlap_1:
|
|
; CHECK-AVX2: # %bb.0: # %entry
|
|
; CHECK-AVX2-NEXT: movl $7, -8(%rdi)
|
|
; CHECK-AVX2-NEXT: movq -16(%rdi), %rax
|
|
; CHECK-AVX2-NEXT: movq %rax, (%rdi)
|
|
; CHECK-AVX2-NEXT: movl -8(%rdi), %eax
|
|
; CHECK-AVX2-NEXT: movl %eax, 8(%rdi)
|
|
; CHECK-AVX2-NEXT: movl -4(%rdi), %eax
|
|
; CHECK-AVX2-NEXT: movl %eax, 12(%rdi)
|
|
; CHECK-AVX2-NEXT: movslq %esi, %rax
|
|
; CHECK-AVX2-NEXT: movq %rax, -9(%rdi)
|
|
; CHECK-AVX2-NEXT: movq %rax, -16(%rdi)
|
|
; CHECK-AVX2-NEXT: movb $0, -1(%rdi)
|
|
; CHECK-AVX2-NEXT: movq -16(%rdi), %rax
|
|
; CHECK-AVX2-NEXT: movq %rax, 16(%rdi)
|
|
; CHECK-AVX2-NEXT: movl -8(%rdi), %eax
|
|
; CHECK-AVX2-NEXT: movl %eax, 24(%rdi)
|
|
; CHECK-AVX2-NEXT: movzwl -4(%rdi), %eax
|
|
; CHECK-AVX2-NEXT: movw %ax, 28(%rdi)
|
|
; CHECK-AVX2-NEXT: movb -2(%rdi), %al
|
|
; CHECK-AVX2-NEXT: movb %al, 30(%rdi)
|
|
; CHECK-AVX2-NEXT: movb -1(%rdi), %al
|
|
; CHECK-AVX2-NEXT: movb %al, 31(%rdi)
|
|
; CHECK-AVX2-NEXT: retq
|
|
;
|
|
; CHECK-AVX512-LABEL: test_overlap_1:
|
|
; CHECK-AVX512: # %bb.0: # %entry
|
|
; CHECK-AVX512-NEXT: movl $7, -8(%rdi)
|
|
; CHECK-AVX512-NEXT: movq -16(%rdi), %rax
|
|
; CHECK-AVX512-NEXT: movq %rax, (%rdi)
|
|
; CHECK-AVX512-NEXT: movl -8(%rdi), %eax
|
|
; CHECK-AVX512-NEXT: movl %eax, 8(%rdi)
|
|
; CHECK-AVX512-NEXT: movl -4(%rdi), %eax
|
|
; CHECK-AVX512-NEXT: movl %eax, 12(%rdi)
|
|
; CHECK-AVX512-NEXT: movslq %esi, %rax
|
|
; CHECK-AVX512-NEXT: movq %rax, -9(%rdi)
|
|
; CHECK-AVX512-NEXT: movq %rax, -16(%rdi)
|
|
; CHECK-AVX512-NEXT: movb $0, -1(%rdi)
|
|
; CHECK-AVX512-NEXT: movq -16(%rdi), %rax
|
|
; CHECK-AVX512-NEXT: movq %rax, 16(%rdi)
|
|
; CHECK-AVX512-NEXT: movl -8(%rdi), %eax
|
|
; CHECK-AVX512-NEXT: movl %eax, 24(%rdi)
|
|
; CHECK-AVX512-NEXT: movzwl -4(%rdi), %eax
|
|
; CHECK-AVX512-NEXT: movw %ax, 28(%rdi)
|
|
; CHECK-AVX512-NEXT: movb -2(%rdi), %al
|
|
; CHECK-AVX512-NEXT: movb %al, 30(%rdi)
|
|
; CHECK-AVX512-NEXT: movb -1(%rdi), %al
|
|
; CHECK-AVX512-NEXT: movb %al, 31(%rdi)
|
|
; CHECK-AVX512-NEXT: retq
|
|
entry:
|
|
; %add.ptr = A-16 is the source of both 16-byte copies below.
%add.ptr = getelementptr inbounds i8, i8* %A, i64 -16
|
|
%add.ptr1 = getelementptr inbounds i8, i8* %A, i64 -8
|
|
%0 = bitcast i8* %add.ptr1 to i32*
|
|
; 4-byte store into bytes [A-8, A-4) of the window the next memcpy reads.
store i32 7, i32* %0, align 4
|
|
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %A, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
|
|
; Second round: 8-byte stores at A-9 and A-16 plus a 1-byte store at A-1, all
; inside the window read by the second memcpy.
%conv = sext i32 %x to i64
|
|
%add.ptr2 = getelementptr inbounds i8, i8* %A, i64 -9
|
|
%1 = bitcast i8* %add.ptr2 to i64*
|
|
store i64 %conv, i64* %1, align 8
|
|
%2 = bitcast i8* %add.ptr to i64*
|
|
store i64 %conv, i64* %2, align 8
|
|
%add.ptr5 = getelementptr inbounds i8, i8* %A, i64 -1
|
|
store i8 0, i8* %add.ptr5, align 1
|
|
%add.ptr6 = getelementptr inbounds i8, i8* %A, i64 16
|
|
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr6, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Declaration of the llvm.memcpy intrinsic (i8* destination, i8* source, i64
; byte count, i1 flag) called by the test_overlap_* functions in this file.
; NOTE(review): per the LLVM Language Reference the trailing i1 is the
; volatile flag -- confirm for the LLVM version in use.
; Function Attrs: argmemonly nounwind
|
|
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1
|
|
|
|
; test_overlap_2(A, x): two 16-byte memcpy calls read the window [A-16, A)
; after narrower stores have written into parts of it.
;   Step 1 - store sext(x) as i64 at A-16, then copy [A-16, A) to A.
;   Step 2 - store sext(x) as i64 at A-8 and i32 7 at A-12, then copy
;            [A-16, A) to A+16.
; Expected code for the three default configurations (x86-64, core-avx2, skx)
; performs the first copy as 8+8 byte moves and the second as 4+4+8 byte
; moves. With --x86-disable-avoid-SFB (prefix DISABLED) each copy is a single
; movups load/store pair.
; Function Attrs: nounwind uwtable
|
|
define dso_local void @test_overlap_2(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
|
|
; CHECK-LABEL: test_overlap_2:
|
|
; CHECK: # %bb.0: # %entry
|
|
; CHECK-NEXT: movslq %esi, %rax
|
|
; CHECK-NEXT: movq %rax, -16(%rdi)
|
|
; CHECK-NEXT: movq -16(%rdi), %rcx
|
|
; CHECK-NEXT: movq %rcx, (%rdi)
|
|
; CHECK-NEXT: movq -8(%rdi), %rcx
|
|
; CHECK-NEXT: movq %rcx, 8(%rdi)
|
|
; CHECK-NEXT: movq %rax, -8(%rdi)
|
|
; CHECK-NEXT: movl $7, -12(%rdi)
|
|
; CHECK-NEXT: movl -16(%rdi), %eax
|
|
; CHECK-NEXT: movl %eax, 16(%rdi)
|
|
; CHECK-NEXT: movl -12(%rdi), %eax
|
|
; CHECK-NEXT: movl %eax, 20(%rdi)
|
|
; CHECK-NEXT: movq -8(%rdi), %rax
|
|
; CHECK-NEXT: movq %rax, 24(%rdi)
|
|
; CHECK-NEXT: retq
|
|
;
|
|
; DISABLED-LABEL: test_overlap_2:
|
|
; DISABLED: # %bb.0: # %entry
|
|
; DISABLED-NEXT: movslq %esi, %rax
|
|
; DISABLED-NEXT: movq %rax, -16(%rdi)
|
|
; DISABLED-NEXT: movups -16(%rdi), %xmm0
|
|
; DISABLED-NEXT: movups %xmm0, (%rdi)
|
|
; DISABLED-NEXT: movq %rax, -8(%rdi)
|
|
; DISABLED-NEXT: movl $7, -12(%rdi)
|
|
; DISABLED-NEXT: movups -16(%rdi), %xmm0
|
|
; DISABLED-NEXT: movups %xmm0, 16(%rdi)
|
|
; DISABLED-NEXT: retq
|
|
;
|
|
; CHECK-AVX2-LABEL: test_overlap_2:
|
|
; CHECK-AVX2: # %bb.0: # %entry
|
|
; CHECK-AVX2-NEXT: movslq %esi, %rax
|
|
; CHECK-AVX2-NEXT: movq %rax, -16(%rdi)
|
|
; CHECK-AVX2-NEXT: movq -16(%rdi), %rcx
|
|
; CHECK-AVX2-NEXT: movq %rcx, (%rdi)
|
|
; CHECK-AVX2-NEXT: movq -8(%rdi), %rcx
|
|
; CHECK-AVX2-NEXT: movq %rcx, 8(%rdi)
|
|
; CHECK-AVX2-NEXT: movq %rax, -8(%rdi)
|
|
; CHECK-AVX2-NEXT: movl $7, -12(%rdi)
|
|
; CHECK-AVX2-NEXT: movl -16(%rdi), %eax
|
|
; CHECK-AVX2-NEXT: movl %eax, 16(%rdi)
|
|
; CHECK-AVX2-NEXT: movl -12(%rdi), %eax
|
|
; CHECK-AVX2-NEXT: movl %eax, 20(%rdi)
|
|
; CHECK-AVX2-NEXT: movq -8(%rdi), %rax
|
|
; CHECK-AVX2-NEXT: movq %rax, 24(%rdi)
|
|
; CHECK-AVX2-NEXT: retq
|
|
;
|
|
; CHECK-AVX512-LABEL: test_overlap_2:
|
|
; CHECK-AVX512: # %bb.0: # %entry
|
|
; CHECK-AVX512-NEXT: movslq %esi, %rax
|
|
; CHECK-AVX512-NEXT: movq %rax, -16(%rdi)
|
|
; CHECK-AVX512-NEXT: movq -16(%rdi), %rcx
|
|
; CHECK-AVX512-NEXT: movq %rcx, (%rdi)
|
|
; CHECK-AVX512-NEXT: movq -8(%rdi), %rcx
|
|
; CHECK-AVX512-NEXT: movq %rcx, 8(%rdi)
|
|
; CHECK-AVX512-NEXT: movq %rax, -8(%rdi)
|
|
; CHECK-AVX512-NEXT: movl $7, -12(%rdi)
|
|
; CHECK-AVX512-NEXT: movl -16(%rdi), %eax
|
|
; CHECK-AVX512-NEXT: movl %eax, 16(%rdi)
|
|
; CHECK-AVX512-NEXT: movl -12(%rdi), %eax
|
|
; CHECK-AVX512-NEXT: movl %eax, 20(%rdi)
|
|
; CHECK-AVX512-NEXT: movq -8(%rdi), %rax
|
|
; CHECK-AVX512-NEXT: movq %rax, 24(%rdi)
|
|
; CHECK-AVX512-NEXT: retq
|
|
entry:
|
|
; %add.ptr = A-16 is the source of both 16-byte copies below.
%add.ptr = getelementptr inbounds i8, i8* %A, i64 -16
|
|
%conv = sext i32 %x to i64
|
|
%0 = bitcast i8* %add.ptr to i64*
|
|
; 8-byte store into bytes [A-16, A-8), the first half of the window the next
; memcpy reads.
store i64 %conv, i64* %0, align 8
|
|
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %A, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
|
|
; Second round: an 8-byte store at A-8 and a 4-byte store at A-12, both inside
; the window read by the second memcpy.
%add.ptr3 = getelementptr inbounds i8, i8* %A, i64 -8
|
|
%1 = bitcast i8* %add.ptr3 to i64*
|
|
store i64 %conv, i64* %1, align 8
|
|
%add.ptr4 = getelementptr inbounds i8, i8* %A, i64 -12
|
|
%2 = bitcast i8* %add.ptr4 to i32*
|
|
store i32 7, i32* %2, align 4
|
|
%add.ptr5 = getelementptr inbounds i8, i8* %A, i64 16
|
|
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr5, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; test_overlap_3(A, x): same shape as test_overlap_1, but the first narrow
; store lands at A-10 instead of A-8.
;   Step 1 - store i32 7 at A-10, then copy [A-16, A) to A.
;   Step 2 - store sext(x) as i64 at A-9 and at A-16, store i8 0 at A-1, then
;            copy [A-16, A) to A+16.
; Expected code for the three default configurations (x86-64, core-avx2, skx)
; performs the first copy as 4+2+4+4+2 byte moves and the second as 8+2+4+1+1
; byte moves. With --x86-disable-avoid-SFB (prefix DISABLED) each copy is a
; single movups load/store pair.
; Function Attrs: nounwind uwtable
|
|
define dso_local void @test_overlap_3(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
|
|
; CHECK-LABEL: test_overlap_3:
|
|
; CHECK: # %bb.0: # %entry
|
|
; CHECK-NEXT: movl $7, -10(%rdi)
|
|
; CHECK-NEXT: movl -16(%rdi), %eax
|
|
; CHECK-NEXT: movl %eax, (%rdi)
|
|
; CHECK-NEXT: movzwl -12(%rdi), %eax
|
|
; CHECK-NEXT: movw %ax, 4(%rdi)
|
|
; CHECK-NEXT: movl -10(%rdi), %eax
|
|
; CHECK-NEXT: movl %eax, 6(%rdi)
|
|
; CHECK-NEXT: movl -6(%rdi), %eax
|
|
; CHECK-NEXT: movl %eax, 10(%rdi)
|
|
; CHECK-NEXT: movzwl -2(%rdi), %eax
|
|
; CHECK-NEXT: movw %ax, 14(%rdi)
|
|
; CHECK-NEXT: movslq %esi, %rax
|
|
; CHECK-NEXT: movq %rax, -9(%rdi)
|
|
; CHECK-NEXT: movq %rax, -16(%rdi)
|
|
; CHECK-NEXT: movb $0, -1(%rdi)
|
|
; CHECK-NEXT: movq -16(%rdi), %rax
|
|
; CHECK-NEXT: movq %rax, 16(%rdi)
|
|
; CHECK-NEXT: movzwl -8(%rdi), %eax
|
|
; CHECK-NEXT: movw %ax, 24(%rdi)
|
|
; CHECK-NEXT: movl -6(%rdi), %eax
|
|
; CHECK-NEXT: movl %eax, 26(%rdi)
|
|
; CHECK-NEXT: movb -2(%rdi), %al
|
|
; CHECK-NEXT: movb %al, 30(%rdi)
|
|
; CHECK-NEXT: movb -1(%rdi), %al
|
|
; CHECK-NEXT: movb %al, 31(%rdi)
|
|
; CHECK-NEXT: retq
|
|
;
|
|
; DISABLED-LABEL: test_overlap_3:
|
|
; DISABLED: # %bb.0: # %entry
|
|
; DISABLED-NEXT: movl $7, -10(%rdi)
|
|
; DISABLED-NEXT: movups -16(%rdi), %xmm0
|
|
; DISABLED-NEXT: movups %xmm0, (%rdi)
|
|
; DISABLED-NEXT: movslq %esi, %rax
|
|
; DISABLED-NEXT: movq %rax, -9(%rdi)
|
|
; DISABLED-NEXT: movq %rax, -16(%rdi)
|
|
; DISABLED-NEXT: movb $0, -1(%rdi)
|
|
; DISABLED-NEXT: movups -16(%rdi), %xmm0
|
|
; DISABLED-NEXT: movups %xmm0, 16(%rdi)
|
|
; DISABLED-NEXT: retq
|
|
;
|
|
; CHECK-AVX2-LABEL: test_overlap_3:
|
|
; CHECK-AVX2: # %bb.0: # %entry
|
|
; CHECK-AVX2-NEXT: movl $7, -10(%rdi)
|
|
; CHECK-AVX2-NEXT: movl -16(%rdi), %eax
|
|
; CHECK-AVX2-NEXT: movl %eax, (%rdi)
|
|
; CHECK-AVX2-NEXT: movzwl -12(%rdi), %eax
|
|
; CHECK-AVX2-NEXT: movw %ax, 4(%rdi)
|
|
; CHECK-AVX2-NEXT: movl -10(%rdi), %eax
|
|
; CHECK-AVX2-NEXT: movl %eax, 6(%rdi)
|
|
; CHECK-AVX2-NEXT: movl -6(%rdi), %eax
|
|
; CHECK-AVX2-NEXT: movl %eax, 10(%rdi)
|
|
; CHECK-AVX2-NEXT: movzwl -2(%rdi), %eax
|
|
; CHECK-AVX2-NEXT: movw %ax, 14(%rdi)
|
|
; CHECK-AVX2-NEXT: movslq %esi, %rax
|
|
; CHECK-AVX2-NEXT: movq %rax, -9(%rdi)
|
|
; CHECK-AVX2-NEXT: movq %rax, -16(%rdi)
|
|
; CHECK-AVX2-NEXT: movb $0, -1(%rdi)
|
|
; CHECK-AVX2-NEXT: movq -16(%rdi), %rax
|
|
; CHECK-AVX2-NEXT: movq %rax, 16(%rdi)
|
|
; CHECK-AVX2-NEXT: movzwl -8(%rdi), %eax
|
|
; CHECK-AVX2-NEXT: movw %ax, 24(%rdi)
|
|
; CHECK-AVX2-NEXT: movl -6(%rdi), %eax
|
|
; CHECK-AVX2-NEXT: movl %eax, 26(%rdi)
|
|
; CHECK-AVX2-NEXT: movb -2(%rdi), %al
|
|
; CHECK-AVX2-NEXT: movb %al, 30(%rdi)
|
|
; CHECK-AVX2-NEXT: movb -1(%rdi), %al
|
|
; CHECK-AVX2-NEXT: movb %al, 31(%rdi)
|
|
; CHECK-AVX2-NEXT: retq
|
|
;
|
|
; CHECK-AVX512-LABEL: test_overlap_3:
|
|
; CHECK-AVX512: # %bb.0: # %entry
|
|
; CHECK-AVX512-NEXT: movl $7, -10(%rdi)
|
|
; CHECK-AVX512-NEXT: movl -16(%rdi), %eax
|
|
; CHECK-AVX512-NEXT: movl %eax, (%rdi)
|
|
; CHECK-AVX512-NEXT: movzwl -12(%rdi), %eax
|
|
; CHECK-AVX512-NEXT: movw %ax, 4(%rdi)
|
|
; CHECK-AVX512-NEXT: movl -10(%rdi), %eax
|
|
; CHECK-AVX512-NEXT: movl %eax, 6(%rdi)
|
|
; CHECK-AVX512-NEXT: movl -6(%rdi), %eax
|
|
; CHECK-AVX512-NEXT: movl %eax, 10(%rdi)
|
|
; CHECK-AVX512-NEXT: movzwl -2(%rdi), %eax
|
|
; CHECK-AVX512-NEXT: movw %ax, 14(%rdi)
|
|
; CHECK-AVX512-NEXT: movslq %esi, %rax
|
|
; CHECK-AVX512-NEXT: movq %rax, -9(%rdi)
|
|
; CHECK-AVX512-NEXT: movq %rax, -16(%rdi)
|
|
; CHECK-AVX512-NEXT: movb $0, -1(%rdi)
|
|
; CHECK-AVX512-NEXT: movq -16(%rdi), %rax
|
|
; CHECK-AVX512-NEXT: movq %rax, 16(%rdi)
|
|
; CHECK-AVX512-NEXT: movzwl -8(%rdi), %eax
|
|
; CHECK-AVX512-NEXT: movw %ax, 24(%rdi)
|
|
; CHECK-AVX512-NEXT: movl -6(%rdi), %eax
|
|
; CHECK-AVX512-NEXT: movl %eax, 26(%rdi)
|
|
; CHECK-AVX512-NEXT: movb -2(%rdi), %al
|
|
; CHECK-AVX512-NEXT: movb %al, 30(%rdi)
|
|
; CHECK-AVX512-NEXT: movb -1(%rdi), %al
|
|
; CHECK-AVX512-NEXT: movb %al, 31(%rdi)
|
|
; CHECK-AVX512-NEXT: retq
|
|
entry:
|
|
; %add.ptr = A-16 is the source of both 16-byte copies below.
%add.ptr = getelementptr inbounds i8, i8* %A, i64 -16
|
|
%add.ptr1 = getelementptr inbounds i8, i8* %A, i64 -10
|
|
%0 = bitcast i8* %add.ptr1 to i32*
|
|
; 4-byte store into bytes [A-10, A-6) of the window the next memcpy reads.
store i32 7, i32* %0, align 4
|
|
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %A, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
|
|
; Second round: 8-byte stores at A-9 and A-16 plus a 1-byte store at A-1, all
; inside the window read by the second memcpy.
%conv = sext i32 %x to i64
|
|
%add.ptr2 = getelementptr inbounds i8, i8* %A, i64 -9
|
|
%1 = bitcast i8* %add.ptr2 to i64*
|
|
store i64 %conv, i64* %1, align 8
|
|
%2 = bitcast i8* %add.ptr to i64*
|
|
store i64 %conv, i64* %2, align 8
|
|
%add.ptr5 = getelementptr inbounds i8, i8* %A, i64 -1
|
|
store i8 0, i8* %add.ptr5, align 1
|
|
%add.ptr6 = getelementptr inbounds i8, i8* %A, i64 16
|
|
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr6, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; test_overlap_4(A, x): the first 16-byte memcpy has no store before it in
; this function; only the second one follows narrower stores.
;   Step 1 - copy [A-16, A) to A.
;   Step 2 - store sext(x) as i64 at A-8, i32 x at A-16 and i32 0 at A-11,
;            then copy [A-16, A) to A+16.
; Expected code performs the first copy as one 16-byte load/store pair in all
; four configurations (movups, or vmovups for core-avx2 and skx). The second
; copy is expected as 4+1+4+4+2+1 byte moves in the three default
; configurations and as one movups pair with --x86-disable-avoid-SFB (prefix
; DISABLED).
; Function Attrs: nounwind uwtable
|
|
define dso_local void @test_overlap_4(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
|
|
; CHECK-LABEL: test_overlap_4:
|
|
; CHECK: # %bb.0: # %entry
|
|
; CHECK-NEXT: movups -16(%rdi), %xmm0
|
|
; CHECK-NEXT: movups %xmm0, (%rdi)
|
|
; CHECK-NEXT: movslq %esi, %rax
|
|
; CHECK-NEXT: movq %rax, -8(%rdi)
|
|
; CHECK-NEXT: movl %eax, -16(%rdi)
|
|
; CHECK-NEXT: movl $0, -11(%rdi)
|
|
; CHECK-NEXT: movl -16(%rdi), %eax
|
|
; CHECK-NEXT: movl %eax, 16(%rdi)
|
|
; CHECK-NEXT: movb -12(%rdi), %al
|
|
; CHECK-NEXT: movb %al, 20(%rdi)
|
|
; CHECK-NEXT: movl -11(%rdi), %eax
|
|
; CHECK-NEXT: movl %eax, 21(%rdi)
|
|
; CHECK-NEXT: movl -7(%rdi), %eax
|
|
; CHECK-NEXT: movl %eax, 25(%rdi)
|
|
; CHECK-NEXT: movzwl -3(%rdi), %eax
|
|
; CHECK-NEXT: movw %ax, 29(%rdi)
|
|
; CHECK-NEXT: movb -1(%rdi), %al
|
|
; CHECK-NEXT: movb %al, 31(%rdi)
|
|
; CHECK-NEXT: retq
|
|
;
|
|
; DISABLED-LABEL: test_overlap_4:
|
|
; DISABLED: # %bb.0: # %entry
|
|
; DISABLED-NEXT: movups -16(%rdi), %xmm0
|
|
; DISABLED-NEXT: movups %xmm0, (%rdi)
|
|
; DISABLED-NEXT: movslq %esi, %rax
|
|
; DISABLED-NEXT: movq %rax, -8(%rdi)
|
|
; DISABLED-NEXT: movl %eax, -16(%rdi)
|
|
; DISABLED-NEXT: movl $0, -11(%rdi)
|
|
; DISABLED-NEXT: movups -16(%rdi), %xmm0
|
|
; DISABLED-NEXT: movups %xmm0, 16(%rdi)
|
|
; DISABLED-NEXT: retq
|
|
;
|
|
; CHECK-AVX2-LABEL: test_overlap_4:
|
|
; CHECK-AVX2: # %bb.0: # %entry
|
|
; CHECK-AVX2-NEXT: vmovups -16(%rdi), %xmm0
|
|
; CHECK-AVX2-NEXT: vmovups %xmm0, (%rdi)
|
|
; CHECK-AVX2-NEXT: movslq %esi, %rax
|
|
; CHECK-AVX2-NEXT: movq %rax, -8(%rdi)
|
|
; CHECK-AVX2-NEXT: movl %eax, -16(%rdi)
|
|
; CHECK-AVX2-NEXT: movl $0, -11(%rdi)
|
|
; CHECK-AVX2-NEXT: movl -16(%rdi), %eax
|
|
; CHECK-AVX2-NEXT: movl %eax, 16(%rdi)
|
|
; CHECK-AVX2-NEXT: movb -12(%rdi), %al
|
|
; CHECK-AVX2-NEXT: movb %al, 20(%rdi)
|
|
; CHECK-AVX2-NEXT: movl -11(%rdi), %eax
|
|
; CHECK-AVX2-NEXT: movl %eax, 21(%rdi)
|
|
; CHECK-AVX2-NEXT: movl -7(%rdi), %eax
|
|
; CHECK-AVX2-NEXT: movl %eax, 25(%rdi)
|
|
; CHECK-AVX2-NEXT: movzwl -3(%rdi), %eax
|
|
; CHECK-AVX2-NEXT: movw %ax, 29(%rdi)
|
|
; CHECK-AVX2-NEXT: movb -1(%rdi), %al
|
|
; CHECK-AVX2-NEXT: movb %al, 31(%rdi)
|
|
; CHECK-AVX2-NEXT: retq
|
|
;
|
|
; CHECK-AVX512-LABEL: test_overlap_4:
|
|
; CHECK-AVX512: # %bb.0: # %entry
|
|
; CHECK-AVX512-NEXT: vmovups -16(%rdi), %xmm0
|
|
; CHECK-AVX512-NEXT: vmovups %xmm0, (%rdi)
|
|
; CHECK-AVX512-NEXT: movslq %esi, %rax
|
|
; CHECK-AVX512-NEXT: movq %rax, -8(%rdi)
|
|
; CHECK-AVX512-NEXT: movl %eax, -16(%rdi)
|
|
; CHECK-AVX512-NEXT: movl $0, -11(%rdi)
|
|
; CHECK-AVX512-NEXT: movl -16(%rdi), %eax
|
|
; CHECK-AVX512-NEXT: movl %eax, 16(%rdi)
|
|
; CHECK-AVX512-NEXT: movb -12(%rdi), %al
|
|
; CHECK-AVX512-NEXT: movb %al, 20(%rdi)
|
|
; CHECK-AVX512-NEXT: movl -11(%rdi), %eax
|
|
; CHECK-AVX512-NEXT: movl %eax, 21(%rdi)
|
|
; CHECK-AVX512-NEXT: movl -7(%rdi), %eax
|
|
; CHECK-AVX512-NEXT: movl %eax, 25(%rdi)
|
|
; CHECK-AVX512-NEXT: movzwl -3(%rdi), %eax
|
|
; CHECK-AVX512-NEXT: movw %ax, 29(%rdi)
|
|
; CHECK-AVX512-NEXT: movb -1(%rdi), %al
|
|
; CHECK-AVX512-NEXT: movb %al, 31(%rdi)
|
|
; CHECK-AVX512-NEXT: retq
|
|
entry:
|
|
; %add.ptr = A-16 is the source of both 16-byte copies below.
%add.ptr = getelementptr inbounds i8, i8* %A, i64 -16
|
|
; First copy: no store precedes it inside this function.
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %A, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
|
|
; Second round: an 8-byte store at A-8 and 4-byte stores at A-16 and A-11, all
; inside the window read by the second memcpy.
%conv = sext i32 %x to i64
|
|
%add.ptr1 = getelementptr inbounds i8, i8* %A, i64 -8
|
|
%0 = bitcast i8* %add.ptr1 to i64*
|
|
store i64 %conv, i64* %0, align 8
|
|
%1 = bitcast i8* %add.ptr to i32*
|
|
store i32 %x, i32* %1, align 4
|
|
%add.ptr3 = getelementptr inbounds i8, i8* %A, i64 -11
|
|
%2 = bitcast i8* %add.ptr3 to i32*
|
|
store i32 0, i32* %2, align 4
|
|
%add.ptr4 = getelementptr inbounds i8, i8* %A, i64 16
|
|
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr4, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; test_overlap_5(A, x): the first 16-byte memcpy has no store before it in
; this function; the second follows one 8-byte store and two 1-byte stores.
;   Step 1 - copy [A-16, A) to A.
;   Step 2 - store sext(x) as i64 at A-16, trunc(x) as i8 at A-14 and i8 0 at
;            A-11, then copy [A-16, A) to A+16.
; Expected code performs the first copy as one 16-byte load/store pair in all
; four configurations (movups, or vmovups for core-avx2 and skx). The second
; copy is expected as 2+1+2+1+8+2 byte moves in the three default
; configurations and as one movups pair with --x86-disable-avoid-SFB (prefix
; DISABLED).
; Function Attrs: nounwind uwtable
|
|
define dso_local void @test_overlap_5(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
|
|
; CHECK-LABEL: test_overlap_5:
|
|
; CHECK: # %bb.0: # %entry
|
|
; CHECK-NEXT: movups -16(%rdi), %xmm0
|
|
; CHECK-NEXT: movups %xmm0, (%rdi)
|
|
; CHECK-NEXT: movslq %esi, %rax
|
|
; CHECK-NEXT: movq %rax, -16(%rdi)
|
|
; CHECK-NEXT: movb %al, -14(%rdi)
|
|
; CHECK-NEXT: movb $0, -11(%rdi)
|
|
; CHECK-NEXT: movzwl -16(%rdi), %eax
|
|
; CHECK-NEXT: movw %ax, 16(%rdi)
|
|
; CHECK-NEXT: movb -14(%rdi), %al
|
|
; CHECK-NEXT: movb %al, 18(%rdi)
|
|
; CHECK-NEXT: movzwl -13(%rdi), %eax
|
|
; CHECK-NEXT: movw %ax, 19(%rdi)
|
|
; CHECK-NEXT: movb -11(%rdi), %al
|
|
; CHECK-NEXT: movb %al, 21(%rdi)
|
|
; CHECK-NEXT: movq -10(%rdi), %rax
|
|
; CHECK-NEXT: movq %rax, 22(%rdi)
|
|
; CHECK-NEXT: movzwl -2(%rdi), %eax
|
|
; CHECK-NEXT: movw %ax, 30(%rdi)
|
|
; CHECK-NEXT: retq
|
|
;
|
|
; DISABLED-LABEL: test_overlap_5:
|
|
; DISABLED: # %bb.0: # %entry
|
|
; DISABLED-NEXT: movups -16(%rdi), %xmm0
|
|
; DISABLED-NEXT: movups %xmm0, (%rdi)
|
|
; DISABLED-NEXT: movslq %esi, %rax
|
|
; DISABLED-NEXT: movq %rax, -16(%rdi)
|
|
; DISABLED-NEXT: movb %al, -14(%rdi)
|
|
; DISABLED-NEXT: movb $0, -11(%rdi)
|
|
; DISABLED-NEXT: movups -16(%rdi), %xmm0
|
|
; DISABLED-NEXT: movups %xmm0, 16(%rdi)
|
|
; DISABLED-NEXT: retq
|
|
;
|
|
; CHECK-AVX2-LABEL: test_overlap_5:
|
|
; CHECK-AVX2: # %bb.0: # %entry
|
|
; CHECK-AVX2-NEXT: vmovups -16(%rdi), %xmm0
|
|
; CHECK-AVX2-NEXT: vmovups %xmm0, (%rdi)
|
|
; CHECK-AVX2-NEXT: movslq %esi, %rax
|
|
; CHECK-AVX2-NEXT: movq %rax, -16(%rdi)
|
|
; CHECK-AVX2-NEXT: movb %al, -14(%rdi)
|
|
; CHECK-AVX2-NEXT: movb $0, -11(%rdi)
|
|
; CHECK-AVX2-NEXT: movzwl -16(%rdi), %eax
|
|
; CHECK-AVX2-NEXT: movw %ax, 16(%rdi)
|
|
; CHECK-AVX2-NEXT: movb -14(%rdi), %al
|
|
; CHECK-AVX2-NEXT: movb %al, 18(%rdi)
|
|
; CHECK-AVX2-NEXT: movzwl -13(%rdi), %eax
|
|
; CHECK-AVX2-NEXT: movw %ax, 19(%rdi)
|
|
; CHECK-AVX2-NEXT: movb -11(%rdi), %al
|
|
; CHECK-AVX2-NEXT: movb %al, 21(%rdi)
|
|
; CHECK-AVX2-NEXT: movq -10(%rdi), %rax
|
|
; CHECK-AVX2-NEXT: movq %rax, 22(%rdi)
|
|
; CHECK-AVX2-NEXT: movzwl -2(%rdi), %eax
|
|
; CHECK-AVX2-NEXT: movw %ax, 30(%rdi)
|
|
; CHECK-AVX2-NEXT: retq
|
|
;
|
|
; CHECK-AVX512-LABEL: test_overlap_5:
|
|
; CHECK-AVX512: # %bb.0: # %entry
|
|
; CHECK-AVX512-NEXT: vmovups -16(%rdi), %xmm0
|
|
; CHECK-AVX512-NEXT: vmovups %xmm0, (%rdi)
|
|
; CHECK-AVX512-NEXT: movslq %esi, %rax
|
|
; CHECK-AVX512-NEXT: movq %rax, -16(%rdi)
|
|
; CHECK-AVX512-NEXT: movb %al, -14(%rdi)
|
|
; CHECK-AVX512-NEXT: movb $0, -11(%rdi)
|
|
; CHECK-AVX512-NEXT: movzwl -16(%rdi), %eax
|
|
; CHECK-AVX512-NEXT: movw %ax, 16(%rdi)
|
|
; CHECK-AVX512-NEXT: movb -14(%rdi), %al
|
|
; CHECK-AVX512-NEXT: movb %al, 18(%rdi)
|
|
; CHECK-AVX512-NEXT: movzwl -13(%rdi), %eax
|
|
; CHECK-AVX512-NEXT: movw %ax, 19(%rdi)
|
|
; CHECK-AVX512-NEXT: movb -11(%rdi), %al
|
|
; CHECK-AVX512-NEXT: movb %al, 21(%rdi)
|
|
; CHECK-AVX512-NEXT: movq -10(%rdi), %rax
|
|
; CHECK-AVX512-NEXT: movq %rax, 22(%rdi)
|
|
; CHECK-AVX512-NEXT: movzwl -2(%rdi), %eax
|
|
; CHECK-AVX512-NEXT: movw %ax, 30(%rdi)
|
|
; CHECK-AVX512-NEXT: retq
|
|
entry:
|
|
; %add.ptr = A-16 is the source of both 16-byte copies below.
%add.ptr = getelementptr inbounds i8, i8* %A, i64 -16
|
|
; First copy: no store precedes it inside this function.
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %A, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
|
|
; Second round: an 8-byte store at A-16 followed by 1-byte stores at A-14 and
; A-11, all inside the window read by the second memcpy.
%conv = sext i32 %x to i64
|
|
%0 = bitcast i8* %add.ptr to i64*
|
|
store i64 %conv, i64* %0, align 8
|
|
%conv2 = trunc i32 %x to i8
|
|
%add.ptr3 = getelementptr inbounds i8, i8* %A, i64 -14
|
|
store i8 %conv2, i8* %add.ptr3, align 1
|
|
%add.ptr4 = getelementptr inbounds i8, i8* %A, i64 -11
|
|
store i8 0, i8* %add.ptr4, align 1
|
|
%add.ptr5 = getelementptr inbounds i8, i8* %A, i64 16
|
|
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr5, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Attribute group #0 is attached to every test_overlap_* definition. It sets
; "target-features" but carries no "target-cpu" entry.
; NOTE(review): presumably the CPU is meant to come from the -mcpu option on
; the llc command lines at the top of the file -- confirm.
attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
|
|
|
; Attribute group #1 is attached to the llvm.memcpy declaration.
attributes #1 = { argmemonly nounwind }
|