By default, EmitCmp avoids cmpw with an i16 immediate, because its 66/67h length-changing prefixes cause pre-decode stalls; it instead extends the value to i32 and emits cmpl with an i32 immediate, unless the target has the TuningFastImm16 flag or we're building for optsize/minsize. However, if the value being compared is loaded from memory, the cost of the decode stall is likely to be outweighed by the benefits of folding the load into the cmpw: the load latency hides the stall, the encoding is shorter, and no extra register is needed to hold the extended load. This matches the behaviour of gcc and msvc. Fixes #90355
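For example, in length2_eq_const below the i16 load is now folded directly into the 16-bit immediate compare rather than being zero-extended first. A rough sketch of the two forms (the widened sequence is an approximation of the previous default, not copied from a particular build):

  # previous default: widen to i32 so the immediate compare avoids the 66h LCP
  movzwl (%rdi), %eax
  cmpl $12849, %eax # imm = 0x3231
  setne %al

  # with this change: fold the load and keep the i16 immediate compare
  cmpw $12849, (%rdi) # imm = 0x3231
  setne %al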
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64,X64-SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2

; This tests codegen time inlining/optimization of memcmp
; rdar://6480398

@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1

declare dso_local i32 @memcmp(ptr, ptr, i64)
declare dso_local i32 @bcmp(ptr, ptr, i64)

define i32 @length2(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-LABEL: length2:
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: movzwl %cx, %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
  ret i32 %m
}

define i1 @length2_eq(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-LABEL: length2_eq:
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: cmpw (%rsi), %ax
; X64-NEXT: sete %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
  %c = icmp eq i32 %m, 0
  ret i1 %c
}

define i1 @length2_eq_const(ptr %X) nounwind !prof !14 {
; X64-LABEL: length2_eq_const:
; X64: # %bb.0:
; X64-NEXT: cmpw $12849, (%rdi) # imm = 0x3231
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-LABEL: length2_eq_nobuiltin_attr:
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: movl $2, %edx
; X64-NEXT: callq memcmp
; X64-NEXT: testl %eax, %eax
; X64-NEXT: sete %al
; X64-NEXT: popq %rcx
; X64-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind nobuiltin
  %c = icmp eq i32 %m, 0
  ret i1 %c
}

define i32 @length3(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-LABEL: length3:
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %ecx
; X64-NEXT: movzwl (%rsi), %edx
; X64-NEXT: rolw $8, %cx
; X64-NEXT: rolw $8, %dx
; X64-NEXT: cmpw %dx, %cx
; X64-NEXT: jne .LBB4_3
; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movzbl 2(%rdi), %eax
; X64-NEXT: movzbl 2(%rsi), %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB4_3: # %res_block
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpw %dx, %cx
; X64-NEXT: sbbl %eax, %eax
; X64-NEXT: orl $1, %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
  ret i32 %m
}

define i1 @length3_eq(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-LABEL: length3_eq:
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: xorw (%rsi), %ax
; X64-NEXT: movb 2(%rdi), %cl
; X64-NEXT: xorb 2(%rsi), %cl
; X64-NEXT: movzbl %cl, %ecx
; X64-NEXT: orw %ax, %cx
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

define i32 @length4(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-LABEL: length4:
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %ecx
; X64-NEXT: movl (%rsi), %edx
; X64-NEXT: bswapl %ecx
; X64-NEXT: bswapl %edx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl %edx, %ecx
; X64-NEXT: seta %al
; X64-NEXT: sbbl $0, %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
  ret i32 %m
}

define i1 @length4_eq(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-LABEL: length4_eq:
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: cmpl (%rsi), %eax
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

define i1 @length4_eq_const(ptr %X) nounwind !prof !14 {
; X64-LABEL: length4_eq_const:
; X64: # %bb.0:
; X64-NEXT: cmpl $875770417, (%rdi) # imm = 0x34333231
; X64-NEXT: sete %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 4) nounwind
  %c = icmp eq i32 %m, 0
  ret i1 %c
}

define i32 @length5(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-LABEL: length5:
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %ecx
; X64-NEXT: movl (%rsi), %edx
; X64-NEXT: bswapl %ecx
; X64-NEXT: bswapl %edx
; X64-NEXT: cmpl %edx, %ecx
; X64-NEXT: jne .LBB9_3
; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movzbl 4(%rdi), %eax
; X64-NEXT: movzbl 4(%rsi), %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB9_3: # %res_block
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl %edx, %ecx
; X64-NEXT: sbbl %eax, %eax
; X64-NEXT: orl $1, %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
  ret i32 %m
}

define i1 @length5_eq(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-LABEL: length5_eq:
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: xorl (%rsi), %eax
; X64-NEXT: movb 4(%rdi), %cl
; X64-NEXT: xorb 4(%rsi), %cl
; X64-NEXT: movzbl %cl, %ecx
; X64-NEXT: orl %eax, %ecx
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

define i32 @length8(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-LABEL: length8:
; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx
; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: seta %al
; X64-NEXT: sbbl $0, %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
  ret i32 %m
}

define i1 @length8_eq(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-LABEL: length8_eq:
; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: cmpq (%rsi), %rax
; X64-NEXT: sete %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
  %c = icmp eq i32 %m, 0
  ret i1 %c
}

define i1 @length8_eq_const(ptr %X) nounwind !prof !14 {
; X64-LABEL: length8_eq_const:
; X64: # %bb.0:
; X64-NEXT: movabsq $3978425819141910832, %rax # imm = 0x3736353433323130
; X64-NEXT: cmpq %rax, (%rdi)
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 8) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

define i1 @length12_eq(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-LABEL: length12_eq:
; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: xorq (%rsi), %rax
; X64-NEXT: movl 8(%rdi), %ecx
; X64-NEXT: xorl 8(%rsi), %ecx
; X64-NEXT: orq %rax, %rcx
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

define i32 @length12(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-LABEL: length12:
; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx
; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB15_2
; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movl 8(%rdi), %ecx
; X64-NEXT: movl 8(%rsi), %edx
; X64-NEXT: bswapl %ecx
; X64-NEXT: bswapl %edx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB15_3
; X64-NEXT: .LBB15_2: # %res_block
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: sbbl %eax, %eax
; X64-NEXT: orl $1, %eax
; X64-NEXT: .LBB15_3: # %endblock
; X64-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind
  ret i32 %m
}

; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329

define i32 @length16(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-LABEL: length16:
; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx
; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB16_2
; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movq 8(%rdi), %rcx
; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rcx
; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB16_3
; X64-NEXT: .LBB16_2: # %res_block
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: sbbl %eax, %eax
; X64-NEXT: orl $1, %eax
; X64-NEXT: .LBB16_3: # %endblock
; X64-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 16) nounwind
  ret i32 %m
}

define i1 @length16_eq(ptr %x, ptr %y) nounwind !prof !14 {
; X64-SSE2-LABEL: length16_eq:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: movdqu (%rsi), %xmm1
; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: setne %al
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: length16_eq:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
; X64-AVX-NEXT: vptest %xmm0, %xmm0
; X64-AVX-NEXT: setne %al
; X64-AVX-NEXT: retq
  %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind
  %cmp = icmp ne i32 %call, 0
  ret i1 %cmp
}

define i1 @length16_eq_const(ptr %X) nounwind !prof !14 {
; X64-SSE2-LABEL: length16_eq_const:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: length16_eq_const:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vptest %xmm0, %xmm0
; X64-AVX-NEXT: sete %al
; X64-AVX-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 16) nounwind
  %c = icmp eq i32 %m, 0
  ret i1 %c
}

; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914

define i32 @length24(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-LABEL: length24:
; X64: # %bb.0:
; X64-NEXT: movl $24, %edx
; X64-NEXT: jmp memcmp # TAILCALL
  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 24) nounwind
  ret i32 %m
}

define i1 @length24_eq(ptr %x, ptr %y) nounwind !prof !14 {
; X64-SSE2-LABEL: length24_eq:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: movdqu (%rsi), %xmm1
; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X64-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64-SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
; X64-SSE2-NEXT: pand %xmm1, %xmm2
; X64-SSE2-NEXT: pmovmskb %xmm2, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: length24_eq:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; X64-AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; X64-AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
; X64-AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
; X64-AVX-NEXT: vptest %xmm0, %xmm0
; X64-AVX-NEXT: sete %al
; X64-AVX-NEXT: retq
  %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind
  %cmp = icmp eq i32 %call, 0
  ret i1 %cmp
}

define i1 @length24_eq_const(ptr %X) nounwind !prof !14 {
; X64-SSE2-LABEL: length24_eq_const:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: pand %xmm1, %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: setne %al
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: length24_eq_const:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vptest %xmm0, %xmm0
; X64-AVX-NEXT: setne %al
; X64-AVX-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 24) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

define i32 @length32(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-LABEL: length32:
; X64: # %bb.0:
; X64-NEXT: movl $32, %edx
; X64-NEXT: jmp memcmp # TAILCALL
  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 32) nounwind
  ret i32 %m
}

; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325

define i1 @length32_eq(ptr %x, ptr %y) nounwind !prof !14 {
; X64-SSE2-LABEL: length32_eq:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
; X64-SSE2-NEXT: movdqu (%rsi), %xmm2
; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm0
; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
; X64-SSE2-NEXT: pand %xmm2, %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
; X64-AVX1-LABEL: length32_eq:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0
; X64-AVX1-NEXT: vptest %ymm0, %ymm0
; X64-AVX1-NEXT: sete %al
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: length32_eq:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0
; X64-AVX2-NEXT: vptest %ymm0, %ymm0
; X64-AVX2-NEXT: sete %al
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
  %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind
  %cmp = icmp eq i32 %call, 0
  ret i1 %cmp
}

define i1 @length32_eq_const(ptr %X) nounwind !prof !14 {
; X64-SSE2-LABEL: length32_eq_const:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: pand %xmm1, %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: setne %al
; X64-SSE2-NEXT: retq
;
; X64-AVX1-LABEL: length32_eq_const:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX1-NEXT: vptest %ymm0, %ymm0
; X64-AVX1-NEXT: setne %al
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: length32_eq_const:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX2-NEXT: vptest %ymm0, %ymm0
; X64-AVX2-NEXT: setne %al
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 32) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

define i32 @length64(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-LABEL: length64:
; X64: # %bb.0:
; X64-NEXT: movl $64, %edx
; X64-NEXT: jmp memcmp # TAILCALL
  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 64) nounwind
  ret i32 %m
}

define i1 @length64_eq(ptr %x, ptr %y) nounwind !prof !14 {
; X64-SSE2-LABEL: length64_eq:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: pushq %rax
; X64-SSE2-NEXT: movl $64, %edx
; X64-SSE2-NEXT: callq memcmp
; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: setne %al
; X64-SSE2-NEXT: popq %rcx
; X64-SSE2-NEXT: retq
;
; X64-AVX1-LABEL: length64_eq:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
; X64-AVX1-NEXT: vmovups 32(%rdi), %ymm1
; X64-AVX1-NEXT: vxorps 32(%rsi), %ymm1, %ymm1
; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0
; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; X64-AVX1-NEXT: vptest %ymm0, %ymm0
; X64-AVX1-NEXT: setne %al
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: length64_eq:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
; X64-AVX2-NEXT: vpxor 32(%rsi), %ymm1, %ymm1
; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0
; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vptest %ymm0, %ymm0
; X64-AVX2-NEXT: setne %al
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
  %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind
  %cmp = icmp ne i32 %call, 0
  ret i1 %cmp
}

define i1 @length64_eq_const(ptr %X) nounwind !prof !14 {
; X64-SSE2-LABEL: length64_eq_const:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: pushq %rax
; X64-SSE2-NEXT: movl $.L.str, %esi
; X64-SSE2-NEXT: movl $64, %edx
; X64-SSE2-NEXT: callq memcmp
; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: popq %rcx
; X64-SSE2-NEXT: retq
;
; X64-AVX1-LABEL: length64_eq_const:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
; X64-AVX1-NEXT: vmovups 32(%rdi), %ymm1
; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; X64-AVX1-NEXT: vptest %ymm0, %ymm0
; X64-AVX1-NEXT: sete %al
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: length64_eq_const:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vptest %ymm0, %ymm0
; X64-AVX2-NEXT: sete %al
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 64) nounwind
  %c = icmp eq i32 %m, 0
  ret i1 %c
}

define i32 @bcmp_length2(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-LABEL: bcmp_length2:
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %ecx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpw (%rsi), %cx
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @bcmp(ptr %X, ptr %Y, i64 2) nounwind
  ret i32 %m
}

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}