; NOTE(review): the lines below are provenance text scraped from a git web UI
; (file listing + commit message); they are not LLVM IR, so they are kept here
; as comments to preserve the information while letting the file parse.
; File:   clang-p2996/llvm/test/CodeGen/X86/memcmp-pgso.ll
; Commit: Simon Pilgrim e2d74a25eb [X86] EmitCmp - always use cmpw with foldable loads (#92251)
;
; By default, EmitCmp avoids cmpw with i16 immediates due to 66/67h
; length-changing prefixes causing stalls, instead extending the value to i32
; and using a cmpl with an i32 immediate, unless it has the TuningFastImm16
; flag or we're building for optsize/minsize.
;
; However, if we're loading the value for comparison, the performance costs of
; the decode stalls are likely to be exceeded by the impact of the load latency
; of the folded load, the shorter encoding and not needing an extra register to
; store the ext-load.
;
; This matches the behaviour of gcc and msvc.
;
; Fixes #90355
; Date: 2024-05-15 17:46:49 +01:00
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64,X64-SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2
; This tests codegen time inlining/optimization of memcmp
; rdar://6480398
; 65-byte digit string used as the fixed RHS buffer in the *_eq_const tests.
@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1
; External comparison routines; the expansion pass inlines calls to these
; where profitable and otherwise leaves them as libcalls.
declare dso_local i32 @memcmp(ptr, ptr, i64)
declare dso_local i32 @bcmp(ptr, ptr, i64)
; 2-byte memcmp with an ordered (three-way) result: both halves are loaded as
; words, byte-swapped via rolw $8 to get big-endian order, zero-extended, and
; subtracted.
define i32 @length2(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-LABEL: length2:
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: movzwl %cx, %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
  ret i32 %m
}
; Equality-only 2-byte compare: no byte swap needed; the second load is folded
; directly into cmpw.
define i1 @length2_eq(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-LABEL: length2_eq:
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: cmpw (%rsi), %ax
; X64-NEXT: sete %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind
  %c = icmp eq i32 %m, 0
  ret i1 %c
}
; Equality against a constant buffer: a single cmpw of memory against an i16
; immediate (the word "12" from @.str at offset 1, i.e. 0x3231).
define i1 @length2_eq_const(ptr %X) nounwind !prof !14 {
; X64-LABEL: length2_eq_const:
; X64: # %bb.0:
; X64-NEXT: cmpw $12849, (%rdi) # imm = 0x3231
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}
; The nobuiltin attribute on the call suppresses inline expansion entirely:
; the call stays a plain libcall to memcmp.
define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-LABEL: length2_eq_nobuiltin_attr:
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: movl $2, %edx
; X64-NEXT: callq memcmp
; X64-NEXT: testl %eax, %eax
; X64-NEXT: sete %al
; X64-NEXT: popq %rcx
; X64-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind nobuiltin
  %c = icmp eq i32 %m, 0
  ret i1 %c
}
; 3-byte ordered memcmp expands to a two-chunk chain (word + trailing byte):
; compare the first word in loadbb, fall through to the byte compare on
; equality, or jump to res_block which recomputes the word compare and maps
; it to -1/+1 via sbbl/orl.
define i32 @length3(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-LABEL: length3:
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %ecx
; X64-NEXT: movzwl (%rsi), %edx
; X64-NEXT: rolw $8, %cx
; X64-NEXT: rolw $8, %dx
; X64-NEXT: cmpw %dx, %cx
; X64-NEXT: jne .LBB4_3
; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movzbl 2(%rdi), %eax
; X64-NEXT: movzbl 2(%rsi), %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB4_3: # %res_block
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpw %dx, %cx
; X64-NEXT: sbbl %eax, %eax
; X64-NEXT: orl $1, %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
  ret i32 %m
}
; Equality-only 3-byte compare: branchless xor/or reduction of the word and
; trailing-byte differences.
define i1 @length3_eq(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-LABEL: length3_eq:
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: xorw (%rsi), %ax
; X64-NEXT: movb 2(%rdi), %cl
; X64-NEXT: xorb 2(%rsi), %cl
; X64-NEXT: movzbl %cl, %ecx
; X64-NEXT: orw %ax, %cx
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}
; 4-byte ordered memcmp: dword loads + bswapl, then the branchless
; seta/sbbl idiom produces -1/0/+1 without a jump.
define i32 @length4(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-LABEL: length4:
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %ecx
; X64-NEXT: movl (%rsi), %edx
; X64-NEXT: bswapl %ecx
; X64-NEXT: bswapl %edx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl %edx, %ecx
; X64-NEXT: seta %al
; X64-NEXT: sbbl $0, %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
  ret i32 %m
}
; Equality-only 4-byte compare: single cmpl with a folded memory operand.
define i1 @length4_eq(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-LABEL: length4_eq:
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: cmpl (%rsi), %eax
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}
; Equality against a constant buffer: one cmpl of memory against an i32
; immediate ("1234" = 0x34333231).
define i1 @length4_eq_const(ptr %X) nounwind !prof !14 {
; X64-LABEL: length4_eq_const:
; X64: # %bb.0:
; X64-NEXT: cmpl $875770417, (%rdi) # imm = 0x34333231
; X64-NEXT: sete %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 4) nounwind
  %c = icmp eq i32 %m, 0
  ret i1 %c
}
; 5-byte ordered memcmp: dword chunk first, trailing byte second, with the
; same loadbb/res_block chain shape as length3.
define i32 @length5(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-LABEL: length5:
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %ecx
; X64-NEXT: movl (%rsi), %edx
; X64-NEXT: bswapl %ecx
; X64-NEXT: bswapl %edx
; X64-NEXT: cmpl %edx, %ecx
; X64-NEXT: jne .LBB9_3
; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movzbl 4(%rdi), %eax
; X64-NEXT: movzbl 4(%rsi), %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB9_3: # %res_block
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl %edx, %ecx
; X64-NEXT: sbbl %eax, %eax
; X64-NEXT: orl $1, %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
  ret i32 %m
}
; Equality-only 5-byte compare: branchless xor/or reduction of the dword and
; trailing-byte differences.
define i1 @length5_eq(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-LABEL: length5_eq:
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: xorl (%rsi), %eax
; X64-NEXT: movb 4(%rdi), %cl
; X64-NEXT: xorb 4(%rsi), %cl
; X64-NEXT: movzbl %cl, %ecx
; X64-NEXT: orl %eax, %ecx
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}
; 8-byte ordered memcmp: single qword chunk, bswapq + branchless seta/sbbl.
define i32 @length8(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-LABEL: length8:
; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx
; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: seta %al
; X64-NEXT: sbbl $0, %eax
; X64-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
  ret i32 %m
}
; Equality-only 8-byte compare: one cmpq with a folded memory operand.
define i1 @length8_eq(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-LABEL: length8_eq:
; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: cmpq (%rsi), %rax
; X64-NEXT: sete %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
  %c = icmp eq i32 %m, 0
  ret i1 %c
}
; Equality against a constant buffer: the i64 immediate ("01234567") does not
; fit a cmpq immediate field, so it is materialized with movabsq first.
define i1 @length8_eq_const(ptr %X) nounwind !prof !14 {
; X64-LABEL: length8_eq_const:
; X64: # %bb.0:
; X64-NEXT: movabsq $3978425819141910832, %rax # imm = 0x3736353433323130
; X64-NEXT: cmpq %rax, (%rdi)
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 8) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}
; Equality-only 12-byte compare: qword + dword chunks xor'd against the RHS
; and or-reduced into a single zero test.
define i1 @length12_eq(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-LABEL: length12_eq:
; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: xorq (%rsi), %rax
; X64-NEXT: movl 8(%rdi), %ecx
; X64-NEXT: xorl 8(%rsi), %ecx
; X64-NEXT: orq %rax, %rcx
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}
; 12-byte ordered memcmp: qword chunk then dword chunk, loadbb chain with a
; shared res_block that converts the failing comparison to -1/+1.
define i32 @length12(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-LABEL: length12:
; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx
; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB15_2
; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movl 8(%rdi), %ecx
; X64-NEXT: movl 8(%rsi), %edx
; X64-NEXT: bswapl %ecx
; X64-NEXT: bswapl %edx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB15_3
; X64-NEXT: .LBB15_2: # %res_block
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: sbbl %eax, %eax
; X64-NEXT: orl $1, %eax
; X64-NEXT: .LBB15_3: # %endblock
; X64-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind
  ret i32 %m
}
; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329
; 16-byte ordered memcmp: two qword chunks through the loadbb/res_block chain.
define i32 @length16(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-LABEL: length16:
; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx
; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB16_2
; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movq 8(%rdi), %rcx
; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rcx
; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB16_3
; X64-NEXT: .LBB16_2: # %res_block
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: sbbl %eax, %eax
; X64-NEXT: orl $1, %eax
; X64-NEXT: .LBB16_3: # %endblock
; X64-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 16) nounwind
  ret i32 %m
}
; Equality-only 16-byte compare: SSE2 uses pcmpeqb + pmovmskb against the
; all-ones mask 0xFFFF; AVX uses a single vpxor + vptest.
define i1 @length16_eq(ptr %x, ptr %y) nounwind !prof !14 {
; X64-SSE2-LABEL: length16_eq:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: movdqu (%rsi), %xmm1
; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: setne %al
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: length16_eq:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
; X64-AVX-NEXT: vptest %xmm0, %xmm0
; X64-AVX-NEXT: setne %al
; X64-AVX-NEXT: retq
  %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind
  %cmp = icmp ne i32 %call, 0
  ret i1 %cmp
}
; 16-byte equality against the constant buffer: the RHS vector comes from a
; constant-pool load ({{\.?LCPI...}}(%rip)).
define i1 @length16_eq_const(ptr %X) nounwind !prof !14 {
; X64-SSE2-LABEL: length16_eq_const:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: length16_eq_const:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vptest %xmm0, %xmm0
; X64-AVX-NEXT: sete %al
; X64-AVX-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 16) nounwind
  %c = icmp eq i32 %m, 0
  ret i1 %c
}
; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914
; 24-byte ordered memcmp is not expanded inline here; it becomes a tail-call
; (jmp) to the memcmp libcall with the length in %edx.
define i32 @length24(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-LABEL: length24:
; X64: # %bb.0:
; X64-NEXT: movl $24, %edx
; X64-NEXT: jmp memcmp # TAILCALL
  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 24) nounwind
  ret i32 %m
}
; Equality-only 24-byte compare: a full 16-byte vector chunk plus an 8-byte
; chunk loaded into the low half of a vector (movq/vmovq), combined with
; pand/vpor before the final zero test.
define i1 @length24_eq(ptr %x, ptr %y) nounwind !prof !14 {
; X64-SSE2-LABEL: length24_eq:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: movdqu (%rsi), %xmm1
; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X64-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64-SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
; X64-SSE2-NEXT: pand %xmm1, %xmm2
; X64-SSE2-NEXT: pmovmskb %xmm2, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: length24_eq:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; X64-AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; X64-AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0
; X64-AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
; X64-AVX-NEXT: vptest %xmm0, %xmm0
; X64-AVX-NEXT: sete %al
; X64-AVX-NEXT: retq
  %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind
  %cmp = icmp eq i32 %call, 0
  ret i1 %cmp
}
; 24-byte equality against the constant buffer: both chunks compare against
; constant-pool vectors.
define i1 @length24_eq_const(ptr %X) nounwind !prof !14 {
; X64-SSE2-LABEL: length24_eq_const:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: pand %xmm1, %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: setne %al
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: length24_eq_const:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
; X64-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vptest %xmm0, %xmm0
; X64-AVX-NEXT: setne %al
; X64-AVX-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 24) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}
; 32-byte ordered memcmp: left as a tail-call to the libcall.
define i32 @length32(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-LABEL: length32:
; X64: # %bb.0:
; X64-NEXT: movl $32, %edx
; X64-NEXT: jmp memcmp # TAILCALL
  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 32) nounwind
  ret i32 %m
}
; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325
; Equality-only 32-byte compare: SSE2 needs two 16-byte chunks combined with
; pand; AVX1/AVX2 do it with a single 32-byte ymm compare (vxorps/vpxor +
; vptest), followed by vzeroupper before returning.
define i1 @length32_eq(ptr %x, ptr %y) nounwind !prof !14 {
; X64-SSE2-LABEL: length32_eq:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
; X64-SSE2-NEXT: movdqu (%rsi), %xmm2
; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm2
; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm0
; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
; X64-SSE2-NEXT: pand %xmm2, %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
; X64-AVX1-LABEL: length32_eq:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0
; X64-AVX1-NEXT: vptest %ymm0, %ymm0
; X64-AVX1-NEXT: sete %al
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: length32_eq:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0
; X64-AVX2-NEXT: vptest %ymm0, %ymm0
; X64-AVX2-NEXT: sete %al
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
  %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind
  %cmp = icmp eq i32 %call, 0
  ret i1 %cmp
}
; 32-byte equality against the constant buffer: same shapes as length32_eq
; with constant-pool RHS vectors.
define i1 @length32_eq_const(ptr %X) nounwind !prof !14 {
; X64-SSE2-LABEL: length32_eq_const:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: movdqu (%rdi), %xmm0
; X64-SSE2-NEXT: movdqu 16(%rdi), %xmm1
; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: pand %xmm1, %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: setne %al
; X64-SSE2-NEXT: retq
;
; X64-AVX1-LABEL: length32_eq_const:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX1-NEXT: vptest %ymm0, %ymm0
; X64-AVX1-NEXT: setne %al
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: length32_eq_const:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX2-NEXT: vptest %ymm0, %ymm0
; X64-AVX2-NEXT: setne %al
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 32) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}
; 64-byte ordered memcmp: left as a tail-call to the libcall.
define i32 @length64(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-LABEL: length64:
; X64: # %bb.0:
; X64-NEXT: movl $64, %edx
; X64-NEXT: jmp memcmp # TAILCALL
  %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 64) nounwind
  ret i32 %m
}
; Equality-only 64-byte compare: plain SSE2 falls back to the memcmp libcall,
; while AVX1/AVX2 expand inline with two 32-byte ymm chunks or-reduced before
; vptest.
define i1 @length64_eq(ptr %x, ptr %y) nounwind !prof !14 {
; X64-SSE2-LABEL: length64_eq:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: pushq %rax
; X64-SSE2-NEXT: movl $64, %edx
; X64-SSE2-NEXT: callq memcmp
; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: setne %al
; X64-SSE2-NEXT: popq %rcx
; X64-SSE2-NEXT: retq
;
; X64-AVX1-LABEL: length64_eq:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
; X64-AVX1-NEXT: vmovups 32(%rdi), %ymm1
; X64-AVX1-NEXT: vxorps 32(%rsi), %ymm1, %ymm1
; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0
; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; X64-AVX1-NEXT: vptest %ymm0, %ymm0
; X64-AVX1-NEXT: setne %al
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: length64_eq:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
; X64-AVX2-NEXT: vpxor 32(%rsi), %ymm1, %ymm1
; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0
; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vptest %ymm0, %ymm0
; X64-AVX2-NEXT: setne %al
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
  %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind
  %cmp = icmp ne i32 %call, 0
  ret i1 %cmp
}
; 64-byte equality against the constant buffer: SSE2 libcall (passing @.str's
; address in %esi), AVX expands inline against constant-pool vectors.
define i1 @length64_eq_const(ptr %X) nounwind !prof !14 {
; X64-SSE2-LABEL: length64_eq_const:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: pushq %rax
; X64-SSE2-NEXT: movl $.L.str, %esi
; X64-SSE2-NEXT: movl $64, %edx
; X64-SSE2-NEXT: callq memcmp
; X64-SSE2-NEXT: testl %eax, %eax
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: popq %rcx
; X64-SSE2-NEXT: retq
;
; X64-AVX1-LABEL: length64_eq_const:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
; X64-AVX1-NEXT: vmovups 32(%rdi), %ymm1
; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; X64-AVX1-NEXT: vptest %ymm0, %ymm0
; X64-AVX1-NEXT: sete %al
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: length64_eq_const:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0
; X64-AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vptest %ymm0, %ymm0
; X64-AVX2-NEXT: sete %al
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
  %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 64) nounwind
  %c = icmp eq i32 %m, 0
  ret i1 %c
}
; bcmp only needs zero/non-zero, so a 2-byte bcmp expands to cmpw with a
; folded load and setne — no byte swap or ordered-result code.
define i32 @bcmp_length2(ptr %X, ptr %Y) nounwind !prof !14 {
; X64-LABEL: bcmp_length2:
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %ecx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpw (%rsi), %cx
; X64-NEXT: setne %al
; X64-NEXT: retq
  %m = tail call i32 @bcmp(ptr %X, ptr %Y, i64 2) nounwind
  ret i32 %m
}
; ProfileSummary module metadata plus a function_entry_count of 0 (!14,
; attached to every function above). The zero entry count marks the functions
; as cold relative to the summary, which is what exercises the
; profile-guided-size-optimization (PGSO) code paths this test targets.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}