clang-p2996/llvm/test/CodeGen/X86/cmp-concat.ll
Simon Pilgrim b53ea2b9c5 [DAG] visitAND - fold (and (any_ext V), c) -> (zero_ext (and (trunc V), c)) if profitable.
Try to more aggressively narrow masks of extended values.

This is mainly for cases where the mask is only zeroing out the upper bits introduced by an any_extend, assuming we can zext/trunc the values for free.

This catches a few folds that were genuinely being missed, and also helps canonicalize a number of other cases that were previously only being handled in isel and later.

Differential Revision: https://reviews.llvm.org/D145866
2023-03-12 13:25:23 +00:00
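
As a rough illustration, and not the code from D145866 itself, the shape of such a DAGCombiner fold could look like the sketch below. The helper name narrowMaskOfAnyExt, its signature, the restriction to a constant scalar mask, and the use of isTruncateFree/isZExtFree as the profitability test are assumptions made for this sketch; the actual visitAND change may differ in detail.

// Minimal sketch, assuming a DAGCombiner-style helper; NOT the actual
// D145866 implementation. It narrows (and (any_ext V), C) to
// (zero_ext (and V, C')) when the mask only keeps bits of the narrow value
// and the target reports the trunc/zext round-trip as free.
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"

using namespace llvm;

static SDValue narrowMaskOfAnyExt(SDNode *N, SelectionDAG &DAG,
                                  const TargetLowering &TLI) {
  // Match (and (any_extend V), ConstantMask).
  SDValue Ext = N->getOperand(0);
  auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!Mask || Ext.getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  SDValue V = Ext.getOperand(0);
  EVT NarrowVT = V.getValueType();
  EVT WideVT = N->getValueType(0);
  unsigned NarrowBits = NarrowVT.getScalarSizeInBits();

  // The mask must only keep bits that already exist in the narrow value,
  // i.e. it is zeroing out the undefined bits created by the any_extend.
  if (!Mask->getAPIntValue().isIntN(NarrowBits))
    return SDValue();

  // Only worthwhile if narrowing and re-widening are free on the target.
  if (!TLI.isTruncateFree(WideVT, NarrowVT) ||
      !TLI.isZExtFree(NarrowVT, WideVT))
    return SDValue();

  SDLoc DL(N);
  SDValue NarrowMask =
      DAG.getConstant(Mask->getAPIntValue().trunc(NarrowBits), DL, NarrowVT);
  SDValue NarrowAnd = DAG.getNode(ISD::AND, DL, NarrowVT, V, NarrowMask);
  return DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, NarrowAnd);
}

The IR tests below build concat-style patterns (zext, shl, or, then a compare), which is the kind of code where narrowing a mask of an extended value can change the width at which the AND and compare are actually performed.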

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s

define i1 @cmp_allbits_concat_i8(i8 %x, i8 %y) {
; CHECK-LABEL: cmp_allbits_concat_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: andl %esi, %edi
; CHECK-NEXT: cmpb $-1, %dil
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
%zx = zext i8 %x to i16
%zy = zext i8 %y to i16
%sh = shl i16 %zx, 8
%or = or i16 %zy, %sh
%r = icmp eq i16 %or, -1
ret i1 %r
}

define i1 @cmp_anybits_concat_i32(i32 %x, i32 %y) {
; CHECK-LABEL: cmp_anybits_concat_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: orl %esi, %edi
; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
%zx = zext i32 %x to i64
%zy = zext i32 %y to i64
%sh = shl i64 %zx, 32
%or = or i64 %zy, %sh
%r = icmp ne i64 %or, 0
ret i1 %r
}

define i1 @cmp_anybits_concat_shl_shl_i16(i16 %x, i16 %y) {
; CHECK-LABEL: cmp_anybits_concat_shl_shl_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: movzwl %di, %eax
; CHECK-NEXT: movzwl %si, %ecx
; CHECK-NEXT: shlq $8, %rcx
; CHECK-NEXT: orq %rax, %rcx
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
%zx = zext i16 %x to i64
%zy = zext i16 %y to i64
%sx = shl i64 %zx, 32
%sy = shl i64 %zy, 8
%or = or i64 %sx, %sy
%r = icmp eq i64 %or, 0
ret i1 %r
}

define i1 @cmp_anybits_concat_shl_shl_i16_commute(i16 %x, i16 %y) {
; CHECK-LABEL: cmp_anybits_concat_shl_shl_i16_commute:
; CHECK: # %bb.0:
; CHECK-NEXT: movzwl %di, %eax
; CHECK-NEXT: movzwl %si, %ecx
; CHECK-NEXT: shlq $8, %rcx
; CHECK-NEXT: orq %rax, %rcx
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
%zx = zext i16 %x to i64
%zy = zext i16 %y to i64
%sx = shl i64 %zx, 32
%sy = shl i64 %zy, 8
%or = or i64 %sy, %sx
%r = icmp eq i64 %or, 0
ret i1 %r
}

; FIXME: Add vector support, but it's only worth it if we can freely truncate the
; concat'd vectors.
define <16 x i8> @cmp_allbits_concat_v16i8(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: cmp_allbits_concat_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: movdqa %xmm1, %xmm2
; CHECK-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; CHECK-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
; CHECK-NEXT: pcmpeqw %xmm0, %xmm1
; CHECK-NEXT: pcmpeqw %xmm0, %xmm2
; CHECK-NEXT: packsswb %xmm1, %xmm2
; CHECK-NEXT: movdqa %xmm2, %xmm0
; CHECK-NEXT: retq
%zx = zext <16 x i8> %x to <16 x i16>
%zy = zext <16 x i8> %y to <16 x i16>
%sh = shl <16 x i16> %zx, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
%or = or <16 x i16> %zy, %sh
%r = icmp eq <16 x i16> %or, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
%s = sext <16 x i1> %r to <16 x i8>
ret <16 x i8> %s
}

define <2 x i64> @cmp_nobits_concat_v2i64(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: cmp_nobits_concat_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %xmm0, %rax
; CHECK-NEXT: pextrq $1, %xmm0, %rcx
; CHECK-NEXT: movq %xmm1, %rdx
; CHECK-NEXT: pextrq $1, %xmm1, %rsi
; CHECK-NEXT: xorl %edi, %edi
; CHECK-NEXT: orq %rcx, %rsi
; CHECK-NEXT: sete %dil
; CHECK-NEXT: negq %rdi
; CHECK-NEXT: movq %rdi, %xmm1
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: orq %rax, %rdx
; CHECK-NEXT: sete %cl
; CHECK-NEXT: negq %rcx
; CHECK-NEXT: movq %rcx, %xmm0
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
%zx = zext <2 x i64> %x to <2 x i128>
%zy = zext <2 x i64> %y to <2 x i128>
%sh = shl <2 x i128> %zx, <i128 64, i128 64>
%or = or <2 x i128> %zy, %sh
%r = icmp eq <2 x i128> %or, zeroinitializer
%s = sext <2 x i1> %r to <2 x i64>
ret <2 x i64> %s
}
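
For reference, and assuming a typical LLVM build-directory layout (the paths below are illustrative, not taken from this repository): a test like this is normally run through lit, and the autogenerated CHECK lines are refreshed with the script named in the NOTE at the top of the file.

# run just this test from the build directory
./bin/llvm-lit -v ../llvm/test/CodeGen/X86/cmp-concat.ll

# regenerate the CHECK lines after a codegen change
python3 ../llvm/utils/update_llc_test_checks.py --llc-binary ./bin/llc \
    ../llvm/test/CodeGen/X86/cmp-concat.ll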