Files
clang-p2996/llvm/test/CodeGen/X86/inline-asm-x-i128.ll
Simon Pilgrim 17eafe0841 [X86][SSE] lowerV2I64Shuffle - use undef elements in PSHUFD mask widening
If we lower a v2i64 shuffle to PSHUFD, we currently clamp undef elements to 0 (elements 0,1 of the v4i32), which can result in the shuffle referencing more elements of the source vector than expected, affecting later shuffle combines and KnownBits/SimplifyDemanded calls.

By ensuring we widen the undef mask element we allow getV4X86ShuffleImm8 to use inline elements as the default, which are more likely to fold.
2020-07-26 16:04:22 +01:00

34 lines
1.3 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnux32 | FileCheck %s
; RUN: not llc < %s -mtriple=i386-unknown-linux-gnu 2>&1 | FileCheck %s --check-prefix=ERROR
; For 32-bit we still error since __int128 isn't supported in the frontend.
; ERROR: error: couldn't allocate output register for constraint 'x'
; Round-trips a 128-bit integer through an inline-asm 'x' constraint.
; The two i64 arguments are merged into a single i128 (%1 in the upper 64 bits,
; %0 in the lower 64 bits), handed to a "movdqa $1, $0" asm statement as its 'x'
; input, and the i128 'x' output is split back into a { low, high } pair.
; The autogenerated assertions below expect both asm operands to be xmm registers.
define { i64, i64 } @foo(i64 %0, i64 %1) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, %xmm0
; CHECK-NEXT: movq %rdi, %xmm1
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; CHECK-NEXT: #APP
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: movq %xmm0, %rax
; CHECK-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: movq %xmm0, %rdx
; CHECK-NEXT: retq
; Build the i128 operand: (zext(%1) << 64) | zext(%0).
%3 = zext i64 %1 to i128
%4 = shl nuw i128 %3, 64
%5 = zext i64 %0 to i128
%6 = or i128 %4, %5
; Constraint string: "=x" is the i128 output ($0), "x" is the i128 input ($1);
; the remaining entries are clobbers (dirflag, fpsr, flags).
%7 = tail call i128 asm sideeffect "movdqa $1, $0", "=x,x,~{dirflag},~{fpsr},~{flags}"(i128 %6)
; Split the asm result: %8 = low 64 bits, %10 = high 64 bits.
%8 = trunc i128 %7 to i64
%9 = lshr i128 %7, 64
%10 = trunc i128 %9 to i64
; Pack as { low, high } and return.
%11 = insertvalue { i64, i64 } undef, i64 %8, 0
%12 = insertvalue { i64, i64 } %11, i64 %10, 1
ret { i64, i64 } %12
}