This improves some cases where a splat_vector uses a build_pair that can be simplified, e.g. (rotl x:i64, splat_vector (build_pair x1:i32, x2:i32)). Because rotl only demands the bottom 6 bits of its rotate amount, this patch allows the expression to be simplified to (rotl x:i64, splat_vector (build_pair x1:i32, undef:i32)), which in turn improves some cases where a splat_vector_parts is lowered on RV32. Reviewed By: RKSimon. Differential Revision: https://reviews.llvm.org/D158839
48 lines
1.7 KiB
LLVM
48 lines
1.7 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=wasm32-- -mattr=+simd128 | FileCheck --check-prefix=CHECK-32 %s
; RUN: llc < %s -mtriple=wasm64-- -mattr=+simd128 | FileCheck --check-prefix=CHECK-64 %s

; Codegen test for <3 x i8> / <3 x i128> vector operations on WebAssembly with
; simd128 enabled, compiled once for wasm32 and once for wasm64.
;
; @f stores a zero <3 x i8> through %0, derives a <3 x i8> value from a
; shufflevector whose mask is undef, stores the result of an sdiv by that value
; through %1, and returns lane 0 of the zero-extended comparison.
;
; The expected output for both targets consists solely of scalar stores of the
; constant 0 followed by a constant 0 return value. The assertion lines below
; are generated by the script named in the NOTE line above; regenerate them
; with that script instead of editing them by hand.
define i8 @f(ptr %0, ptr %1) {
; CHECK-32-LABEL: f:
; CHECK-32: .functype f (i32, i32) -> (i32)
; CHECK-32-NEXT: # %bb.0: # %BB
; CHECK-32-NEXT: local.get 0
; CHECK-32-NEXT: i32.const 0
; CHECK-32-NEXT: i32.store8 2
; CHECK-32-NEXT: local.get 0
; CHECK-32-NEXT: i32.const 0
; CHECK-32-NEXT: i32.store16 0
; CHECK-32-NEXT: local.get 1
; CHECK-32-NEXT: i32.const 0
; CHECK-32-NEXT: i32.store8 2
; CHECK-32-NEXT: local.get 1
; CHECK-32-NEXT: i32.const 0
; CHECK-32-NEXT: i32.store16 0
; CHECK-32-NEXT: i32.const 0
; CHECK-32-NEXT: # fallthrough-return
;
; CHECK-64-LABEL: f:
; CHECK-64: .functype f (i64, i64) -> (i32)
; CHECK-64-NEXT: # %bb.0: # %BB
; CHECK-64-NEXT: local.get 0
; CHECK-64-NEXT: i32.const 0
; CHECK-64-NEXT: i32.store8 2
; CHECK-64-NEXT: local.get 0
; CHECK-64-NEXT: i32.const 0
; CHECK-64-NEXT: i32.store16 0
; CHECK-64-NEXT: local.get 1
; CHECK-64-NEXT: i32.const 0
; CHECK-64-NEXT: i32.store16 0
; CHECK-64-NEXT: i32.const 0
; CHECK-64-NEXT: # fallthrough-return
BB:
  ; Zero the three bytes addressed by the first pointer argument.
  store <3 x i8> zeroinitializer, ptr %0
  ; The shuffle mask is undef, so no particular lane selection is requested.
  %S = shufflevector <3 x i128> zeroinitializer, <3 x i128> <i128 0, i128 1, i128 2>, <3 x i32> undef
  ; Lane-wise unsigned "<= 0" test, widened from i1 to i8 (each lane is 0 or 1).
  %C = icmp ule <3 x i128> %S, zeroinitializer
  %C1 = zext <3 x i1> %C to <3 x i8>
  ; Lane 0 of the widened comparison is the function's return value.
  %E = extractelement <3 x i8> %C1, i32 0
  ; Divide the constant vector <1, 3, 5> by the comparison lanes and store the
  ; quotient through the second pointer argument.
  %B = sdiv <3 x i8> <i8 1, i8 3, i8 5>, %C1
  store <3 x i8> %B, ptr %1
  ret i8 %E
}