For 8-bit/16-bit vector loads/stores we scalarize and transfer to/from the vector unit, or use the (usually slow) PINSR/PEXTR instructions. Fixes #59867
31 lines
1.5 KiB
LLVM
31 lines
1.5 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown -mcpu=x86-64 | FileCheck %s
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown -mcpu=x86-64-v2 | FileCheck %s
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown -mcpu=x86-64-v3 | FileCheck %s
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown -mcpu=x86-64-v4 | FileCheck %s

; Regression test for issue #59867.
;
; @PR59867 compares the two consecutive bytes starting at %s1 with the two
; consecutive bytes starting at %s2 and returns true only when both byte pairs
; are equal.
;
; All four RUN configurations share the single CHECK prefix, and the CHECK
; lines mirror the input instruction for instruction: the SLP vectorizer is
; expected to leave the four i8 loads and the two i8 compares scalar rather
; than rewriting them as vector operations.
define i1 @PR59867(ptr %s1, ptr %s2) {
; CHECK-LABEL: @PR59867(
; CHECK-NEXT:    [[V1_1:%.*]] = load i8, ptr [[S1:%.*]], align 1
; CHECK-NEXT:    [[V2_1:%.*]] = load i8, ptr [[S2:%.*]], align 1
; CHECK-NEXT:    [[C1:%.*]] = icmp eq i8 [[V1_1]], [[V2_1]]
; CHECK-NEXT:    [[S1_2:%.*]] = getelementptr inbounds i8, ptr [[S1]], i64 1
; CHECK-NEXT:    [[V1_2:%.*]] = load i8, ptr [[S1_2]], align 1
; CHECK-NEXT:    [[S2_2:%.*]] = getelementptr inbounds i8, ptr [[S2]], i64 1
; CHECK-NEXT:    [[V2_2:%.*]] = load i8, ptr [[S2_2]], align 1
; CHECK-NEXT:    [[C2:%.*]] = icmp eq i8 [[V1_2]], [[V2_2]]
; CHECK-NEXT:    [[RES:%.*]] = select i1 [[C1]], i1 [[C2]], i1 false
; CHECK-NEXT:    ret i1 [[RES]]
;
  ; Byte 0 of each buffer.
  %v1.1 = load i8, ptr %s1, align 1
  %v2.1 = load i8, ptr %s2, align 1
  %c1 = icmp eq i8 %v1.1, %v2.1
  ; Byte 1 of each buffer (base pointer + 1).
  %s1.2 = getelementptr inbounds i8, ptr %s1, i64 1
  %v1.2 = load i8, ptr %s1.2, align 1
  %s2.2 = getelementptr inbounds i8, ptr %s2, i64 1
  %v2.2 = load i8, ptr %s2.2, align 1
  %c2 = icmp eq i8 %v1.2, %v2.2
  ; Logical AND written as a select: yields %c2 when %c1 is true, else false.
  %res = select i1 %c1, i1 %c2, i1 false
  ret i1 %res
}