It looks like we were trying to account for SLM costs, which are actually handled separately. Fixes #62969
34 lines
1.7 KiB
LLVM
34 lines
1.7 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S -mcpu=x86-64 | FileCheck %s
|
|
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S -mcpu=x86-64-v2 | FileCheck %s
|
|
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S -mcpu=x86-64-v3 | FileCheck %s
|
|
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S -mcpu=x86-64-v4 | FileCheck %s
|
|
|
|
; Test input for the SLP vectorizer: two adjacent i64 lanes go through the
; same scalar sequence, which the assertions in the body expect to be merged
; into <2 x i64> operations (one vector load, and, lshr, mul and store).
;   out[i] = (in[i] & 0xFFFFFFFF) * (in[i] >> 32)   for i = 0, 1
; Both pointers are marked dereferenceable(16), i.e. the full [2 x i64] array.
; NOTE(review): the function name suggests this is the reproducer for issue
; 62969 - confirm against the commit that added it.
define void @PR62969(ptr dereferenceable(16) %out, ptr dereferenceable(16) %in) {
|
|
; CHECK-LABEL: @PR62969(
|
|
; CHECK-NEXT: [[IN0:%.*]] = getelementptr inbounds [2 x i64], ptr [[IN:%.*]], i64 0, i64 0
|
|
; CHECK-NEXT: [[OUT0:%.*]] = getelementptr inbounds [2 x i64], ptr [[OUT:%.*]], i64 0, i64 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[IN0]], align 8
|
|
; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[TMP1]], <i64 4294967295, i64 4294967295>
|
|
; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[TMP1]], <i64 32, i64 32>
|
|
; CHECK-NEXT: [[TMP4:%.*]] = mul <2 x i64> [[TMP2]], [[TMP3]]
|
|
; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[OUT0]], align 8
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
; Pointers to in[0] and in[1].
%in0 = getelementptr inbounds [2 x i64], ptr %in, i64 0, i64 0
|
|
%in1 = getelementptr inbounds [2 x i64], ptr %in, i64 0, i64 1
|
|
; Scalar loads of both lanes.
%x = load i64, ptr %in0, align 8
|
|
%y = load i64, ptr %in1, align 8
|
|
; Low 32 bits of each lane (mask 4294967295 = 2^32 - 1).
%xl = and i64 %x, 4294967295
|
|
%yl = and i64 %y, 4294967295
|
|
; High 32 bits of each lane, shifted down into the low half.
%xh = lshr i64 %x, 32
|
|
%yh = lshr i64 %y, 32
|
|
; Per-lane product low * high; both factors fit in 32 bits.
%m0 = mul i64 %xl, %xh
|
|
%m1 = mul i64 %yl, %yh
|
|
; Pointers to out[0] and out[1].
%out0 = getelementptr inbounds [2 x i64], ptr %out, i64 0, i64 0
|
|
%out1 = getelementptr inbounds [2 x i64], ptr %out, i64 0, i64 1
|
|
; Scalar stores of both products.
store i64 %m0, ptr %out0, align 8
|
|
store i64 %m1, ptr %out1, align 8
|
|
ret void
|
|
}
|