Detect (non-intrinsic) IR patterns corresponding to the semantics of the various widening and high-word multiplication instructions. Specifically, this is done by: (1) recognizing even/odd widening multiplication patterns in DAGCombine; (2) recognizing widening multiply-and-add on top of those during ISel; (3) implementing the standard MULHS/MULHU opcodes; and (4) detecting high-word multiply-and-add (which common code does not). Depending on the architecture level, this can support all integer vector types as well as the scalar i128 type. Fixes: https://github.com/llvm/llvm-project/issues/129705
73 lines
2.4 KiB
LLVM
73 lines
2.4 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
|
; Test widening vector multiply-and-add on arch15.
|
|
;
|
|
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
|
|
|
|
; Test a v2i64 (even) -> i128 unsigned widening multiply-and-add.
|
|
define i128 @f1(<2 x i64> %val1, <2 x i64> %val2, i128 %val3) {
; CHECK-LABEL: f1:
; CHECK: # %bb.0:
; CHECK-NEXT: vl %v0, 0(%r3), 3
; CHECK-NEXT: vmaleg %v0, %v24, %v26, %v0
; CHECK-NEXT: vst %v0, 0(%r2), 3
; CHECK-NEXT: br %r14
  ; Pick element 0 (the even element) of each input vector.
  %lhs.even = extractelement <2 x i64> %val1, i32 0
  %rhs.even = extractelement <2 x i64> %val2, i32 0
  ; Zero-extend both factors so the multiply below is an unsigned
  ; 64 x 64 -> 128 bit product.
  %lhs.wide = zext i64 %lhs.even to i128
  %rhs.wide = zext i64 %rhs.even to i128
  ; Widening multiply plus accumulate; the assertions above expect the whole
  ; chain to be selected as a single vmaleg.
  %prod = mul i128 %lhs.wide, %rhs.wide
  %sum = add i128 %prod, %val3
  ret i128 %sum
}
|
|
|
|
; Test a v2i64 (odd) -> i128 unsigned widening multiply-and-add.
|
|
define i128 @f2(<2 x i64> %val1, <2 x i64> %val2, i128 %val3) {
; CHECK-LABEL: f2:
; CHECK: # %bb.0:
; CHECK-NEXT: vl %v0, 0(%r3), 3
; CHECK-NEXT: vmalog %v0, %v24, %v26, %v0
; CHECK-NEXT: vst %v0, 0(%r2), 3
; CHECK-NEXT: br %r14
  ; Pick element 1 (the odd element) of each input vector.
  %lhs.odd = extractelement <2 x i64> %val1, i32 1
  %rhs.odd = extractelement <2 x i64> %val2, i32 1
  ; Zero-extend both factors so the multiply below is an unsigned
  ; 64 x 64 -> 128 bit product.
  %lhs.wide = zext i64 %lhs.odd to i128
  %rhs.wide = zext i64 %rhs.odd to i128
  ; Widening multiply plus accumulate; the assertions above expect the whole
  ; chain to be selected as a single vmalog.
  %prod = mul i128 %lhs.wide, %rhs.wide
  %sum = add i128 %prod, %val3
  ret i128 %sum
}
|
|
|
|
; Test a v2i64 (even) -> i128 signed widening multiply-and-add.
|
|
define i128 @f3(<2 x i64> %val1, <2 x i64> %val2, i128 %val3) {
; CHECK-LABEL: f3:
; CHECK: # %bb.0:
; CHECK-NEXT: vl %v0, 0(%r3), 3
; CHECK-NEXT: vmaeg %v0, %v24, %v26, %v0
; CHECK-NEXT: vst %v0, 0(%r2), 3
; CHECK-NEXT: br %r14
  ; Pick element 0 (the even element) of each input vector.
  %lhs.even = extractelement <2 x i64> %val1, i32 0
  %rhs.even = extractelement <2 x i64> %val2, i32 0
  ; Sign-extend both factors so the multiply below is a signed
  ; 64 x 64 -> 128 bit product.
  %lhs.wide = sext i64 %lhs.even to i128
  %rhs.wide = sext i64 %rhs.even to i128
  ; Widening multiply plus accumulate; the assertions above expect the whole
  ; chain to be selected as a single vmaeg.
  %prod = mul i128 %lhs.wide, %rhs.wide
  %sum = add i128 %prod, %val3
  ret i128 %sum
}
|
|
|
|
; Test a v2i64 (odd) -> i128 signed widening multiply-and-add.
|
|
define i128 @f4(<2 x i64> %val1, <2 x i64> %val2, i128 %val3) {
; CHECK-LABEL: f4:
; CHECK: # %bb.0:
; CHECK-NEXT: vl %v0, 0(%r3), 3
; CHECK-NEXT: vmaog %v0, %v24, %v26, %v0
; CHECK-NEXT: vst %v0, 0(%r2), 3
; CHECK-NEXT: br %r14
  ; Pick element 1 (the odd element) of each input vector.
  %lhs.odd = extractelement <2 x i64> %val1, i32 1
  %rhs.odd = extractelement <2 x i64> %val2, i32 1
  ; Sign-extend both factors so the multiply below is a signed
  ; 64 x 64 -> 128 bit product.
  %lhs.wide = sext i64 %lhs.odd to i128
  %rhs.wide = sext i64 %rhs.odd to i128
  ; Widening multiply plus accumulate; the assertions above expect the whole
  ; chain to be selected as a single vmaog.
  %prod = mul i128 %lhs.wide, %rhs.wide
  %sum = add i128 %prod, %val3
  ret i128 %sum
}
|