Previously, the same instructions were generated for `ISD::CTLZ` and `ISD::CTLZ_ZERO_UNDEF`, which did not take advantage of the fact that zero is an invalid input for `ISD::CTLZ_ZERO_UNDEF`. This commit separates codegen for the two cases so that the latter can be optimized. The details of the optimization are outlined in #82075. Fixes #82075. Co-authored-by: Manish Kausik H <hmamishkausik@gmail.com>
45 lines
1.3 KiB
LLVM
45 lines
1.3 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s --mtriple=aarch64 | FileCheck %s
|
|
; Overloads of the llvm.ctlz intrinsic called by the tests in this file:
; scalar i8, vector <8 x i8>, and the non-power-of-two-width scalar i11.
declare i8 @llvm.ctlz.i8(i8, i1 immarg)
declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>, i1 immarg)
declare i11 @llvm.ctlz.i11(i11, i1 immarg)
|
|
|
|
; Scalar i8 ctlz called with its second argument set to true (per the LLVM
; LangRef, a zero input then yields poison), with the result zero-extended
; to i32. The expected code shifts the value left by 24 (= 32 - 8) and then
; issues a single 32-bit clz.
; The CHECK lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i32 @clz_nzu8(i8 %self) {
; CHECK-LABEL: clz_nzu8:
; CHECK:       // %bb.0: // %start
; CHECK-NEXT:    lsl w8, w0, #24
; CHECK-NEXT:    clz w0, w8
; CHECK-NEXT:    ret
start:
  %ctlz_res = call i8 @llvm.ctlz.i8(i8 %self, i1 true)
  %ret = zext i8 %ctlz_res to i32
  ret i32 %ret
}
|
|
|
|
; Non-standard bit size argument to ctlz: an i11 input, called with the
; second argument set to true (per the LLVM LangRef, a zero input then yields
; poison), with the result zero-extended to i32. The expected code shifts the
; value left by 21 (= 32 - 11) and then issues a single 32-bit clz.
; The CHECK lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i32 @clz_nzu11(i11 %self) {
; CHECK-LABEL: clz_nzu11:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl w8, w0, #21
; CHECK-NEXT:    clz w0, w8
; CHECK-NEXT:    ret
  %ctlz_res = call i11 @llvm.ctlz.i11(i11 %self, i1 true)
  %ret = zext i11 %ctlz_res to i32
  ret i32 %ret
}
|
|
|
|
; Vector type argument to the ctlz intrinsic: an <8 x i8> input, called with
; the second argument set to true (per the LLVM LangRef, a zero input then
; yields poison), with the result zero-extended to <8 x i32>. The expected
; code is a single vector clz on the byte lanes, followed by ushll/ushll2
; instructions that widen the result to 32-bit lanes; no shift precedes the
; clz.
; The CHECK lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define <8 x i32> @clz_vec_nzu8(<8 x i8> %self) {
; CHECK-LABEL: clz_vec_nzu8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clz v0.8b, v0.8b
; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-NEXT:    ushll2 v1.4s, v0.8h, #0
; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-NEXT:    ret
  %ctlz_res = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %self, i1 true)
  %ret = zext <8 x i8> %ctlz_res to <8 x i32>
  ret <8 x i32> %ret
}
|