Files
clang-p2996/llvm/test/CodeGen/AArch64/ctlz_zero_undef.ll
Manish Kausik H 69192e0193 [LegalizeDAG] Optimize CodeGen for ISD::CTLZ_ZERO_UNDEF (#83039)
Previously, the same instructions were generated for `ISD::CTLZ` and `ISD::CTLZ_ZERO_UNDEF`, which did not take advantage of the fact that zero is an invalid input for `ISD::CTLZ_ZERO_UNDEF`. This commit separates codegen for the two cases so that the latter can be optimized.

The details of the optimization are outlined in #82075

Fixes #82075

Co-authored-by: Manish Kausik H <hmamishkausik@gmail.com>
2024-07-08 14:01:32 +01:00

45 lines
1.3 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s --mtriple=aarch64 | FileCheck %s
; Declarations of the llvm.ctlz intrinsic overloads called by the tests in this
; file: scalar i8, vector <8 x i8>, and the non-power-of-two width i11. The
; second operand is an immediate i1 flag (is_zero_poison in the LLVM LangRef);
; every call visible in this file passes true.
declare i8 @llvm.ctlz.i8(i8, i1 immarg)
declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>, i1 immarg)
declare i11 @llvm.ctlz.i11(i11, i1 immarg)
; Scalar i8 case: calls llvm.ctlz.i8 with the i1 flag set to true and
; zero-extends the i8 result to i32.
; The expected AArch64 output is exactly three instructions: shift the input
; left by 24 (32 - 8, moving the i8 value into the top byte of the 32-bit
; register), one 32-bit clz, and ret.
; The assertion lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
define i32 @clz_nzu8(i8 %self) {
; CHECK-LABEL: clz_nzu8:
; CHECK: // %bb.0: // %start
; CHECK-NEXT: lsl w8, w0, #24
; CHECK-NEXT: clz w0, w8
; CHECK-NEXT: ret
start:
%ctlz_res = call i8 @llvm.ctlz.i8(i8 %self, i1 true)
%ret = zext i8 %ctlz_res to i32
ret i32 %ret
}
; Non-standard bit width: the argument to ctlz is an i11, which is not a
; power-of-two width. The call passes true for the i1 flag and the i11 result
; is zero-extended to i32.
; The expected AArch64 output has the same shape as the i8 case: shift left by
; 21 (32 - 11), one 32-bit clz, and ret.
define i32 @clz_nzu11(i11 %self) {
; CHECK-LABEL: clz_nzu11:
; CHECK: // %bb.0:
; CHECK-NEXT: lsl w8, w0, #21
; CHECK-NEXT: clz w0, w8
; CHECK-NEXT: ret
%ctlz_res = call i11 @llvm.ctlz.i11(i11 %self, i1 true)
%ret = zext i11 %ctlz_res to i32
ret i32 %ret
}
; Vector case: the argument to the ctlz intrinsic is an <8 x i8> vector. The
; call passes true for the i1 flag and the result is zero-extended to
; <8 x i32>.
; The expected AArch64 output applies a byte-wise vector clz directly to the
; input (no shift beforehand), then uses ushll/ushll2 with a #0 shift to widen
; the lanes 8b -> 8h -> 4s; the final result occupies v0 and v1.
define <8 x i32> @clz_vec_nzu8(<8 x i8> %self) {
; CHECK-LABEL: clz_vec_nzu8:
; CHECK: // %bb.0:
; CHECK-NEXT: clz v0.8b, v0.8b
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: ret
%ctlz_res = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %self, i1 true)
%ret = zext <8 x i8> %ctlz_res to <8 x i32>
ret <8 x i32> %ret
}