From 82c0a53763bc39f978d39c79b17e20ae1b57748d Mon Sep 17 00:00:00 2001
From: Qi Zhao
Date: Wed, 2 Jul 2025 17:34:51 +0800
Subject: [PATCH] [LoongArch] Pre-commit for optimizing insert extracted pair elements

---
 .../insert-extract-pair-elements.ll | 132 ++++++++++++++++++
 1 file changed, 132 insertions(+)
 create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-pair-elements.ll

diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-pair-elements.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-pair-elements.ll
new file mode 100644
index 000000000000..88c3e4367ffa
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-pair-elements.ll
@@ -0,0 +1,132 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+define <32 x i8> @insert_extract_v32i8(<32 x i8> %a) nounwind { ; 32 x i8: copy element 15 -> 1 and element 31 -> 17
+; CHECK-LABEL: insert_extract_v32i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 15
+; CHECK-NEXT:    xvst $xr0, $sp, 0
+; CHECK-NEXT:    ld.b $a1, $sp, 31
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a0, 1
+; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
+; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 1
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %b_lo = extractelement <32 x i8> %a, i32 15 ; last element of the lower half (elements 0-15)
+  %b_hi = extractelement <32 x i8> %a, i32 31 ; last element of the upper half (elements 16-31)
+  %c = insertelement <32 x i8> %a, i8 %b_lo, i32 1 ; overwrite element 1 (lower half) with %b_lo
+  %d = insertelement <32 x i8> %c, i8 %b_hi, i32 17 ; overwrite element 17 = 1 + 16 (upper half) with %b_hi
+  ret <32 x i8> %d
+}
+
+define <16 x i16> @insert_extract_v16i16(<16 x i16> %a) nounwind { ; 16 x i16: copy element 7 -> 1 and element 15 -> 9
+; CHECK-LABEL: insert_extract_v16i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 7
+; CHECK-NEXT:    xvst $xr0, $sp, 0
+; CHECK-NEXT:    ld.h $a1, $sp, 30
+; CHECK-NEXT:    vinsgr2vr.h $vr0, $a0, 1
+; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
+; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a1, 1
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %b_lo = extractelement <16 x i16> %a, i32 7 ; last element of the lower half (elements 0-7)
+  %b_hi = extractelement <16 x i16> %a, i32 15 ; last element of the upper half (elements 8-15)
+  %c = insertelement <16 x i16> %a, i16 %b_lo, i32 1 ; overwrite element 1 (lower half) with %b_lo
+  %d = insertelement <16 x i16> %c, i16 %b_hi, i32 9 ; overwrite element 9 = 1 + 8 (upper half) with %b_hi
+  ret <16 x i16> %d
+}
+
+define <8 x i32> @insert_extract_v8i32(<8 x i32> %a) nounwind { ; 8 x i32: copy element 3 -> 1 and element 7 -> 5
+; CHECK-LABEL: insert_extract_v8i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvpickve2gr.w $a0, $xr0, 3
+; CHECK-NEXT:    xvpickve2gr.w $a1, $xr0, 7
+; CHECK-NEXT:    xvinsgr2vr.w $xr0, $a0, 1
+; CHECK-NEXT:    xvinsgr2vr.w $xr0, $a1, 5
+; CHECK-NEXT:    ret
+entry:
+  %b_lo = extractelement <8 x i32> %a, i32 3 ; last element of the lower half (elements 0-3)
+  %b_hi = extractelement <8 x i32> %a, i32 7 ; last element of the upper half (elements 4-7)
+  %c = insertelement <8 x i32> %a, i32 %b_lo, i32 1 ; overwrite element 1 (lower half) with %b_lo
+  %d = insertelement <8 x i32> %c, i32 %b_hi, i32 5 ; overwrite element 5 = 1 + 4 (upper half) with %b_hi
+  ret <8 x i32> %d
+}
+
+define <8 x float> @insert_extract_v8f32(<8 x float> %a) nounwind { ; 8 x float: copy element 3 -> 1 and element 7 -> 5
+; CHECK-LABEL: insert_extract_v8f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvpickve2gr.w $a0, $xr0, 3
+; CHECK-NEXT:    movgr2fr.w $fa1, $a0
+; CHECK-NEXT:    xvpickve2gr.w $a0, $xr0, 7
+; CHECK-NEXT:    movgr2fr.w $fa2, $a0
+; CHECK-NEXT:    movfr2gr.s $a0, $fa1
+; CHECK-NEXT:    xvinsgr2vr.w $xr0, $a0, 1
+; CHECK-NEXT:    movfr2gr.s $a0, $fa2
+; CHECK-NEXT:    xvinsgr2vr.w $xr0, $a0, 5
+; CHECK-NEXT:    ret
+entry:
+  %b_lo = extractelement <8 x float> %a, i32 3 ; last element of the lower half (elements 0-3)
+  %b_hi = extractelement <8 x float> %a, i32 7 ; last element of the upper half (elements 4-7)
+  %c = insertelement <8 x float> %a, float %b_lo, i32 1 ; overwrite element 1 (lower half) with %b_lo
+  %d = insertelement <8 x float> %c, float %b_hi, i32 5 ; overwrite element 5 = 1 + 4 (upper half) with %b_hi
+  ret <8 x float> %d
+}
+
+define <4 x i64> @insert_extract_v4i64(<4 x i64> %a) nounwind { ; 4 x i64: copy element 1 -> 0 and element 3 -> 2
+; CHECK-LABEL: insert_extract_v4i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvpickve2gr.d $a0, $xr0, 1
+; CHECK-NEXT:    xvpickve2gr.d $a1, $xr0, 3
+; CHECK-NEXT:    xvinsgr2vr.d $xr0, $a0, 0
+; CHECK-NEXT:    xvinsgr2vr.d $xr0, $a1, 2
+; CHECK-NEXT:    ret
+entry:
+  %b_lo = extractelement <4 x i64> %a, i32 1 ; last element of the lower half (elements 0-1)
+  %b_hi = extractelement <4 x i64> %a, i32 3 ; last element of the upper half (elements 2-3)
+  %c = insertelement <4 x i64> %a, i64 %b_lo, i32 0 ; overwrite element 0 (lower half) with %b_lo
+  %d = insertelement <4 x i64> %c, i64 %b_hi, i32 2 ; overwrite element 2 = 0 + 2 (upper half) with %b_hi
+  ret <4 x i64> %d
+}
+
+define <4 x double> @insert_extract_v4f64(<4 x double> %a) nounwind { ; 4 x double: copy element 1 -> 0 and element 3 -> 2
+; CHECK-LABEL: insert_extract_v4f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvpickve2gr.d $a0, $xr0, 1
+; CHECK-NEXT:    movgr2fr.d $fa1, $a0
+; CHECK-NEXT:    xvpickve2gr.d $a0, $xr0, 3
+; CHECK-NEXT:    movgr2fr.d $fa2, $a0
+; CHECK-NEXT:    movfr2gr.d $a0, $fa1
+; CHECK-NEXT:    xvinsgr2vr.d $xr0, $a0, 0
+; CHECK-NEXT:    movfr2gr.d $a0, $fa2
+; CHECK-NEXT:    xvinsgr2vr.d $xr0, $a0, 2
+; CHECK-NEXT:    ret
+entry:
+  %b_lo = extractelement <4 x double> %a, i32 1 ; last element of the lower half (elements 0-1)
+  %b_hi = extractelement <4 x double> %a, i32 3 ; last element of the upper half (elements 2-3)
+  %c = insertelement <4 x double> %a, double %b_lo, i32 0 ; overwrite element 0 (lower half) with %b_lo
+  %d = insertelement <4 x double> %c, double %b_hi, i32 2 ; overwrite element 2 = 0 + 2 (upper half) with %b_hi
+  ret <4 x double> %d
+}