[LoongArch] Pre-commit for optimizing insert extracted pair elements
This commit is contained in:
@@ -0,0 +1,132 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
|
||||
|
||||
; Test: <32 x i8> insert of an extracted pair, one element per 128-bit half.
; Lane 15 (last lane of the low half) is copied into lane 1, and lane 31
; (last lane of the high half) is copied into lane 17; every other lane keeps
; the value it had in %a.
; In the expected output below, lane 15 is read with vpickve2gr.b, while lane 31
; is read back from memory (ld.b at offset 31) after the whole vector has been
; stored to the stack with xvst.
define <32 x i8> @insert_extract_v32i8(<32 x i8> %a) nounwind {
|
||||
; CHECK-LABEL: insert_extract_v32i8:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: addi.d $sp, $sp, -64
|
||||
; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
|
||||
; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
|
||||
; CHECK-NEXT: addi.d $fp, $sp, 64
|
||||
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
|
||||
; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
|
||||
; CHECK-NEXT: xvst $xr0, $sp, 0
|
||||
; CHECK-NEXT: ld.b $a1, $sp, 31
|
||||
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 1
|
||||
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
|
||||
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
|
||||
; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 1
|
||||
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
|
||||
; CHECK-NEXT: addi.d $sp, $fp, -64
|
||||
; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
|
||||
; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
|
||||
; CHECK-NEXT: addi.d $sp, $sp, 64
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
; Read the two source lanes: 15 (low half) and 31 (high half).
%b_lo = extractelement <32 x i8> %a, i32 15
|
||||
%b_hi = extractelement <32 x i8> %a, i32 31
|
||||
; Write them to lane 1 (low half) and lane 17 (high half) respectively.
%c = insertelement <32 x i8> %a, i8 %b_lo, i32 1
|
||||
%d = insertelement <32 x i8> %c, i8 %b_hi, i32 17
|
||||
ret <32 x i8> %d
|
||||
}
|
||||
|
||||
; Test: <16 x i16> insert of an extracted pair, one element per 128-bit half.
; Lane 7 (last lane of the low half) is copied into lane 1, and lane 15
; (last lane of the high half) is copied into lane 9; every other lane keeps
; the value it had in %a.
; In the expected output below, lane 7 is read with vpickve2gr.h, while lane 15
; is read back from memory (ld.h at byte offset 30) after the whole vector has
; been stored to the stack with xvst.
define <16 x i16> @insert_extract_v16i16(<16 x i16> %a) nounwind {
|
||||
; CHECK-LABEL: insert_extract_v16i16:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: addi.d $sp, $sp, -64
|
||||
; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
|
||||
; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
|
||||
; CHECK-NEXT: addi.d $fp, $sp, 64
|
||||
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
|
||||
; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7
|
||||
; CHECK-NEXT: xvst $xr0, $sp, 0
|
||||
; CHECK-NEXT: ld.h $a1, $sp, 30
|
||||
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 1
|
||||
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
|
||||
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
|
||||
; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 1
|
||||
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
|
||||
; CHECK-NEXT: addi.d $sp, $fp, -64
|
||||
; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
|
||||
; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
|
||||
; CHECK-NEXT: addi.d $sp, $sp, 64
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
; Read the two source lanes: 7 (low half) and 15 (high half).
%b_lo = extractelement <16 x i16> %a, i32 7
|
||||
%b_hi = extractelement <16 x i16> %a, i32 15
|
||||
; Write them to lane 1 (low half) and lane 9 (high half) respectively.
%c = insertelement <16 x i16> %a, i16 %b_lo, i32 1
|
||||
%d = insertelement <16 x i16> %c, i16 %b_hi, i32 9
|
||||
ret <16 x i16> %d
|
||||
}
|
||||
|
||||
; Test: <8 x i32> insert of an extracted pair, one element per 128-bit half.
; Lane 3 (last lane of the low half) is copied into lane 1, and lane 7
; (last lane of the high half) is copied into lane 5; every other lane keeps
; the value it had in %a.
; The expected output below uses no stack: both lanes are read with
; xvpickve2gr.w and written with xvinsgr2vr.w.
define <8 x i32> @insert_extract_v8i32(<8 x i32> %a) nounwind {
|
||||
; CHECK-LABEL: insert_extract_v8i32:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 3
|
||||
; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 7
|
||||
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 1
|
||||
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 5
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
; Read the two source lanes: 3 (low half) and 7 (high half).
%b_lo = extractelement <8 x i32> %a, i32 3
|
||||
%b_hi = extractelement <8 x i32> %a, i32 7
|
||||
; Write them to lane 1 (low half) and lane 5 (high half) respectively.
%c = insertelement <8 x i32> %a, i32 %b_lo, i32 1
|
||||
%d = insertelement <8 x i32> %c, i32 %b_hi, i32 5
|
||||
ret <8 x i32> %d
|
||||
}
|
||||
|
||||
; Test: <8 x float> insert of an extracted pair, one element per 128-bit half.
; Lane 3 (last lane of the low half) is copied into lane 1, and lane 7
; (last lane of the high half) is copied into lane 5; every other lane keeps
; the value it had in %a.
; In the expected output below each extracted value makes a round trip:
; xvpickve2gr.w reads it into a general-purpose register, movgr2fr.w moves it to
; a floating-point register, movfr2gr.s moves it back, and xvinsgr2vr.w inserts
; it.
define <8 x float> @insert_extract_v8f32(<8 x float> %a) nounwind {
|
||||
; CHECK-LABEL: insert_extract_v8f32:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 3
|
||||
; CHECK-NEXT: movgr2fr.w $fa1, $a0
|
||||
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7
|
||||
; CHECK-NEXT: movgr2fr.w $fa2, $a0
|
||||
; CHECK-NEXT: movfr2gr.s $a0, $fa1
|
||||
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 1
|
||||
; CHECK-NEXT: movfr2gr.s $a0, $fa2
|
||||
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 5
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
; Read the two source lanes: 3 (low half) and 7 (high half).
%b_lo = extractelement <8 x float> %a, i32 3
|
||||
%b_hi = extractelement <8 x float> %a, i32 7
|
||||
; Write them to lane 1 (low half) and lane 5 (high half) respectively.
%c = insertelement <8 x float> %a, float %b_lo, i32 1
|
||||
%d = insertelement <8 x float> %c, float %b_hi, i32 5
|
||||
ret <8 x float> %d
|
||||
}
|
||||
|
||||
; Test: <4 x i64> insert of an extracted pair, one element per 128-bit half.
; Lane 1 (last lane of the low half) is copied into lane 0, and lane 3
; (last lane of the high half) is copied into lane 2; the source lanes 1 and 3
; themselves are left unchanged.
; The expected output below uses no stack: both lanes are read with
; xvpickve2gr.d and written with xvinsgr2vr.d.
define <4 x i64> @insert_extract_v4i64(<4 x i64> %a) nounwind {
|
||||
; CHECK-LABEL: insert_extract_v4i64:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
|
||||
; CHECK-NEXT: xvpickve2gr.d $a1, $xr0, 3
|
||||
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 0
|
||||
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 2
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
; Read the two source lanes: 1 (low half) and 3 (high half).
%b_lo = extractelement <4 x i64> %a, i32 1
|
||||
%b_hi = extractelement <4 x i64> %a, i32 3
|
||||
; Write them to lane 0 (low half) and lane 2 (high half) respectively.
%c = insertelement <4 x i64> %a, i64 %b_lo, i32 0
|
||||
%d = insertelement <4 x i64> %c, i64 %b_hi, i32 2
|
||||
ret <4 x i64> %d
|
||||
}
|
||||
|
||||
; Test: <4 x double> insert of an extracted pair, one element per 128-bit half.
; Lane 1 (last lane of the low half) is copied into lane 0, and lane 3
; (last lane of the high half) is copied into lane 2; the source lanes 1 and 3
; themselves are left unchanged.
; In the expected output below each extracted value makes a round trip:
; xvpickve2gr.d reads it into a general-purpose register, movgr2fr.d moves it to
; a floating-point register, movfr2gr.d moves it back, and xvinsgr2vr.d inserts
; it.
define <4 x double> @insert_extract_v4f64(<4 x double> %a) nounwind {
|
||||
; CHECK-LABEL: insert_extract_v4f64:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
|
||||
; CHECK-NEXT: movgr2fr.d $fa1, $a0
|
||||
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
|
||||
; CHECK-NEXT: movgr2fr.d $fa2, $a0
|
||||
; CHECK-NEXT: movfr2gr.d $a0, $fa1
|
||||
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 0
|
||||
; CHECK-NEXT: movfr2gr.d $a0, $fa2
|
||||
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 2
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
; Read the two source lanes: 1 (low half) and 3 (high half).
%b_lo = extractelement <4 x double> %a, i32 1
|
||||
%b_hi = extractelement <4 x double> %a, i32 3
|
||||
; Write them to lane 0 (low half) and lane 2 (high half) respectively.
%c = insertelement <4 x double> %a, double %b_lo, i32 0
|
||||
%d = insertelement <4 x double> %c, double %b_hi, i32 2
|
||||
ret <4 x double> %d
|
||||
}
|
||||
Reference in New Issue
Block a user