clang-p2996/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll
Alexey Bataev 48bc5b0a29 [SLP][PR64099] Fix unsound undef-to-poison transformation when handling
insertelement instructions.

If the original vector contains undef (not poison) values that are not
overwritten by later insertelement instructions, the shuffle must be
built with the undef vector and the actual lane indices, not with a
poison vector and PoisonMaskElem; otherwise the transformation may
produce more poison in the output than was present in the input.
2023-07-27 16:09:49 -07:00
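
To illustrate the fix (this sketch is not part of the autogenerated test
below; %vec and the other value names are hypothetical): suppose only
lane 0 of an undef base is ever written:

  %v = insertelement <2 x double> undef, double %x, i32 0   ; lane 1 stays undef

When SLP rewrites such an insertelement chain as a shuffle of the
vectorized value %vec, emitting a poison second operand with a
PoisonMaskElem lane turns the untouched lane into poison:

  %bad = shufflevector <2 x double> %vec, <2 x double> poison, <2 x i32> <i32 0, i32 poison>

which is more poisonous than the input. The sound form keeps the undef
vector as the second operand and selects its lane with an actual index
(for two <2 x double> operands, mask indices 2-3 address the second
operand):

  %ok = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 2>  ; lane 1 remains undef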


; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX256NODQ
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver1 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX256NODQ
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX256NODQ
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX256DQ
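; The RUN lines pin different subtargets so FileCheck can compare the
; vectorization factor chosen per ISA: SSE (generic x86_64), AVX256NODQ
; (corei7-avx, bdver1, core-avx2), AVX512 (skylake-avx512 with
; -prefer-256-bit), and AVX256DQ (skylake-avx512 with +prefer-256-bit);
; CHECK, AVX, and AVX256 are the prefixes shared across those runs.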
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@src64 = common global [8 x i64] zeroinitializer, align 64
@src32 = common global [16 x i32] zeroinitializer, align 64
@src16 = common global [32 x i16] zeroinitializer, align 64
@src8 = common global [64 x i8] zeroinitializer, align 64
@dst64 = common global [8 x double] zeroinitializer, align 64
@dst32 = common global [16 x float] zeroinitializer, align 64
;
; SITOFP to vXf64
;
define void @sitofp_2i64_2f64() #0 {
; SSE-LABEL: @sitofp_2i64_2f64(
; SSE-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 64
; SSE-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
; SSE-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
; SSE-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
; SSE-NEXT: store double [[CVT0]], ptr @dst64, align 64
; SSE-NEXT: store double [[CVT1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
; SSE-NEXT: ret void
;
; AVX256NODQ-LABEL: @sitofp_2i64_2f64(
; AVX256NODQ-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 64
; AVX256NODQ-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
; AVX256NODQ-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
; AVX256NODQ-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
; AVX256NODQ-NEXT: store double [[CVT0]], ptr @dst64, align 64
; AVX256NODQ-NEXT: store double [[CVT1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
; AVX256NODQ-NEXT: ret void
;
; AVX512-LABEL: @sitofp_2i64_2f64(
; AVX512-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 64
; AVX512-NEXT: [[TMP2:%.*]] = sitofp <2 x i64> [[TMP1]] to <2 x double>
; AVX512-NEXT: store <2 x double> [[TMP2]], ptr @dst64, align 64
; AVX512-NEXT: ret void
;
; AVX256DQ-LABEL: @sitofp_2i64_2f64(
; AVX256DQ-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 64
; AVX256DQ-NEXT: [[TMP2:%.*]] = sitofp <2 x i64> [[TMP1]] to <2 x double>
; AVX256DQ-NEXT: store <2 x double> [[TMP2]], ptr @dst64, align 64
; AVX256DQ-NEXT: ret void
;
%ld0 = load i64, ptr @src64, align 64
%ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
%cvt0 = sitofp i64 %ld0 to double
%cvt1 = sitofp i64 %ld1 to double
store double %cvt0, ptr @dst64, align 64
store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
ret void
}
define void @sitofp_4i64_4f64() #0 {
; SSE-LABEL: @sitofp_4i64_4f64(
; SSE-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 64
; SSE-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
; SSE-NEXT: [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
; SSE-NEXT: [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
; SSE-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
; SSE-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
; SSE-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to double
; SSE-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to double
; SSE-NEXT: store double [[CVT0]], ptr @dst64, align 64
; SSE-NEXT: store double [[CVT1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
; SSE-NEXT: store double [[CVT2]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
; SSE-NEXT: store double [[CVT3]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
; SSE-NEXT: ret void
;
; AVX256NODQ-LABEL: @sitofp_4i64_4f64(
; AVX256NODQ-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 64
; AVX256NODQ-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
; AVX256NODQ-NEXT: [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
; AVX256NODQ-NEXT: [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
; AVX256NODQ-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
; AVX256NODQ-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
; AVX256NODQ-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to double
; AVX256NODQ-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to double
; AVX256NODQ-NEXT: store double [[CVT0]], ptr @dst64, align 64
; AVX256NODQ-NEXT: store double [[CVT1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
; AVX256NODQ-NEXT: store double [[CVT2]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
; AVX256NODQ-NEXT: store double [[CVT3]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
; AVX256NODQ-NEXT: ret void
;
; AVX512-LABEL: @sitofp_4i64_4f64(
; AVX512-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 64
; AVX512-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x double>
; AVX512-NEXT: store <4 x double> [[TMP2]], ptr @dst64, align 64
; AVX512-NEXT: ret void
;
; AVX256DQ-LABEL: @sitofp_4i64_4f64(
; AVX256DQ-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 64
; AVX256DQ-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x double>
; AVX256DQ-NEXT: store <4 x double> [[TMP2]], ptr @dst64, align 64
; AVX256DQ-NEXT: ret void
;
%ld0 = load i64, ptr @src64, align 64
%ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
%ld2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
%ld3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
%cvt0 = sitofp i64 %ld0 to double
%cvt1 = sitofp i64 %ld1 to double
%cvt2 = sitofp i64 %ld2 to double
%cvt3 = sitofp i64 %ld3 to double
store double %cvt0, ptr @dst64, align 64
store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
ret void
}
define void @sitofp_8i64_8f64() #0 {
; SSE-LABEL: @sitofp_8i64_8f64(
; SSE-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 64
; SSE-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
; SSE-NEXT: [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
; SSE-NEXT: [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
; SSE-NEXT: [[LD4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
; SSE-NEXT: [[LD5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 5), align 8
; SSE-NEXT: [[LD6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 6), align 16
; SSE-NEXT: [[LD7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 7), align 8
; SSE-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
; SSE-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
; SSE-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to double
; SSE-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to double
; SSE-NEXT: [[CVT4:%.*]] = sitofp i64 [[LD4]] to double
; SSE-NEXT: [[CVT5:%.*]] = sitofp i64 [[LD5]] to double
; SSE-NEXT: [[CVT6:%.*]] = sitofp i64 [[LD6]] to double
; SSE-NEXT: [[CVT7:%.*]] = sitofp i64 [[LD7]] to double
; SSE-NEXT: store double [[CVT0]], ptr @dst64, align 64
; SSE-NEXT: store double [[CVT1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
; SSE-NEXT: store double [[CVT2]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
; SSE-NEXT: store double [[CVT3]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
; SSE-NEXT: store double [[CVT4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
; SSE-NEXT: store double [[CVT5]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
; SSE-NEXT: store double [[CVT6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
; SSE-NEXT: store double [[CVT7]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
; SSE-NEXT: ret void
;
; AVX256NODQ-LABEL: @sitofp_8i64_8f64(
; AVX256NODQ-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 64
; AVX256NODQ-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
; AVX256NODQ-NEXT: [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
; AVX256NODQ-NEXT: [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
; AVX256NODQ-NEXT: [[LD4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
; AVX256NODQ-NEXT: [[LD5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 5), align 8
; AVX256NODQ-NEXT: [[LD6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 6), align 16
; AVX256NODQ-NEXT: [[LD7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 7), align 8
; AVX256NODQ-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
; AVX256NODQ-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
; AVX256NODQ-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to double
; AVX256NODQ-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to double
; AVX256NODQ-NEXT: [[CVT4:%.*]] = sitofp i64 [[LD4]] to double
; AVX256NODQ-NEXT: [[CVT5:%.*]] = sitofp i64 [[LD5]] to double
; AVX256NODQ-NEXT: [[CVT6:%.*]] = sitofp i64 [[LD6]] to double
; AVX256NODQ-NEXT: [[CVT7:%.*]] = sitofp i64 [[LD7]] to double
; AVX256NODQ-NEXT: store double [[CVT0]], ptr @dst64, align 64
; AVX256NODQ-NEXT: store double [[CVT1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
; AVX256NODQ-NEXT: store double [[CVT2]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
; AVX256NODQ-NEXT: store double [[CVT3]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
; AVX256NODQ-NEXT: store double [[CVT4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
; AVX256NODQ-NEXT: store double [[CVT5]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
; AVX256NODQ-NEXT: store double [[CVT6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
; AVX256NODQ-NEXT: store double [[CVT7]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
; AVX256NODQ-NEXT: ret void
;
; AVX512-LABEL: @sitofp_8i64_8f64(
; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @src64, align 64
; AVX512-NEXT: [[TMP2:%.*]] = sitofp <8 x i64> [[TMP1]] to <8 x double>
; AVX512-NEXT: store <8 x double> [[TMP2]], ptr @dst64, align 64
; AVX512-NEXT: ret void
;
; AVX256DQ-LABEL: @sitofp_8i64_8f64(
; AVX256DQ-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 64
; AVX256DQ-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x double>
; AVX256DQ-NEXT: store <4 x double> [[TMP2]], ptr @dst64, align 64
; AVX256DQ-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
; AVX256DQ-NEXT: [[TMP4:%.*]] = sitofp <4 x i64> [[TMP3]] to <4 x double>
; AVX256DQ-NEXT: store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
; AVX256DQ-NEXT: ret void
;
%ld0 = load i64, ptr @src64, align 64
%ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
%ld2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
%ld3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
%ld4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
%ld5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 5), align 8
%ld6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 6), align 16
%ld7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 7), align 8
%cvt0 = sitofp i64 %ld0 to double
%cvt1 = sitofp i64 %ld1 to double
%cvt2 = sitofp i64 %ld2 to double
%cvt3 = sitofp i64 %ld3 to double
%cvt4 = sitofp i64 %ld4 to double
%cvt5 = sitofp i64 %ld5 to double
%cvt6 = sitofp i64 %ld6 to double
%cvt7 = sitofp i64 %ld7 to double
store double %cvt0, ptr @dst64, align 64
store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
store double %cvt4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
store double %cvt5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
store double %cvt6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
store double %cvt7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
ret void
}
define void @sitofp_2i32_2f64() #0 {
; CHECK-LABEL: @sitofp_2i32_2f64(
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr @src32, align 64
; CHECK-NEXT: [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x double>
; CHECK-NEXT: store <2 x double> [[TMP2]], ptr @dst64, align 64
; CHECK-NEXT: ret void
;
%ld0 = load i32, ptr @src32, align 64
%ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
%cvt0 = sitofp i32 %ld0 to double
%cvt1 = sitofp i32 %ld1 to double
store double %cvt0, ptr @dst64, align 64
store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
ret void
}
define void @sitofp_4i32_4f64() #0 {
; SSE-LABEL: @sitofp_4i32_4f64(
; SSE-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr @src32, align 64
; SSE-NEXT: [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x double>
; SSE-NEXT: store <2 x double> [[TMP2]], ptr @dst64, align 64
; SSE-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
; SSE-NEXT: [[TMP4:%.*]] = sitofp <2 x i32> [[TMP3]] to <2 x double>
; SSE-NEXT: store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
; SSE-NEXT: ret void
;
; AVX-LABEL: @sitofp_4i32_4f64(
; AVX-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
; AVX-NEXT: [[TMP2:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x double>
; AVX-NEXT: store <4 x double> [[TMP2]], ptr @dst64, align 64
; AVX-NEXT: ret void
;
%ld0 = load i32, ptr @src32, align 64
%ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
%ld2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
%ld3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3), align 4
%cvt0 = sitofp i32 %ld0 to double
%cvt1 = sitofp i32 %ld1 to double
%cvt2 = sitofp i32 %ld2 to double
%cvt3 = sitofp i32 %ld3 to double
store double %cvt0, ptr @dst64, align 64
store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
ret void
}
define void @sitofp_8i32_8f64() #0 {
; SSE-LABEL: @sitofp_8i32_8f64(
; SSE-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr @src32, align 64
; SSE-NEXT: [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x double>
; SSE-NEXT: store <2 x double> [[TMP2]], ptr @dst64, align 64
; SSE-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
; SSE-NEXT: [[TMP4:%.*]] = sitofp <2 x i32> [[TMP3]] to <2 x double>
; SSE-NEXT: store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
; SSE-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
; SSE-NEXT: [[TMP6:%.*]] = sitofp <2 x i32> [[TMP5]] to <2 x double>
; SSE-NEXT: store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
; SSE-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 6), align 8
; SSE-NEXT: [[TMP8:%.*]] = sitofp <2 x i32> [[TMP7]] to <2 x double>
; SSE-NEXT: store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
; SSE-NEXT: ret void
;
; AVX256-LABEL: @sitofp_8i32_8f64(
; AVX256-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
; AVX256-NEXT: [[TMP2:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x double>
; AVX256-NEXT: store <4 x double> [[TMP2]], ptr @dst64, align 64
; AVX256-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
; AVX256-NEXT: [[TMP4:%.*]] = sitofp <4 x i32> [[TMP3]] to <4 x double>
; AVX256-NEXT: store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
; AVX256-NEXT: ret void
;
; AVX512-LABEL: @sitofp_8i32_8f64(
; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 64
; AVX512-NEXT: [[TMP2:%.*]] = sitofp <8 x i32> [[TMP1]] to <8 x double>
; AVX512-NEXT: store <8 x double> [[TMP2]], ptr @dst64, align 64
; AVX512-NEXT: ret void
;
%ld0 = load i32, ptr @src32, align 64
%ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
%ld2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
%ld3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3), align 4
%ld4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
%ld5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 5), align 4
%ld6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 6), align 8
%ld7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 7), align 4
%cvt0 = sitofp i32 %ld0 to double
%cvt1 = sitofp i32 %ld1 to double
%cvt2 = sitofp i32 %ld2 to double
%cvt3 = sitofp i32 %ld3 to double
%cvt4 = sitofp i32 %ld4 to double
%cvt5 = sitofp i32 %ld5 to double
%cvt6 = sitofp i32 %ld6 to double
%cvt7 = sitofp i32 %ld7 to double
store double %cvt0, ptr @dst64, align 64
store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
store double %cvt4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
store double %cvt5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
store double %cvt6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
store double %cvt7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
ret void
}
define void @sitofp_2i16_2f64() #0 {
; CHECK-LABEL: @sitofp_2i16_2f64(
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr @src16, align 64
; CHECK-NEXT: [[TMP2:%.*]] = sitofp <2 x i16> [[TMP1]] to <2 x double>
; CHECK-NEXT: store <2 x double> [[TMP2]], ptr @dst64, align 64
; CHECK-NEXT: ret void
;
%ld0 = load i16, ptr @src16, align 64
%ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
%cvt0 = sitofp i16 %ld0 to double
%cvt1 = sitofp i16 %ld1 to double
store double %cvt0, ptr @dst64, align 64
store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
ret void
}
define void @sitofp_4i16_4f64() #0 {
; SSE-LABEL: @sitofp_4i16_4f64(
; SSE-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr @src16, align 64
; SSE-NEXT: [[TMP2:%.*]] = sitofp <2 x i16> [[TMP1]] to <2 x double>
; SSE-NEXT: store <2 x double> [[TMP2]], ptr @dst64, align 64
; SSE-NEXT: [[TMP3:%.*]] = load <2 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
; SSE-NEXT: [[TMP4:%.*]] = sitofp <2 x i16> [[TMP3]] to <2 x double>
; SSE-NEXT: store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
; SSE-NEXT: ret void
;
; AVX-LABEL: @sitofp_4i16_4f64(
; AVX-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
; AVX-NEXT: [[TMP2:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x double>
; AVX-NEXT: store <4 x double> [[TMP2]], ptr @dst64, align 64
; AVX-NEXT: ret void
;
%ld0 = load i16, ptr @src16, align 64
%ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
%ld2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
%ld3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3), align 2
%cvt0 = sitofp i16 %ld0 to double
%cvt1 = sitofp i16 %ld1 to double
%cvt2 = sitofp i16 %ld2 to double
%cvt3 = sitofp i16 %ld3 to double
store double %cvt0, ptr @dst64, align 64
store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
ret void
}
define void @sitofp_8i16_8f64() #0 {
; SSE-LABEL: @sitofp_8i16_8f64(
; SSE-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr @src16, align 64
; SSE-NEXT: [[TMP2:%.*]] = sitofp <2 x i16> [[TMP1]] to <2 x double>
; SSE-NEXT: store <2 x double> [[TMP2]], ptr @dst64, align 64
; SSE-NEXT: [[TMP3:%.*]] = load <2 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
; SSE-NEXT: [[TMP4:%.*]] = sitofp <2 x i16> [[TMP3]] to <2 x double>
; SSE-NEXT: store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
; SSE-NEXT: [[TMP5:%.*]] = load <2 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
; SSE-NEXT: [[TMP6:%.*]] = sitofp <2 x i16> [[TMP5]] to <2 x double>
; SSE-NEXT: store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
; SSE-NEXT: [[TMP7:%.*]] = load <2 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 6), align 4
; SSE-NEXT: [[TMP8:%.*]] = sitofp <2 x i16> [[TMP7]] to <2 x double>
; SSE-NEXT: store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
; SSE-NEXT: ret void
;
; AVX256-LABEL: @sitofp_8i16_8f64(
; AVX256-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
; AVX256-NEXT: [[TMP2:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x double>
; AVX256-NEXT: store <4 x double> [[TMP2]], ptr @dst64, align 64
; AVX256-NEXT: [[TMP3:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
; AVX256-NEXT: [[TMP4:%.*]] = sitofp <4 x i16> [[TMP3]] to <4 x double>
; AVX256-NEXT: store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
; AVX256-NEXT: ret void
;
; AVX512-LABEL: @sitofp_8i16_8f64(
; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 64
; AVX512-NEXT: [[TMP2:%.*]] = sitofp <8 x i16> [[TMP1]] to <8 x double>
; AVX512-NEXT: store <8 x double> [[TMP2]], ptr @dst64, align 64
; AVX512-NEXT: ret void
;
%ld0 = load i16, ptr @src16, align 64
%ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
%ld2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
%ld3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3), align 2
%ld4 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
%ld5 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 5), align 2
%ld6 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 6), align 4
%ld7 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 7), align 2
%cvt0 = sitofp i16 %ld0 to double
%cvt1 = sitofp i16 %ld1 to double
%cvt2 = sitofp i16 %ld2 to double
%cvt3 = sitofp i16 %ld3 to double
%cvt4 = sitofp i16 %ld4 to double
%cvt5 = sitofp i16 %ld5 to double
%cvt6 = sitofp i16 %ld6 to double
%cvt7 = sitofp i16 %ld7 to double
store double %cvt0, ptr @dst64, align 64
store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
store double %cvt4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
store double %cvt5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
store double %cvt6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
store double %cvt7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
ret void
}
define void @sitofp_2i8_2f64() #0 {
; CHECK-LABEL: @sitofp_2i8_2f64(
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr @src8, align 64
; CHECK-NEXT: [[TMP2:%.*]] = sitofp <2 x i8> [[TMP1]] to <2 x double>
; CHECK-NEXT: store <2 x double> [[TMP2]], ptr @dst64, align 64
; CHECK-NEXT: ret void
;
%ld0 = load i8, ptr @src8, align 64
%ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
%cvt0 = sitofp i8 %ld0 to double
%cvt1 = sitofp i8 %ld1 to double
store double %cvt0, ptr @dst64, align 64
store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
ret void
}
define void @sitofp_4i8_4f64() #0 {
; SSE-LABEL: @sitofp_4i8_4f64(
; SSE-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr @src8, align 64
; SSE-NEXT: [[TMP2:%.*]] = sitofp <2 x i8> [[TMP1]] to <2 x double>
; SSE-NEXT: store <2 x double> [[TMP2]], ptr @dst64, align 64
; SSE-NEXT: [[TMP3:%.*]] = load <2 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
; SSE-NEXT: [[TMP4:%.*]] = sitofp <2 x i8> [[TMP3]] to <2 x double>
; SSE-NEXT: store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
; SSE-NEXT: ret void
;
; AVX-LABEL: @sitofp_4i8_4f64(
; AVX-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
; AVX-NEXT: [[TMP2:%.*]] = sitofp <4 x i8> [[TMP1]] to <4 x double>
; AVX-NEXT: store <4 x double> [[TMP2]], ptr @dst64, align 64
; AVX-NEXT: ret void
;
%ld0 = load i8, ptr @src8, align 64
%ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
%ld2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
%ld3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3), align 1
%cvt0 = sitofp i8 %ld0 to double
%cvt1 = sitofp i8 %ld1 to double
%cvt2 = sitofp i8 %ld2 to double
%cvt3 = sitofp i8 %ld3 to double
store double %cvt0, ptr @dst64, align 64
store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
ret void
}
define void @sitofp_8i8_8f64() #0 {
; SSE-LABEL: @sitofp_8i8_8f64(
; SSE-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr @src8, align 64
; SSE-NEXT: [[TMP2:%.*]] = sitofp <2 x i8> [[TMP1]] to <2 x double>
; SSE-NEXT: store <2 x double> [[TMP2]], ptr @dst64, align 64
; SSE-NEXT: [[TMP3:%.*]] = load <2 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
; SSE-NEXT: [[TMP4:%.*]] = sitofp <2 x i8> [[TMP3]] to <2 x double>
; SSE-NEXT: store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
; SSE-NEXT: [[TMP5:%.*]] = load <2 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
; SSE-NEXT: [[TMP6:%.*]] = sitofp <2 x i8> [[TMP5]] to <2 x double>
; SSE-NEXT: store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
; SSE-NEXT: [[TMP7:%.*]] = load <2 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 6), align 2
; SSE-NEXT: [[TMP8:%.*]] = sitofp <2 x i8> [[TMP7]] to <2 x double>
; SSE-NEXT: store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
; SSE-NEXT: ret void
;
; AVX256-LABEL: @sitofp_8i8_8f64(
; AVX256-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
; AVX256-NEXT: [[TMP2:%.*]] = sitofp <4 x i8> [[TMP1]] to <4 x double>
; AVX256-NEXT: store <4 x double> [[TMP2]], ptr @dst64, align 64
; AVX256-NEXT: [[TMP3:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
; AVX256-NEXT: [[TMP4:%.*]] = sitofp <4 x i8> [[TMP3]] to <4 x double>
; AVX256-NEXT: store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
; AVX256-NEXT: ret void
;
; AVX512-LABEL: @sitofp_8i8_8f64(
; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr @src8, align 64
; AVX512-NEXT: [[TMP2:%.*]] = sitofp <8 x i8> [[TMP1]] to <8 x double>
; AVX512-NEXT: store <8 x double> [[TMP2]], ptr @dst64, align 64
; AVX512-NEXT: ret void
;
%ld0 = load i8, ptr @src8, align 64
%ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
%ld2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
%ld3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3), align 1
%ld4 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
%ld5 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 5), align 1
%ld6 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 6), align 2
%ld7 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 7), align 1
%cvt0 = sitofp i8 %ld0 to double
%cvt1 = sitofp i8 %ld1 to double
%cvt2 = sitofp i8 %ld2 to double
%cvt3 = sitofp i8 %ld3 to double
%cvt4 = sitofp i8 %ld4 to double
%cvt5 = sitofp i8 %ld5 to double
%cvt6 = sitofp i8 %ld6 to double
%cvt7 = sitofp i8 %ld7 to double
store double %cvt0, ptr @dst64, align 64
store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
store double %cvt4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
store double %cvt5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
store double %cvt6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
store double %cvt7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
ret void
}
;
; SITOFP to vXf32
;
define void @sitofp_2i64_2f32() #0 {
; SSE-LABEL: @sitofp_2i64_2f32(
; SSE-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 64
; SSE-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
; SSE-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to float
; SSE-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to float
; SSE-NEXT: store float [[CVT0]], ptr @dst32, align 64
; SSE-NEXT: store float [[CVT1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
; SSE-NEXT: ret void
;
; AVX256NODQ-LABEL: @sitofp_2i64_2f32(
; AVX256NODQ-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 64
; AVX256NODQ-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
; AVX256NODQ-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to float
; AVX256NODQ-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to float
; AVX256NODQ-NEXT: store float [[CVT0]], ptr @dst32, align 64
; AVX256NODQ-NEXT: store float [[CVT1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
; AVX256NODQ-NEXT: ret void
;
; AVX512-LABEL: @sitofp_2i64_2f32(
; AVX512-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 64
; AVX512-NEXT: [[TMP2:%.*]] = sitofp <2 x i64> [[TMP1]] to <2 x float>
; AVX512-NEXT: store <2 x float> [[TMP2]], ptr @dst32, align 64
; AVX512-NEXT: ret void
;
; AVX256DQ-LABEL: @sitofp_2i64_2f32(
; AVX256DQ-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 64
; AVX256DQ-NEXT: [[TMP2:%.*]] = sitofp <2 x i64> [[TMP1]] to <2 x float>
; AVX256DQ-NEXT: store <2 x float> [[TMP2]], ptr @dst32, align 64
; AVX256DQ-NEXT: ret void
;
%ld0 = load i64, ptr @src64, align 64
%ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
%cvt0 = sitofp i64 %ld0 to float
%cvt1 = sitofp i64 %ld1 to float
store float %cvt0, ptr @dst32, align 64
store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
ret void
}
define void @sitofp_4i64_4f32() #0 {
; CHECK-LABEL: @sitofp_4i64_4f32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 64
; CHECK-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float>
; CHECK-NEXT: store <4 x float> [[TMP2]], ptr @dst32, align 64
; CHECK-NEXT: ret void
;
%ld0 = load i64, ptr @src64, align 64
%ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
%ld2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
%ld3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
%cvt0 = sitofp i64 %ld0 to float
%cvt1 = sitofp i64 %ld1 to float
%cvt2 = sitofp i64 %ld2 to float
%cvt3 = sitofp i64 %ld3 to float
store float %cvt0, ptr @dst32, align 64
store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
ret void
}
define void @sitofp_8i64_8f32() #0 {
; SSE-LABEL: @sitofp_8i64_8f32(
; SSE-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 64
; SSE-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float>
; SSE-NEXT: store <4 x float> [[TMP2]], ptr @dst32, align 64
; SSE-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
; SSE-NEXT: [[TMP4:%.*]] = sitofp <4 x i64> [[TMP3]] to <4 x float>
; SSE-NEXT: store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
; SSE-NEXT: ret void
;
; AVX-LABEL: @sitofp_8i64_8f32(
; AVX-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @src64, align 64
; AVX-NEXT: [[TMP2:%.*]] = sitofp <8 x i64> [[TMP1]] to <8 x float>
; AVX-NEXT: store <8 x float> [[TMP2]], ptr @dst32, align 64
; AVX-NEXT: ret void
;
%ld0 = load i64, ptr @src64, align 64
%ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
%ld2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
%ld3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
%ld4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
%ld5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 5), align 8
%ld6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 6), align 16
%ld7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 7), align 8
%cvt0 = sitofp i64 %ld0 to float
%cvt1 = sitofp i64 %ld1 to float
%cvt2 = sitofp i64 %ld2 to float
%cvt3 = sitofp i64 %ld3 to float
%cvt4 = sitofp i64 %ld4 to float
%cvt5 = sitofp i64 %ld5 to float
%cvt6 = sitofp i64 %ld6 to float
%cvt7 = sitofp i64 %ld7 to float
store float %cvt0, ptr @dst32, align 64
store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
store float %cvt4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
store float %cvt5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
store float %cvt6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 8
store float %cvt7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
ret void
}
define void @sitofp_4i32_4f32() #0 {
; CHECK-LABEL: @sitofp_4i32_4f32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
; CHECK-NEXT: [[TMP2:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x float>
; CHECK-NEXT: store <4 x float> [[TMP2]], ptr @dst32, align 64
; CHECK-NEXT: ret void
;
%ld0 = load i32, ptr @src32, align 64
%ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
%ld2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
%ld3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3), align 4
%cvt0 = sitofp i32 %ld0 to float
%cvt1 = sitofp i32 %ld1 to float
%cvt2 = sitofp i32 %ld2 to float
%cvt3 = sitofp i32 %ld3 to float
store float %cvt0, ptr @dst32, align 64
store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
ret void
}
define void @sitofp_8i32_8f32() #0 {
; SSE-LABEL: @sitofp_8i32_8f32(
; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
; SSE-NEXT: [[TMP2:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x float>
; SSE-NEXT: store <4 x float> [[TMP2]], ptr @dst32, align 64
; SSE-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
; SSE-NEXT: [[TMP4:%.*]] = sitofp <4 x i32> [[TMP3]] to <4 x float>
; SSE-NEXT: store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
; SSE-NEXT: ret void
;
; AVX-LABEL: @sitofp_8i32_8f32(
; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 64
; AVX-NEXT: [[TMP2:%.*]] = sitofp <8 x i32> [[TMP1]] to <8 x float>
; AVX-NEXT: store <8 x float> [[TMP2]], ptr @dst32, align 64
; AVX-NEXT: ret void
;
%ld0 = load i32, ptr @src32, align 64
%ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
%ld2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
%ld3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3), align 4
%ld4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
%ld5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 5), align 4
%ld6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 6), align 8
%ld7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 7), align 4
%cvt0 = sitofp i32 %ld0 to float
%cvt1 = sitofp i32 %ld1 to float
%cvt2 = sitofp i32 %ld2 to float
%cvt3 = sitofp i32 %ld3 to float
%cvt4 = sitofp i32 %ld4 to float
%cvt5 = sitofp i32 %ld5 to float
%cvt6 = sitofp i32 %ld6 to float
%cvt7 = sitofp i32 %ld7 to float
store float %cvt0, ptr @dst32, align 64
store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
store float %cvt4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
store float %cvt5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
store float %cvt6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 8
store float %cvt7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
ret void
}
define void @sitofp_16i32_16f32() #0 {
; SSE-LABEL: @sitofp_16i32_16f32(
; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
; SSE-NEXT: [[TMP2:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x float>
; SSE-NEXT: store <4 x float> [[TMP2]], ptr @dst32, align 64
; SSE-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
; SSE-NEXT: [[TMP4:%.*]] = sitofp <4 x i32> [[TMP3]] to <4 x float>
; SSE-NEXT: store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
; SSE-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 8), align 32
; SSE-NEXT: [[TMP6:%.*]] = sitofp <4 x i32> [[TMP5]] to <4 x float>
; SSE-NEXT: store <4 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
; SSE-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 12), align 16
; SSE-NEXT: [[TMP8:%.*]] = sitofp <4 x i32> [[TMP7]] to <4 x float>
; SSE-NEXT: store <4 x float> [[TMP8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
; SSE-NEXT: ret void
;
; AVX256-LABEL: @sitofp_16i32_16f32(
; AVX256-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 64
; AVX256-NEXT: [[TMP2:%.*]] = sitofp <8 x i32> [[TMP1]] to <8 x float>
; AVX256-NEXT: store <8 x float> [[TMP2]], ptr @dst32, align 64
; AVX256-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 8), align 32
; AVX256-NEXT: [[TMP4:%.*]] = sitofp <8 x i32> [[TMP3]] to <8 x float>
; AVX256-NEXT: store <8 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
; AVX256-NEXT: ret void
;
; AVX512-LABEL: @sitofp_16i32_16f32(
; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @src32, align 64
; AVX512-NEXT: [[TMP2:%.*]] = sitofp <16 x i32> [[TMP1]] to <16 x float>
; AVX512-NEXT: store <16 x float> [[TMP2]], ptr @dst32, align 64
; AVX512-NEXT: ret void
;
%ld0 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 0 ), align 64
%ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1 ), align 4
%ld2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2 ), align 8
%ld3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3 ), align 4
%ld4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4 ), align 16
%ld5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 5 ), align 4
%ld6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 6 ), align 8
%ld7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 7 ), align 4
%ld8 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 8 ), align 32
%ld9 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 9 ), align 4
%ld10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 10), align 8
%ld11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 11), align 4
%ld12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 12), align 16
%ld13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 13), align 4
%ld14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 14), align 8
%ld15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 15), align 4
%cvt0 = sitofp i32 %ld0 to float
%cvt1 = sitofp i32 %ld1 to float
%cvt2 = sitofp i32 %ld2 to float
%cvt3 = sitofp i32 %ld3 to float
%cvt4 = sitofp i32 %ld4 to float
%cvt5 = sitofp i32 %ld5 to float
%cvt6 = sitofp i32 %ld6 to float
%cvt7 = sitofp i32 %ld7 to float
%cvt8 = sitofp i32 %ld8 to float
%cvt9 = sitofp i32 %ld9 to float
%cvt10 = sitofp i32 %ld10 to float
%cvt11 = sitofp i32 %ld11 to float
%cvt12 = sitofp i32 %ld12 to float
%cvt13 = sitofp i32 %ld13 to float
%cvt14 = sitofp i32 %ld14 to float
%cvt15 = sitofp i32 %ld15 to float
store float %cvt0 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 0 ), align 64
store float %cvt1 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1 ), align 4
store float %cvt2 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2 ), align 8
store float %cvt3 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3 ), align 4
store float %cvt4 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4 ), align 16
store float %cvt5 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5 ), align 4
store float %cvt6 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6 ), align 8
store float %cvt7 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7 ), align 4
store float %cvt8 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8 ), align 32
store float %cvt9 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 9 ), align 4
store float %cvt10, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 8
store float %cvt11, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
store float %cvt12, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
store float %cvt13, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
store float %cvt14, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 8
store float %cvt15, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
ret void
}
define void @sitofp_4i16_4f32() #0 {
; CHECK-LABEL: @sitofp_4i16_4f32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
; CHECK-NEXT: [[TMP2:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x float>
; CHECK-NEXT: store <4 x float> [[TMP2]], ptr @dst32, align 64
; CHECK-NEXT: ret void
;
%ld0 = load i16, ptr @src16, align 64
%ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
%ld2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
%ld3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3), align 2
%cvt0 = sitofp i16 %ld0 to float
%cvt1 = sitofp i16 %ld1 to float
%cvt2 = sitofp i16 %ld2 to float
%cvt3 = sitofp i16 %ld3 to float
store float %cvt0, ptr @dst32, align 64
store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
ret void
}
define void @sitofp_8i16_8f32() #0 {
; SSE-LABEL: @sitofp_8i16_8f32(
; SSE-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
; SSE-NEXT: [[TMP2:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x float>
; SSE-NEXT: store <4 x float> [[TMP2]], ptr @dst32, align 64
; SSE-NEXT: [[TMP3:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
; SSE-NEXT: [[TMP4:%.*]] = sitofp <4 x i16> [[TMP3]] to <4 x float>
; SSE-NEXT: store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
; SSE-NEXT: ret void
;
; AVX-LABEL: @sitofp_8i16_8f32(
; AVX-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 64
; AVX-NEXT: [[TMP2:%.*]] = sitofp <8 x i16> [[TMP1]] to <8 x float>
; AVX-NEXT: store <8 x float> [[TMP2]], ptr @dst32, align 64
; AVX-NEXT: ret void
;
%ld0 = load i16, ptr @src16, align 64
%ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
%ld2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
%ld3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3), align 2
%ld4 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
%ld5 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 5), align 2
%ld6 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 6), align 4
%ld7 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 7), align 2
%cvt0 = sitofp i16 %ld0 to float
%cvt1 = sitofp i16 %ld1 to float
%cvt2 = sitofp i16 %ld2 to float
%cvt3 = sitofp i16 %ld3 to float
%cvt4 = sitofp i16 %ld4 to float
%cvt5 = sitofp i16 %ld5 to float
%cvt6 = sitofp i16 %ld6 to float
%cvt7 = sitofp i16 %ld7 to float
store float %cvt0, ptr @dst32, align 64
store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
store float %cvt4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
store float %cvt5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
store float %cvt6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 8
store float %cvt7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
ret void
}
define void @sitofp_16i16_16f32() #0 {
; SSE-LABEL: @sitofp_16i16_16f32(
; SSE-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
; SSE-NEXT: [[TMP2:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x float>
; SSE-NEXT: store <4 x float> [[TMP2]], ptr @dst32, align 64
; SSE-NEXT: [[TMP3:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
; SSE-NEXT: [[TMP4:%.*]] = sitofp <4 x i16> [[TMP3]] to <4 x float>
; SSE-NEXT: store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
; SSE-NEXT: [[TMP5:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 8), align 16
; SSE-NEXT: [[TMP6:%.*]] = sitofp <4 x i16> [[TMP5]] to <4 x float>
; SSE-NEXT: store <4 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
; SSE-NEXT: [[TMP7:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 12), align 8
; SSE-NEXT: [[TMP8:%.*]] = sitofp <4 x i16> [[TMP7]] to <4 x float>
; SSE-NEXT: store <4 x float> [[TMP8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
; SSE-NEXT: ret void
;
; AVX256-LABEL: @sitofp_16i16_16f32(
; AVX256-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 64
; AVX256-NEXT: [[TMP2:%.*]] = sitofp <8 x i16> [[TMP1]] to <8 x float>
; AVX256-NEXT: store <8 x float> [[TMP2]], ptr @dst32, align 64
; AVX256-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 8), align 16
; AVX256-NEXT: [[TMP4:%.*]] = sitofp <8 x i16> [[TMP3]] to <8 x float>
; AVX256-NEXT: store <8 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
; AVX256-NEXT: ret void
;
; AVX512-LABEL: @sitofp_16i16_16f32(
; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @src16, align 64
; AVX512-NEXT: [[TMP2:%.*]] = sitofp <16 x i16> [[TMP1]] to <16 x float>
; AVX512-NEXT: store <16 x float> [[TMP2]], ptr @dst32, align 64
; AVX512-NEXT: ret void
;
%ld0 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 0 ), align 64
%ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1 ), align 2
%ld2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2 ), align 4
%ld3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3 ), align 2
%ld4 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4 ), align 8
%ld5 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 5 ), align 2
%ld6 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 6 ), align 4
%ld7 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 7 ), align 2
%ld8 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 8 ), align 16
%ld9 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 9 ), align 2
%ld10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 10), align 4
%ld11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 11), align 2
%ld12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 12), align 8
%ld13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 13), align 2
%ld14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 14), align 4
%ld15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 15), align 2
%cvt0 = sitofp i16 %ld0 to float
%cvt1 = sitofp i16 %ld1 to float
%cvt2 = sitofp i16 %ld2 to float
%cvt3 = sitofp i16 %ld3 to float
%cvt4 = sitofp i16 %ld4 to float
%cvt5 = sitofp i16 %ld5 to float
%cvt6 = sitofp i16 %ld6 to float
%cvt7 = sitofp i16 %ld7 to float
%cvt8 = sitofp i16 %ld8 to float
%cvt9 = sitofp i16 %ld9 to float
%cvt10 = sitofp i16 %ld10 to float
%cvt11 = sitofp i16 %ld11 to float
%cvt12 = sitofp i16 %ld12 to float
%cvt13 = sitofp i16 %ld13 to float
%cvt14 = sitofp i16 %ld14 to float
%cvt15 = sitofp i16 %ld15 to float
store float %cvt0 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 0 ), align 64
store float %cvt1 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1 ), align 4
store float %cvt2 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2 ), align 8
store float %cvt3 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3 ), align 4
store float %cvt4 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4 ), align 16
store float %cvt5 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5 ), align 4
store float %cvt6 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6 ), align 8
store float %cvt7 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7 ), align 4
store float %cvt8 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8 ), align 32
store float %cvt9 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 9 ), align 4
store float %cvt10, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 8
store float %cvt11, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
store float %cvt12, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
store float %cvt13, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
store float %cvt14, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 8
store float %cvt15, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
ret void
}
define void @sitofp_4i8_4f32() #0 {
; CHECK-LABEL: @sitofp_4i8_4f32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
; CHECK-NEXT: [[TMP2:%.*]] = sitofp <4 x i8> [[TMP1]] to <4 x float>
; CHECK-NEXT: store <4 x float> [[TMP2]], ptr @dst32, align 64
; CHECK-NEXT: ret void
;
%ld0 = load i8, ptr @src8, align 64
%ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
%ld2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
%ld3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3), align 1
%cvt0 = sitofp i8 %ld0 to float
%cvt1 = sitofp i8 %ld1 to float
%cvt2 = sitofp i8 %ld2 to float
%cvt3 = sitofp i8 %ld3 to float
store float %cvt0, ptr @dst32, align 64
store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
ret void
}
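; SSE splits eight elements into two <4 x i8> -> <4 x float> halves; AVX
; targets convert a single <8 x i8> vector instead.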
define void @sitofp_8i8_8f32() #0 {
; SSE-LABEL: @sitofp_8i8_8f32(
; SSE-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
; SSE-NEXT: [[TMP2:%.*]] = sitofp <4 x i8> [[TMP1]] to <4 x float>
; SSE-NEXT: store <4 x float> [[TMP2]], ptr @dst32, align 64
; SSE-NEXT: [[TMP3:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
; SSE-NEXT: [[TMP4:%.*]] = sitofp <4 x i8> [[TMP3]] to <4 x float>
; SSE-NEXT: store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
; SSE-NEXT: ret void
;
; AVX-LABEL: @sitofp_8i8_8f32(
; AVX-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr @src8, align 64
; AVX-NEXT: [[TMP2:%.*]] = sitofp <8 x i8> [[TMP1]] to <8 x float>
; AVX-NEXT: store <8 x float> [[TMP2]], ptr @dst32, align 64
; AVX-NEXT: ret void
;
%ld0 = load i8, ptr @src8, align 64
%ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
%ld2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
%ld3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3), align 1
%ld4 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
%ld5 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 5), align 1
%ld6 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 6), align 2
%ld7 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 7), align 1
%cvt0 = sitofp i8 %ld0 to float
%cvt1 = sitofp i8 %ld1 to float
%cvt2 = sitofp i8 %ld2 to float
%cvt3 = sitofp i8 %ld3 to float
%cvt4 = sitofp i8 %ld4 to float
%cvt5 = sitofp i8 %ld5 to float
%cvt6 = sitofp i8 %ld6 to float
%cvt7 = sitofp i8 %ld7 to float
store float %cvt0, ptr @dst32, align 64
store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
store float %cvt4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
store float %cvt5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
store float %cvt6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 8
store float %cvt7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
ret void
}
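; Sixteen elements scale the same way: four <4 x i8> chunks on SSE, two
; <8 x i8> chunks on AVX256, and one <16 x i8> conversion on AVX512.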
define void @sitofp_16i8_16f32() #0 {
; SSE-LABEL: @sitofp_16i8_16f32(
; SSE-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
; SSE-NEXT: [[TMP2:%.*]] = sitofp <4 x i8> [[TMP1]] to <4 x float>
; SSE-NEXT: store <4 x float> [[TMP2]], ptr @dst32, align 64
; SSE-NEXT: [[TMP3:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
; SSE-NEXT: [[TMP4:%.*]] = sitofp <4 x i8> [[TMP3]] to <4 x float>
; SSE-NEXT: store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
; SSE-NEXT: [[TMP5:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 8), align 8
; SSE-NEXT: [[TMP6:%.*]] = sitofp <4 x i8> [[TMP5]] to <4 x float>
; SSE-NEXT: store <4 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
; SSE-NEXT: [[TMP7:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 12), align 4
; SSE-NEXT: [[TMP8:%.*]] = sitofp <4 x i8> [[TMP7]] to <4 x float>
; SSE-NEXT: store <4 x float> [[TMP8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
; SSE-NEXT: ret void
;
; AVX256-LABEL: @sitofp_16i8_16f32(
; AVX256-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr @src8, align 64
; AVX256-NEXT: [[TMP2:%.*]] = sitofp <8 x i8> [[TMP1]] to <8 x float>
; AVX256-NEXT: store <8 x float> [[TMP2]], ptr @dst32, align 64
; AVX256-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 8), align 8
; AVX256-NEXT: [[TMP4:%.*]] = sitofp <8 x i8> [[TMP3]] to <8 x float>
; AVX256-NEXT: store <8 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
; AVX256-NEXT: ret void
;
; AVX512-LABEL: @sitofp_16i8_16f32(
; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 64
; AVX512-NEXT: [[TMP2:%.*]] = sitofp <16 x i8> [[TMP1]] to <16 x float>
; AVX512-NEXT: store <16 x float> [[TMP2]], ptr @dst32, align 64
; AVX512-NEXT: ret void
;
%ld0 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 0 ), align 64
%ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1 ), align 1
%ld2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2 ), align 2
%ld3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3 ), align 1
%ld4 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4 ), align 4
%ld5 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 5 ), align 1
%ld6 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 6 ), align 2
%ld7 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 7 ), align 1
%ld8 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 8 ), align 8
%ld9 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 9 ), align 1
%ld10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 10), align 2
%ld11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 11), align 1
%ld12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 12), align 4
%ld13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 13), align 1
%ld14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 14), align 2
%ld15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 15), align 1
%cvt0 = sitofp i8 %ld0 to float
%cvt1 = sitofp i8 %ld1 to float
%cvt2 = sitofp i8 %ld2 to float
%cvt3 = sitofp i8 %ld3 to float
%cvt4 = sitofp i8 %ld4 to float
%cvt5 = sitofp i8 %ld5 to float
%cvt6 = sitofp i8 %ld6 to float
%cvt7 = sitofp i8 %ld7 to float
%cvt8 = sitofp i8 %ld8 to float
%cvt9 = sitofp i8 %ld9 to float
%cvt10 = sitofp i8 %ld10 to float
%cvt11 = sitofp i8 %ld11 to float
%cvt12 = sitofp i8 %ld12 to float
%cvt13 = sitofp i8 %ld13 to float
%cvt14 = sitofp i8 %ld14 to float
%cvt15 = sitofp i8 %ld15 to float
store float %cvt0 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 0 ), align 64
store float %cvt1 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1 ), align 4
store float %cvt2 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2 ), align 8
store float %cvt3 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3 ), align 4
store float %cvt4 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4 ), align 16
store float %cvt5 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5 ), align 4
store float %cvt6 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6 ), align 8
store float %cvt7 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7 ), align 4
store float %cvt8 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8 ), align 32
store float %cvt9 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 9 ), align 4
store float %cvt10, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 8
store float %cvt11, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
store float %cvt12, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
store float %cvt13, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
store float %cvt14, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 8
store float %cvt15, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
ret void
}
;
; SITOFP BUILDVECTOR
;
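; These tests feed scalar sitofp results into insertelement chains rather
; than stores, so the vectorizer rebuilds the operands as vectors and
; recreates the buildvector, using shuffles where the parts must be merged.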
define <4 x double> @sitofp_4xi32_4f64(i32 %a0, i32 %a1, i32 %a2, i32 %a3) #0 {
; SSE-LABEL: @sitofp_4xi32_4f64(
; SSE-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[A0:%.*]], i32 0
; SSE-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1
; SSE-NEXT: [[TMP3:%.*]] = sitofp <2 x i32> [[TMP2]] to <2 x double>
; SSE-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[A2:%.*]], i32 0
; SSE-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[A3:%.*]], i32 1
; SSE-NEXT: [[TMP6:%.*]] = sitofp <2 x i32> [[TMP5]] to <2 x double>
; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; SSE-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; SSE-NEXT: [[RES31:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; SSE-NEXT: ret <4 x double> [[RES31]]
;
; AVX-LABEL: @sitofp_4xi32_4f64(
; AVX-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[A0:%.*]], i32 0
; AVX-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1
; AVX-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A2:%.*]], i32 2
; AVX-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[A3:%.*]], i32 3
; AVX-NEXT: [[TMP5:%.*]] = sitofp <4 x i32> [[TMP4]] to <4 x double>
; AVX-NEXT: ret <4 x double> [[TMP5]]
;
%cvt0 = sitofp i32 %a0 to double
%cvt1 = sitofp i32 %a1 to double
%cvt2 = sitofp i32 %a2 to double
%cvt3 = sitofp i32 %a3 to double
%res0 = insertelement <4 x double> undef, double %cvt0, i32 0
%res1 = insertelement <4 x double> %res0, double %cvt1, i32 1
%res2 = insertelement <4 x double> %res1, double %cvt2, i32 2
%res3 = insertelement <4 x double> %res2, double %cvt3, i32 3
ret <4 x double> %res3
}
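; Lane 3 holds the constant 1.0, so only the two converted lanes are
; vectorized; the constant is inserted separately and merged by a shuffle.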
define <4 x double> @sitofp_with_const_4xi32_4f64(i32 %a2, i32 %a3) #0 {
; CHECK-LABEL: @sitofp_with_const_4xi32_4f64(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[A2:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[A3:%.*]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = sitofp <2 x i32> [[TMP2]] to <2 x double>
; CHECK-NEXT: [[RES0:%.*]] = insertelement <4 x double> undef, double 1.000000e+00, i32 3
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
; CHECK-NEXT: [[RES31:%.*]] = shufflevector <4 x double> [[RES0]], <4 x double> [[TMP4]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; CHECK-NEXT: ret <4 x double> [[RES31]]
;
%cvt2 = sitofp i32 %a2 to double
%cvt3 = sitofp i32 %a3 to double
%res0 = insertelement <4 x double> undef, double 1.0, i32 3
%res2 = insertelement <4 x double> %res0, double %cvt2, i32 0
%res3 = insertelement <4 x double> %res2, double %cvt3, i32 1
ret <4 x double> %res3
}
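; Lanes 2 and 3 of the result are never written, so the widening shuffle
; must select them from an undef vector with real indices (2 and 3) instead
; of leaving poison mask elements.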
define <4 x double> @sitofp_with_undef_4xi32_4f64(i32 %a2, i32 %a3) #0 {
; CHECK-LABEL: @sitofp_with_undef_4xi32_4f64(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[A2:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[A3:%.*]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = sitofp <2 x i32> [[TMP2]] to <2 x double>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: ret <4 x double> [[TMP4]]
;
%cvt2 = sitofp i32 %a2 to double
%cvt3 = sitofp i32 %a3 to double
%res2 = insertelement <4 x double> undef, double %cvt2, i32 0
%res3 = insertelement <4 x double> %res2, double %cvt3, i32 1
ret <4 x double> %res3
}
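; <4 x float> fits in a 128-bit register, so every target shares the same
; single <4 x i32> -> <4 x float> conversion.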
define <4 x float> @sitofp_4xi32_4f32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) #0 {
; CHECK-LABEL: @sitofp_4xi32_4f32(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[A0:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A2:%.*]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[A3:%.*]], i32 3
; CHECK-NEXT: [[TMP5:%.*]] = sitofp <4 x i32> [[TMP4]] to <4 x float>
; CHECK-NEXT: ret <4 x float> [[TMP5]]
;
%cvt0 = sitofp i32 %a0 to float
%cvt1 = sitofp i32 %a1 to float
%cvt2 = sitofp i32 %a2 to float
%cvt3 = sitofp i32 %a3 to float
%res0 = insertelement <4 x float> undef, float %cvt0, i32 0
%res1 = insertelement <4 x float> %res0, float %cvt1, i32 1
%res2 = insertelement <4 x float> %res1, float %cvt2, i32 2
%res3 = insertelement <4 x float> %res2, float %cvt3, i32 3
ret <4 x float> %res3
}
attributes #0 = { nounwind }