; clang-p2996/llvm/test/Transforms/SLPVectorizer/X86/mul64.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S -mcpu=x86-64 | FileCheck %s
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S -mcpu=x86-64-v2 | FileCheck %s
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S -mcpu=x86-64-v3 | FileCheck %s
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S -mcpu=x86-64-v4 | FileCheck %s

; Two-lane scalar kernel: for each of the two i64 elements of %in, multiply
; the low 32 bits of the element by its high 32 bits and store the i64 product
; to the matching element of %out. Both pointers are marked
; dereferenceable(16), i.e. exactly two i64 values.
;
; The assertions below expect the SLP vectorizer to merge the two lanes into a
; single <2 x i64> load / and / lshr / mul / store chain; every invocation at
; the top of the file shares the same expected output.
;
; NOTE(review): the function name presumably refers to an upstream bug report
; numbered 62969 -- confirm against the LLVM issue tracker.
define void @PR62969(ptr dereferenceable(16) %out, ptr dereferenceable(16) %in) {
; CHECK-LABEL: @PR62969(
; CHECK-NEXT:    [[IN0:%.*]] = getelementptr inbounds [2 x i64], ptr [[IN:%.*]], i64 0, i64 0
; CHECK-NEXT:    [[OUT0:%.*]] = getelementptr inbounds [2 x i64], ptr [[OUT:%.*]], i64 0, i64 0
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[IN0]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i64> [[TMP1]], <i64 4294967295, i64 4294967295>
; CHECK-NEXT:    [[TMP3:%.*]] = lshr <2 x i64> [[TMP1]], <i64 32, i64 32>
; CHECK-NEXT:    [[TMP4:%.*]] = mul <2 x i64> [[TMP2]], [[TMP3]]
; CHECK-NEXT:    store <2 x i64> [[TMP4]], ptr [[OUT0]], align 8
; CHECK-NEXT:    ret void
;
  ; Addresses of the two adjacent i64 input elements (indices 0 and 1).
  %in0 = getelementptr inbounds [2 x i64], ptr %in, i64 0, i64 0
  %in1 = getelementptr inbounds [2 x i64], ptr %in, i64 0, i64 1
  ; Scalar loads of both lanes.
  %x = load i64, ptr %in0, align 8
  %y = load i64, ptr %in1, align 8
  ; Low halves: 4294967295 is 0xFFFFFFFF, so only bits 0..31 are kept.
  %xl = and i64 %x, 4294967295
  %yl = and i64 %y, 4294967295
  ; High halves: logical shift right by 32 moves bits 32..63 into bits 0..31.
  %xh = lshr i64 %x, 32
  %yh = lshr i64 %y, 32
  ; Per-lane low * high. Both operands of each i64 multiply have zero upper
  ; 32 bits because of the mask / shift above.
  ; NOTE(review): presumably that bounded-operand shape is what this test is
  ; meant to exercise in the vectorizer's cost decisions -- confirm.
  %m0 = mul i64 %xl, %xh
  %m1 = mul i64 %yl, %yh
  ; Addresses of the two adjacent i64 output elements (indices 0 and 1).
  %out0 = getelementptr inbounds [2 x i64], ptr %out, i64 0, i64 0
  %out1 = getelementptr inbounds [2 x i64], ptr %out, i64 0, i64 1
  ; Scalar stores of both products.
  store i64 %m0, ptr %out0, align 8
  store i64 %m1, ptr %out1, align 8
  ret void
}