This will remove suboptimal branching from the generated ll/sc loops. The extra simplification pass affects a lot of testcases, which have been modified to accommodate this change: either by modifying the test to become immune to the CFG simplification, or (less preferable) by adding the option -hexagon-initial-cfg-cleanup=0. llvm-svn: 338774
26 lines
1.9 KiB
LLVM
26 lines
1.9 KiB
LLVM
; RUN: llc -march=hexagon < %s | FileCheck %s
|
|
|
|
; Check that this compiles successfully.
|
|
; CHECK: vunpack
|
|
|
|
target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
|
|
target triple = "hexagon"
|
|
|
|
define void @fred(<64 x i8>* %a0, <64 x i8>* %a1) #0 {
|
|
b0:
|
|
%v1 = load <64 x i8>, <64 x i8>* %a0, align 1
|
|
%v2 = sext <64 x i8> %v1 to <64 x i32>
|
|
%v3 = load <64 x i8>, <64 x i8>* %a1, align 1
|
|
%v4 = sext <64 x i8> %v3 to <64 x i32>
|
|
%v5 = mul nsw <64 x i32> %v4, %v2
|
|
%v6 = add nsw <64 x i32> %v5, zeroinitializer
|
|
%v7 = shl <64 x i32> %v6, <i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
|
|
%v8 = ashr exact <64 x i32> %v7, <i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
|
|
%v9 = mul nsw <64 x i32> %v8, %v8
|
|
%v10 = trunc <64 x i32> %v9 to <64 x i8>
|
|
store <64 x i8> %v10, <64 x i8>* %a0, align 1
|
|
ret void
|
|
}
|
|
|
|
attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx-length64b,+hvxv60" }
|