This is a recommit of #107120 . The original PR was approved but failed buildbot. The newly added tests should only be run for compilers that support the ARM target. This has been resolved by adding a config file for these tests. - Pass optimizes memcpy's by padding out destinations and sources to a full word to make ARM backend generate full word loads instead of loading a single byte (ldrb) and/or half word (ldrh). Only pads destination when it's a stack allocated constant size array and source when it's constant string. Heuristic to decide whether to pad or not is very basic and could be improved to allow more examples to be padded. - Pass works at the midend level
23 lines
1.0 KiB
LLVM
23 lines
1.0 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
|
|
; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s
|
|
|
|
@.i16 = private unnamed_addr constant [5 x i16] [i16 1, i16 2, i16 3, i16 4, i16 5] , align 1
|
|
|
|
define void @memcpy_i16_array() {
|
|
; CHECK-LABEL: define void @memcpy_i16_array() local_unnamed_addr {
|
|
; CHECK-NEXT: [[ENTRY:.*:]]
|
|
; CHECK-NEXT: [[SOMETHING1:%.*]] = alloca [6 x i16], align 1
|
|
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) [[SOMETHING1]], ptr noundef nonnull align 1 dereferenceable(10) @.i16, i32 12, i1 false)
|
|
; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING1]])
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
%something = alloca [5 x i16], align 1
|
|
call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) %something, ptr noundef nonnull align 1 dereferenceable(10) @.i16, i32 10, i1 false)
|
|
%call2 = call i32 @bar(ptr nonnull %something)
|
|
ret void
|
|
}
|
|
|
|
|
|
declare i32 @bar(...)
|