[WebAssembly] Add intrinsics to wasm_simd128.h for all FP16 instructions (#106465)

Getting this to work required a few additional changes:
- Add builtins for any instructions that can't be done with plain C
currently.
- Add support for the saturating version of fp_to_<s,u>_I16x8. Other
vector sizes supported this already.
- Support bitcast of f16x8 to v128. Needed to return a __f16x8 as
v128_t.
This commit is contained in:
Brendan Dahl
2024-08-30 08:42:37 -07:00
committed by GitHub
parent 206b5aff44
commit 5703d8572f
7 changed files with 348 additions and 13 deletions

View File

@@ -2,7 +2,7 @@
// expected-no-diagnostics
// RUN: %clang %s -O2 -S -o - -target wasm32-unknown-unknown \
// RUN: -msimd128 -mrelaxed-simd -Wcast-qual -Werror | FileCheck %s
// RUN: -msimd128 -mrelaxed-simd -mfp16 -Wcast-qual -Werror | FileCheck %s
#include <wasm_simd128.h>
@@ -1385,3 +1385,139 @@ v128_t test_i16x8_relaxed_dot_i8x16_i7x16(v128_t a, v128_t b) {
v128_t test_i32x4_relaxed_dot_i8x16_i7x16_add(v128_t a, v128_t b, v128_t c) {
  // Pass all three operands straight to the intrinsic under test.
  v128_t result = wasm_i32x4_relaxed_dot_i8x16_i7x16_add(a, b, c);
  return result;
}
// CHECK-LABEL: test_f16x8_splat:
// CHECK: f16x8.splat{{$}}
v128_t test_f16x8_splat(float a) {
  // Build a vector from the scalar argument via wasm_f16x8_splat.
  v128_t splatted = wasm_f16x8_splat(a);
  return splatted;
}
// CHECK-LABEL: test_f16x8_extract_lane:
// CHECK: f16x8.extract_lane 7{{$}}
// Calls wasm_f16x8_extract_lane on `a` with the constant lane index 7 and
// returns the value as int16_t.
// NOTE(review): the sibling splat/replace_lane tests pass their scalar as
// `float`; confirm that int16_t is the intended return type here, since an
// implicit conversion may be applied to the extracted value.
int16_t test_f16x8_extract_lane(v128_t a) {
return wasm_f16x8_extract_lane(a, 7);
}
// CHECK-LABEL: test_f16x8_replace_lane:
// CHECK: f16x8.replace_lane 7{{$}}
v128_t test_f16x8_replace_lane(v128_t a, float b) {
  // The lane index stays a literal constant (7), as in the extract_lane test.
  v128_t updated = wasm_f16x8_replace_lane(a, 7, b);
  return updated;
}
// CHECK-LABEL: test_f16x8_abs:
// CHECK: f16x8.abs{{$}}
v128_t test_f16x8_abs(v128_t a) {
  // Forward the operand to wasm_f16x8_abs and return what it produces.
  v128_t result = wasm_f16x8_abs(a);
  return result;
}
// CHECK-LABEL: test_f16x8_neg:
// CHECK: f16x8.neg{{$}}
v128_t test_f16x8_neg(v128_t a) {
  // Forward the operand to wasm_f16x8_neg and return what it produces.
  v128_t result = wasm_f16x8_neg(a);
  return result;
}
// CHECK-LABEL: test_f16x8_sqrt:
// CHECK: f16x8.sqrt{{$}}
v128_t test_f16x8_sqrt(v128_t a) {
  // Forward the operand to wasm_f16x8_sqrt and return what it produces.
  v128_t result = wasm_f16x8_sqrt(a);
  return result;
}
// CHECK-LABEL: test_f16x8_ceil:
// CHECK: f16x8.ceil{{$}}
v128_t test_f16x8_ceil(v128_t a) {
  // Forward the operand to wasm_f16x8_ceil and return what it produces.
  v128_t result = wasm_f16x8_ceil(a);
  return result;
}
// CHECK-LABEL: test_f16x8_floor:
// CHECK: f16x8.floor{{$}}
v128_t test_f16x8_floor(v128_t a) {
  // Forward the operand to wasm_f16x8_floor and return what it produces.
  v128_t result = wasm_f16x8_floor(a);
  return result;
}
// CHECK-LABEL: test_f16x8_trunc:
// CHECK: f16x8.trunc{{$}}
v128_t test_f16x8_trunc(v128_t a) {
  // Forward the operand to wasm_f16x8_trunc and return what it produces.
  v128_t result = wasm_f16x8_trunc(a);
  return result;
}
// CHECK-LABEL: test_f16x8_nearest:
// CHECK: f16x8.nearest{{$}}
v128_t test_f16x8_nearest(v128_t a) {
  // Forward the operand to wasm_f16x8_nearest and return what it produces.
  v128_t result = wasm_f16x8_nearest(a);
  return result;
}
// CHECK-LABEL: test_f16x8_add:
// CHECK: f16x8.add{{$}}
v128_t test_f16x8_add(v128_t a, v128_t b) {
  // Operand order (a, b) is passed through unchanged to wasm_f16x8_add.
  v128_t result = wasm_f16x8_add(a, b);
  return result;
}
// CHECK-LABEL: test_f16x8_sub:
// CHECK: f16x8.sub{{$}}
v128_t test_f16x8_sub(v128_t a, v128_t b) {
  // Operand order (a, b) is passed through unchanged to wasm_f16x8_sub.
  v128_t result = wasm_f16x8_sub(a, b);
  return result;
}
// CHECK-LABEL: test_f16x8_mul:
// CHECK: f16x8.mul{{$}}
v128_t test_f16x8_mul(v128_t a, v128_t b) {
  // Operand order (a, b) is passed through unchanged to wasm_f16x8_mul.
  v128_t result = wasm_f16x8_mul(a, b);
  return result;
}
// CHECK-LABEL: test_f16x8_div:
// CHECK: f16x8.div{{$}}
v128_t test_f16x8_div(v128_t a, v128_t b) {
  // Operand order (a, b) is passed through unchanged to wasm_f16x8_div.
  v128_t result = wasm_f16x8_div(a, b);
  return result;
}
// CHECK-LABEL: test_f16x8_min:
// CHECK: f16x8.min{{$}}
v128_t test_f16x8_min(v128_t a, v128_t b) {
  // Operand order (a, b) is passed through unchanged to wasm_f16x8_min.
  v128_t result = wasm_f16x8_min(a, b);
  return result;
}
// CHECK-LABEL: test_f16x8_max:
// CHECK: f16x8.max{{$}}
v128_t test_f16x8_max(v128_t a, v128_t b) {
  // Operand order (a, b) is passed through unchanged to wasm_f16x8_max.
  v128_t result = wasm_f16x8_max(a, b);
  return result;
}
// CHECK-LABEL: test_f16x8_pmin:
// CHECK: f16x8.pmin{{$}}
v128_t test_f16x8_pmin(v128_t a, v128_t b) {
  // Operand order (a, b) is passed through unchanged to wasm_f16x8_pmin.
  v128_t result = wasm_f16x8_pmin(a, b);
  return result;
}
// CHECK-LABEL: test_f16x8_pmax:
// CHECK: f16x8.pmax{{$}}
v128_t test_f16x8_pmax(v128_t a, v128_t b) {
  // Operand order (a, b) is passed through unchanged to wasm_f16x8_pmax.
  v128_t result = wasm_f16x8_pmax(a, b);
  return result;
}
// CHECK-LABEL: test_f16x8_eq:
// CHECK: f16x8.eq{{$}}
v128_t test_f16x8_eq(v128_t a, v128_t b) {
  // Compare a against b with wasm_f16x8_eq and return the resulting vector.
  v128_t mask = wasm_f16x8_eq(a, b);
  return mask;
}
// CHECK-LABEL: test_f16x8_ne:
// CHECK: f16x8.ne{{$}}
v128_t test_f16x8_ne(v128_t a, v128_t b) {
  // Compare a against b with wasm_f16x8_ne and return the resulting vector.
  v128_t mask = wasm_f16x8_ne(a, b);
  return mask;
}
// CHECK-LABEL: test_f16x8_lt:
// CHECK: f16x8.lt{{$}}
v128_t test_f16x8_lt(v128_t a, v128_t b) {
  // Compare a against b with wasm_f16x8_lt and return the resulting vector.
  v128_t mask = wasm_f16x8_lt(a, b);
  return mask;
}
// CHECK-LABEL: test_f16x8_gt:
// CHECK: f16x8.gt{{$}}
v128_t test_f16x8_gt(v128_t a, v128_t b) {
  // Compare a against b with wasm_f16x8_gt and return the resulting vector.
  v128_t mask = wasm_f16x8_gt(a, b);
  return mask;
}
// CHECK-LABEL: test_f16x8_le:
// CHECK: f16x8.le{{$}}
v128_t test_f16x8_le(v128_t a, v128_t b) {
  // Compare a against b with wasm_f16x8_le and return the resulting vector.
  v128_t mask = wasm_f16x8_le(a, b);
  return mask;
}
// CHECK-LABEL: test_f16x8_ge:
// CHECK: f16x8.ge{{$}}
v128_t test_f16x8_ge(v128_t a, v128_t b) {
  // Compare a against b with wasm_f16x8_ge and return the resulting vector.
  v128_t mask = wasm_f16x8_ge(a, b);
  return mask;
}
// CHECK-LABEL: test_i16x8_trunc_sat_f16x8:
// CHECK: i16x8.trunc_sat_f16x8_s{{$}}
v128_t test_i16x8_trunc_sat_f16x8(v128_t a) {
  // Signed variant: the directive above expects the `_s` instruction form.
  v128_t converted = wasm_i16x8_trunc_sat_f16x8(a);
  return converted;
}
// CHECK-LABEL: test_u16x8_trunc_sat_f16x8:
// CHECK: i16x8.trunc_sat_f16x8_u{{$}}
v128_t test_u16x8_trunc_sat_f16x8(v128_t a) {
  // Unsigned variant: the directive above expects the `_u` instruction form.
  v128_t converted = wasm_u16x8_trunc_sat_f16x8(a);
  return converted;
}
// CHECK-LABEL: test_f16x8_convert_i16x8:
// CHECK: f16x8.convert_i16x8_s{{$}}
v128_t test_f16x8_convert_i16x8(v128_t a) {
  // Signed variant: the directive above expects the `_s` instruction form.
  v128_t converted = wasm_f16x8_convert_i16x8(a);
  return converted;
}
// CHECK-LABEL: test_f16x8_convert_u16x8:
// CHECK: f16x8.convert_i16x8_u{{$}}
v128_t test_f16x8_convert_u16x8(v128_t a) {
  // Unsigned variant: the directive above expects the `_u` instruction form.
  v128_t converted = wasm_f16x8_convert_u16x8(a);
  return converted;
}
// CHECK-LABEL: test_f16x8_relaxed_madd:
// CHECK: f16x8.relaxed_madd{{$}}
v128_t test_f16x8_relaxed_madd(v128_t a, v128_t b, v128_t c) {
  // All three operands go to wasm_f16x8_relaxed_madd in their given order.
  v128_t result = wasm_f16x8_relaxed_madd(a, b, c);
  return result;
}
// CHECK-LABEL: test_f16x8_relaxed_nmadd:
// CHECK: f16x8.relaxed_nmadd{{$}}
v128_t test_f16x8_relaxed_nmadd(v128_t a, v128_t b, v128_t c) {
  // All three operands go to wasm_f16x8_relaxed_nmadd in their given order.
  v128_t result = wasm_f16x8_relaxed_nmadd(a, b, c);
  return result;
}