[X86][AVX10.2] Remove YMM rounding from VCVT2PS2PHX (#132397)

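Drop the 256-bit (YMM) embedded-rounding form of VCVT2PS2PHX:
- remove the _mm256_cvtx_round2ps_ph, _mm256_mask_cvtx_round2ps_ph and
  _mm256_maskz_cvtx_round2ps_ph macros;
- remove the rounding-mode immediate from __builtin_ia32_vcvt2ps2phx256_mask
  and from the llvm.x86.avx10.mask.vcvt2ps2phx.256 intrinsic;
- remove the Z256 rounding-control instruction definition and the
  corresponding CodeGen and MC tests.
The 512-bit form keeps its rounding operand.

Ref: https://cdrdv2.intel.com/v1/dl/getContent/784343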
2025-03-21 15:51:51 +01:00
parent 0ea4fb9264
commit 924c7ea76a
14 changed files with 11 additions and 260 deletions
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -5006,7 +5006,7 @@ let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<
 }

 let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
-  def vcvt2ps2phx256_mask : X86Builtin<"_Vector<16, _Float16>(_Vector<8, float>, _Vector<8, float>, _Vector<16, _Float16>, unsigned short, _Constant int)">;
+  def vcvt2ps2phx256_mask : X86Builtin<"_Vector<16, _Float16>(_Vector<8, float>, _Vector<8, float>, _Vector<16, _Float16>, unsigned short)">;
 }

 let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
--- a/clang/lib/Headers/avx10_2convertintrin.h
+++ b/clang/lib/Headers/avx10_2convertintrin.h
@@ -178,8 +178,7 @@ _mm_maskz_cvtx2ps_ph(__mmask8 __U, __m128 __A, __m128 __B) {
 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtx2ps_ph(__m256 __A,
                                                                  __m256 __B) {
  return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask(
-      (__v8sf)__A, (__v8sf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)(-1),
-      _MM_FROUND_CUR_DIRECTION);
+      (__v8sf)__A, (__v8sf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)(-1));
 }

 /// Convert two 256-bit vectors, \a __A and \a __B, containing packed
@@ -223,8 +222,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtx2ps_ph(__m256 __A,
 static __inline__ __m256h __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtx2ps_ph(__m256h __W, __mmask16 __U, __m256 __A, __m256 __B) {
  return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask(
-      (__v8sf)__A, (__v8sf)__B, (__v16hf)__W, (__mmask16)__U,
-      _MM_FROUND_CUR_DIRECTION);
+      (__v8sf)__A, (__v8sf)__B, (__v16hf)__W, (__mmask16)__U);
 }

 /// Convert two 256-bit vectors, \a __A and \a __B, containing packed
@@ -266,142 +264,9 @@ _mm256_mask_cvtx2ps_ph(__m256h __W, __mmask16 __U, __m256 __A, __m256 __B) {
 static __inline__ __m256h __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtx2ps_ph(__mmask16 __U, __m256 __A, __m256 __B) {
  return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask(
-      (__v8sf)__A, (__v8sf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U,
-      _MM_FROUND_CUR_DIRECTION);
+      (__v8sf)__A, (__v8sf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U);
 }

-/// Convert two 256-bit vectors, \a __A and \a __B, containing packed
-///    single-precision (32-bit) floating-point elements to a 256-bit vector
-///    containing FP16 elements. Rounding mode \a __R needs to be provided.
-///   
-/// \code{.operation}
-/// FOR i := 0 to 15 
-/// 	IF i < 8
-/// 		dst.fp16[i] := convert_fp32_to_fp16(__B.fp32[i])
-/// 	ELSE
-/// 		dst.fp16[i] := convert_fp32_to_fp16(__A.fp32[i - 8])
-/// 	FI
-/// ENDFOR
-///
-/// dst[MAX:256] := 0
-/// \endcode
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the \c VCVT2PS2PHX instruction.
-///
-/// \param __A
-///    A 256-bit vector of [8 x float].
-/// \param __B
-///    A 256-bit vector of [8 x float].
-/// \param __R
-///    Rounding mode. Valid inputs are: _MM_FROUND_CUR_DIRECTION or
-///    result of bitwise or of _MM_FROUND_NO_EXC with at most one of the following:
-///    _MM_FROUND_TO_NEAREST_INT, _MM_FROUND_TO_NEG_INF, _MM_FROUND_TO_POS_INF,
-///    _MM_FROUND_TO_ZERO.
-/// \returns
-///    A 256-bit vector of [16 x fp16]. Lower elements correspond to the
-///    (converted) elements from \a __B; higher order elements correspond to the
-///    (converted) elements from \a __A.
-#define _mm256_cvtx_round2ps_ph(__A, __B, __R)                                       \
-  ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask(                                \
-      (__v8sf)(__A), (__v8sf)(__B), (__v16hf)_mm256_undefined_ph(),                \
-      (__mmask16)(-1), (const int)(__R)))
-
-/// Convert two 256-bit vectors, \a __A and \a __B, containing packed
-///    single-precision (32-bit) floating-point elements to a 256-bit vector
-///    containing FP16 elements. Merging mask \a __U is used to determine if given
-///    element should be taken from \a __W instead. Rounding mode \a __R needs to
-///    be provided.
-///
-/// \code{.operation}
-/// FOR i := 0 to 15
-/// 	IF __U[i]
-/// 		IF i < 8
-/// 			dst.fp16[i] := convert_fp32_to_fp16(__B.fp32[i])
-/// 		ELSE
-/// 			dst.fp16[i] := convert_fp32_to_fp16(__A.fp32[i - 8])
-/// 		FI
-/// 	ELSE
-/// 		dst.fp16[i] := __W.fp16[i]
-/// 	FI
-/// ENDFOR
-///
-/// dst[MAX:256] := 0
-/// \endcode
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the \c VCVT2PS2PHX instruction.
-///
-/// \param __W
-///    A 256-bit vector of [16 x fp16].
-/// \param __U
-///    A 16-bit merging mask.
-/// \param __A
-///    A 256-bit vector of [8 x float].
-/// \param __B
-///    A 256-bit vector of [8 x float].
-/// \param __R
-///    Rounding mode. Valid inputs are: _MM_FROUND_CUR_DIRECTION or
-///    result of bitwise or of _MM_FROUND_NO_EXC with at most one of the following:
-///    _MM_FROUND_TO_NEAREST_INT, _MM_FROUND_TO_NEG_INF, _MM_FROUND_TO_POS_INF,
-///    _MM_FROUND_TO_ZERO.
-/// \returns
-///    A 256-bit vector of [16 x fp16]. Lower elements correspond to the
-///    (converted) elements from \a __B; higher order elements correspond to the
-///    (converted) elements from \a __A. If corresponding mask bit is not set, then
-///    element from \a __W is taken instead.
-#define _mm256_mask_cvtx_round2ps_ph(__W, __U, __A, __B, __R)                            \
-  ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask(                                \
-      (__v8sf)(__A), (__v8sf)(__B), (__v16hf)(__W), (__mmask16)(__U), (const int)(__R)))
-
-/// Convert two 256-bit vectors, \a __A and \a __B, containing packed
-///    single-precision (32-bit) floating-point elements to a 256-bit vector
-///    containing FP16 elements. Zeroing mask \a __U is used to determine if given
-///    element should be zeroed instead. Rounding mode \a __R needs to be provided.
-///
-/// \code{.operation}
-/// FOR i := 0 to 15 
-/// 	IF __U[i]
-/// 		IF i < 8
-/// 			dst.fp16[i] := convert_fp32_to_fp16(__B.fp32[i])
-/// 		ELSE
-/// 			dst.fp16[i] := convert_fp32_to_fp16(__A.fp32[i - 8])
-/// 		FI
-/// 	ELSE
-/// 		dst.fp16[i] := 0
-/// 	FI
-/// ENDFOR
-///
-/// dst[MAX:256] := 0
-/// \endcode
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the \c VCVT2PS2PHX instruction.
-///
-/// \param __U
-///    A 16-bit zeroing mask.
-/// \param __A
-///    A 256-bit vector of [8 x float].
-/// \param __B
-///    A 256-bit vector of [8 x float].
-/// \param __R
-///    Rounding mode. Valid inputs are: _MM_FROUND_CUR_DIRECTION or
-///    result of bitwise or of _MM_FROUND_NO_EXC with at most one of the following:
-///    _MM_FROUND_TO_NEAREST_INT, _MM_FROUND_TO_NEG_INF, _MM_FROUND_TO_POS_INF,
-///    _MM_FROUND_TO_ZERO.
-/// \returns
-///    A 256-bit vector of [16 x fp16]. Lower elements correspond to the
-///    (converted) elements from \a __B; higher order elements correspond to the
-///    (converted) elements from \a __A. If corresponding mask bit is not set,
-///    then zero is taken instead.
-#define _mm256_maskz_cvtx_round2ps_ph(__U, __A, __B, __R)                              \
-  ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask(                                \
-      (__v8sf)(__A), (__v8sf)(__B), (__v16hf)(_mm256_setzero_ph()),                \
-      (__mmask16)(__U), (const int)(__R)))
-
 /// Convert 128-bit vector \a __B containing packed FP16 floating-point elements
 ///    to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each
 ///    16-bit integer stored in \a __B.
--- a/clang/lib/Sema/SemaX86.cpp
+++ b/clang/lib/Sema/SemaX86.cpp
@@ -314,7 +314,6 @@ bool SemaX86::CheckBuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
  case X86::BI__builtin_ia32_vfmulcph512_mask:
  case X86::BI__builtin_ia32_vfcmulcsh_mask:
  case X86::BI__builtin_ia32_vfcmulcph512_mask:
-  case X86::BI__builtin_ia32_vcvt2ps2phx256_mask:
  case X86::BI__builtin_ia32_vcvt2ps2phx512_mask:
    ArgNum = 4;
    HasRC = true;
--- a/clang/test/CodeGen/X86/avx10_2convert-builtins.c
+++ b/clang/test/CodeGen/X86/avx10_2convert-builtins.c
@@ -41,24 +41,6 @@ __m256h test_mm256_maskz_cvtx2ps_ph(__mmask16 __U, __m256 __A, __m256 __B) {
  return _mm256_maskz_cvtx2ps_ph(__U, __A, __B);
 }

-__m256h test_mm256_cvtx_round2ps_ph(__m256 __A, __m256 __B) {
-  // CHECK-LABEL: @test_mm256_cvtx_round2ps_ph(
-  // CHECK: call <16 x half> @llvm.x86.avx10.mask.vcvt2ps2phx.256(
-  return _mm256_cvtx_round2ps_ph(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
-}
-
-__m256h test_mm256_mask_cvtx_round2ps_ph(__m256h __W, __mmask8 __U, __m256 __A, __m256 __B) {
-  // CHECK-LABEL: @test_mm256_mask_cvtx_round2ps_ph(
-  // CHECK: call <16 x half> @llvm.x86.avx10.mask.vcvt2ps2phx.256(
-  return _mm256_mask_cvtx_round2ps_ph(__W, __U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
-}
-
-__m256h test_mm256_maskz_cvtx_round2ps_ph(__mmask8 __U, __m256 __A, __m256 __B) {
-  // CHECK-LABEL: @test_mm256_maskz_cvtx_round2ps_ph(
-  // CHECK: call <16 x half> @llvm.x86.avx10.mask.vcvt2ps2phx.256(
-  return _mm256_maskz_cvtx_round2ps_ph(__U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
-}
-
 __m128i test_mm_cvtbiasph_bf8(__m128i __A, __m128h __B) {
  // CHECK-LABEL: @test_mm_cvtbiasph_bf8(
  // CHECK: call <16 x i8> @llvm.x86.avx10.mask.vcvtbiasph2bf8128(
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -7026,8 +7026,8 @@ def int_x86_avx10_mask_vcvt2ps2phx_128 : ClangBuiltin<"__builtin_ia32_vcvt2ps2ph
        DefaultAttrsIntrinsic<[llvm_v8f16_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v8f16_ty, llvm_i8_ty],
                              [IntrNoMem]>;
 def int_x86_avx10_mask_vcvt2ps2phx_256 : ClangBuiltin<"__builtin_ia32_vcvt2ps2phx256_mask">,
-        DefaultAttrsIntrinsic<[llvm_v16f16_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v16f16_ty, llvm_i16_ty, llvm_i32_ty],
-                              [IntrNoMem, ImmArg<ArgIndex<4>>]>;
+        DefaultAttrsIntrinsic<[llvm_v16f16_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v16f16_ty, llvm_i16_ty],
+                              [IntrNoMem]>;
 def int_x86_avx10_mask_vcvt2ps2phx_512 : ClangBuiltin<"__builtin_ia32_vcvt2ps2phx512_mask">,
        DefaultAttrsIntrinsic<[llvm_v32f16_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v32f16_ty, llvm_i32_ty, llvm_i32_ty],
                              [IntrNoMem, ImmArg<ArgIndex<4>>]>;
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -771,12 +771,6 @@ multiclass avx10_cvt2ps2ph<bits<8> opc, string OpcodeStr,
                                 _SrcVTInfo.info128>,
                EVEX_V128, EVEX_CD8<32, CD8VF>;
  }
-
-  let Predicates = [HasAVX10_2], hasEVEX_U = 1 in {
-    defm Z256 : avx10_cvt2ps2ph_rc<opc, OpcodeStr, sched.YMM,
-                                   _SrcVTInfo.info256, _DstVTInfo.info256,
-                                   OpNodeRnd>;
-  }
 }

 defm VCVT2PS2PHX : avx10_cvt2ps2ph<0x67, "vcvt2ps2phx",
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -440,7 +440,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
    X86_INTRINSIC_DATA(avx10_mask_vcvt2ps2phx_128, INTR_TYPE_2OP_MASK,
                       X86ISD::VFPROUND2, 0),
    X86_INTRINSIC_DATA(avx10_mask_vcvt2ps2phx_256, INTR_TYPE_2OP_MASK,
-                       X86ISD::VFPROUND2, X86ISD::VFPROUND2_RND),
+                       X86ISD::VFPROUND2, 0),
    X86_INTRINSIC_DATA(avx10_mask_vcvt2ps2phx_512, INTR_TYPE_2OP_MASK,
                       X86ISD::VFPROUND2, X86ISD::VFPROUND2_RND),
    X86_INTRINSIC_DATA(avx10_mask_vcvtbiasph2bf8128, TRUNCATE2_TO_REG,
--- a/llvm/test/CodeGen/X86/avx10_2convert-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx10_2convert-intrinsics.ll
@@ -50,7 +50,7 @@ define <16 x half> @test_int_x86_avx10_vcvt2ps2phx256(<8 x float> %A, <8 x float
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vcvt2ps2phx %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x67,0xc1]
 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-  %ret = call <16 x half> @llvm.x86.avx10.mask.vcvt2ps2phx.256(<8 x float> %A, <8 x float> %B, <16 x half> zeroinitializer, i16 -1, i32 4)
+  %ret = call <16 x half> @llvm.x86.avx10.mask.vcvt2ps2phx.256(<8 x float> %A, <8 x float> %B, <16 x half> zeroinitializer, i16 -1)
  ret <16 x half> %ret
 }

@@ -66,7 +66,7 @@ define <16 x half> @test_int_x86_avx10_vcvt2ps2phx256_mask(<16 x half> %W, i16 %
 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
 ; X86-NEXT:    vcvt2ps2phx %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x67,0xc2]
 ; X86-NEXT:    retl # encoding: [0xc3]
-  %ret = call <16 x half> @llvm.x86.avx10.mask.vcvt2ps2phx.256(<8 x float> %A, <8 x float> %B, <16 x half> %W, i16 %U, i32 4)
+  %ret = call <16 x half> @llvm.x86.avx10.mask.vcvt2ps2phx.256(<8 x float> %A, <8 x float> %B, <16 x half> %W, i16 %U)
  ret <16 x half> %ret
 }

@@ -82,52 +82,11 @@ define <16 x half> @test_int_x86_avx10_vcvt2ps2phx256_maskz(<16 x half> %W, i16
 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
 ; X86-NEXT:    vcvt2ps2phx %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x67,0xc2]
 ; X86-NEXT:    retl # encoding: [0xc3]
-  %ret = call <16 x half> @llvm.x86.avx10.mask.vcvt2ps2phx.256(<8 x float> %A, <8 x float> %B, <16 x half> zeroinitializer, i16 %U, i32 4)
+  %ret = call <16 x half> @llvm.x86.avx10.mask.vcvt2ps2phx.256(<8 x float> %A, <8 x float> %B, <16 x half> zeroinitializer, i16 %U)
  ret <16 x half> %ret
 }

-define <16 x half> @test_int_x86_avx10_vcvt2ps2phx256_round(<8 x float> %A, <8 x float> %B) {
-; CHECK-LABEL: test_int_x86_avx10_vcvt2ps2phx256_round:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vcvt2ps2phx {rz-sae}, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x79,0x78,0x67,0xc1]
-; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-  %ret = call <16 x half> @llvm.x86.avx10.mask.vcvt2ps2phx.256(<8 x float> %A, <8 x float> %B, <16 x half> zeroinitializer, i16 -1, i32 11)
-  ret <16 x half> %ret
-}
-
-define <16 x half> @test_int_x86_avx10_vcvt2ps2phx256_round_mask(<16 x half> %W, i16 %U, <8 x float> %A, <8 x float> %B) {
-; X64-LABEL: test_int_x86_avx10_vcvt2ps2phx256_round_mask:
-; X64:       # %bb.0:
-; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT:    vcvt2ps2phx {rz-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x71,0x79,0x67,0xc2]
-; X64-NEXT:    retq # encoding: [0xc3]
-;
-; X86-LABEL: test_int_x86_avx10_vcvt2ps2phx256_round_mask:
-; X86:       # %bb.0:
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
-; X86-NEXT:    vcvt2ps2phx {rz-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x71,0x79,0x67,0xc2]
-; X86-NEXT:    retl # encoding: [0xc3]
-  %ret = call <16 x half> @llvm.x86.avx10.mask.vcvt2ps2phx.256(<8 x float> %A, <8 x float> %B, <16 x half> %W, i16 %U, i32 11)
-  ret <16 x half> %ret
-}
-
-define <16 x half> @test_int_x86_avx10_vcvt2ps2phx256_round_maskz(i16 %U, <8 x float> %A, <8 x float> %B) {
-; X64-LABEL: test_int_x86_avx10_vcvt2ps2phx256_round_maskz:
-; X64:       # %bb.0:
-; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT:    vcvt2ps2phx {rz-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x79,0xf9,0x67,0xc1]
-; X64-NEXT:    retq # encoding: [0xc3]
-;
-; X86-LABEL: test_int_x86_avx10_vcvt2ps2phx256_round_maskz:
-; X86:       # %bb.0:
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
-; X86-NEXT:    vcvt2ps2phx {rz-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x79,0xf9,0x67,0xc1]
-; X86-NEXT:    retl # encoding: [0xc3]
-  %ret = call <16 x half> @llvm.x86.avx10.mask.vcvt2ps2phx.256(<8 x float> %A, <8 x float> %B, <16 x half> zeroinitializer, i16 %U, i32 11)
-  ret <16 x half> %ret
-}
-
-declare <16 x half> @llvm.x86.avx10.mask.vcvt2ps2phx.256(<8 x float>, <8 x float>, <16 x half>, i16, i32)
+declare <16 x half> @llvm.x86.avx10.mask.vcvt2ps2phx.256(<8 x float>, <8 x float>, <16 x half>, i16)

 define <16 x i8> @test_int_x86_avx10_vcvtbiasph2bf8128(<16 x i8> %A, <8 x half> %B) nounwind {
 ; CHECK-LABEL: test_int_x86_avx10_vcvtbiasph2bf8128:
--- a/llvm/test/MC/Disassembler/X86/avx10.2convert-32.txt
+++ b/llvm/test/MC/Disassembler/X86/avx10.2convert-32.txt
@@ -5,18 +5,10 @@
 # INTEL: vcvt2ps2phx ymm2, ymm3, ymm4
 0x62,0xf2,0x65,0x28,0x67,0xd4

-# ATT:   vcvt2ps2phx {rn-sae}, %ymm4, %ymm3, %ymm2
-# INTEL: vcvt2ps2phx ymm2, ymm3, ymm4, {rn-sae}
-0x62,0xf2,0x61,0x18,0x67,0xd4
-
 # ATT:   vcvt2ps2phx %ymm4, %ymm3, %ymm2 {%k7}
 # INTEL: vcvt2ps2phx ymm2 {k7}, ymm3, ymm4
 0x62,0xf2,0x65,0x2f,0x67,0xd4

-# ATT:   vcvt2ps2phx {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
-# INTEL: vcvt2ps2phx ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
-0x62,0xf2,0x61,0xff,0x67,0xd4
-
 # ATT:   vcvt2ps2phx %zmm4, %zmm3, %zmm2
 # INTEL: vcvt2ps2phx zmm2, zmm3, zmm4
 0x62,0xf2,0x65,0x48,0x67,0xd4
--- a/llvm/test/MC/Disassembler/X86/avx10.2convert-64.txt
+++ b/llvm/test/MC/Disassembler/X86/avx10.2convert-64.txt
@@ -5,18 +5,10 @@
 # INTEL: vcvt2ps2phx ymm22, ymm23, ymm24
 0x62,0x82,0x45,0x20,0x67,0xf0

-# ATT:   vcvt2ps2phx {rn-sae}, %ymm24, %ymm23, %ymm22
-# INTEL: vcvt2ps2phx ymm22, ymm23, ymm24, {rn-sae}
-0x62,0x82,0x41,0x10,0x67,0xf0
-
 # ATT:   vcvt2ps2phx %ymm24, %ymm23, %ymm22 {%k7}
 # INTEL: vcvt2ps2phx ymm22 {k7}, ymm23, ymm24
 0x62,0x82,0x45,0x27,0x67,0xf0

-# ATT:   vcvt2ps2phx {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
-# INTEL: vcvt2ps2phx ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
-0x62,0x82,0x41,0xf7,0x67,0xf0
-
 # ATT:   vcvt2ps2phx %zmm24, %zmm23, %zmm22
 # INTEL: vcvt2ps2phx zmm22, zmm23, zmm24
 0x62,0x82,0x45,0x40,0x67,0xf0
--- a/llvm/test/MC/X86/avx10.2convert-32-att.s
+++ b/llvm/test/MC/X86/avx10.2convert-32-att.s
@@ -4,18 +4,10 @@
 // CHECK: encoding: [0x62,0xf2,0x65,0x28,0x67,0xd4]
          vcvt2ps2phx %ymm4, %ymm3, %ymm2

-// CHECK: vcvt2ps2phx {rn-sae}, %ymm4, %ymm3, %ymm2
-// CHECK: encoding: [0x62,0xf2,0x61,0x18,0x67,0xd4]
-          vcvt2ps2phx {rn-sae}, %ymm4, %ymm3, %ymm2
-
 // CHECK: vcvt2ps2phx %ymm4, %ymm3, %ymm2 {%k7}
 // CHECK: encoding: [0x62,0xf2,0x65,0x2f,0x67,0xd4]
          vcvt2ps2phx %ymm4, %ymm3, %ymm2 {%k7}

-// CHECK: vcvt2ps2phx {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
-// CHECK: encoding: [0x62,0xf2,0x61,0xff,0x67,0xd4]
-          vcvt2ps2phx {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
-
 // CHECK: vcvt2ps2phx %zmm4, %zmm3, %zmm2
 // CHECK: encoding: [0x62,0xf2,0x65,0x48,0x67,0xd4]
          vcvt2ps2phx %zmm4, %zmm3, %zmm2
--- a/llvm/test/MC/X86/avx10.2convert-32-intel.s
+++ b/llvm/test/MC/X86/avx10.2convert-32-intel.s
@@ -4,18 +4,10 @@
 // CHECK: encoding: [0x62,0xf2,0x65,0x28,0x67,0xd4]
          vcvt2ps2phx ymm2, ymm3, ymm4

-// CHECK: vcvt2ps2phx ymm2, ymm3, ymm4, {rn-sae}
-// CHECK: encoding: [0x62,0xf2,0x61,0x18,0x67,0xd4]
-          vcvt2ps2phx ymm2, ymm3, ymm4, {rn-sae}
-
 // CHECK: vcvt2ps2phx ymm2 {k7}, ymm3, ymm4
 // CHECK: encoding: [0x62,0xf2,0x65,0x2f,0x67,0xd4]
          vcvt2ps2phx ymm2 {k7}, ymm3, ymm4

-// CHECK: vcvt2ps2phx ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
-// CHECK: encoding: [0x62,0xf2,0x61,0xff,0x67,0xd4]
-          vcvt2ps2phx ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
-
 // CHECK: vcvt2ps2phx zmm2, zmm3, zmm4
 // CHECK: encoding: [0x62,0xf2,0x65,0x48,0x67,0xd4]
          vcvt2ps2phx zmm2, zmm3, zmm4
--- a/llvm/test/MC/X86/avx10.2convert-64-att.s
+++ b/llvm/test/MC/X86/avx10.2convert-64-att.s
@@ -4,18 +4,10 @@
 // CHECK: encoding: [0x62,0x82,0x45,0x20,0x67,0xf0]
          vcvt2ps2phx %ymm24, %ymm23, %ymm22

-// CHECK: vcvt2ps2phx {rn-sae}, %ymm24, %ymm23, %ymm22
-// CHECK: encoding: [0x62,0x82,0x41,0x10,0x67,0xf0]
-          vcvt2ps2phx {rn-sae}, %ymm24, %ymm23, %ymm22
-
 // CHECK: vcvt2ps2phx %ymm24, %ymm23, %ymm22 {%k7}
 // CHECK: encoding: [0x62,0x82,0x45,0x27,0x67,0xf0]
          vcvt2ps2phx %ymm24, %ymm23, %ymm22 {%k7}

-// CHECK: vcvt2ps2phx {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
-// CHECK: encoding: [0x62,0x82,0x41,0xf7,0x67,0xf0]
-          vcvt2ps2phx {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
-
 // CHECK: vcvt2ps2phx %zmm24, %zmm23, %zmm22
 // CHECK: encoding: [0x62,0x82,0x45,0x40,0x67,0xf0]
          vcvt2ps2phx %zmm24, %zmm23, %zmm22
--- a/llvm/test/MC/X86/avx10.2convert-64-intel.s
+++ b/llvm/test/MC/X86/avx10.2convert-64-intel.s
@@ -4,18 +4,10 @@
 // CHECK: encoding: [0x62,0x82,0x45,0x20,0x67,0xf0]
          vcvt2ps2phx ymm22, ymm23, ymm24

-// CHECK: vcvt2ps2phx ymm22, ymm23, ymm24, {rn-sae}
-// CHECK: encoding: [0x62,0x82,0x41,0x10,0x67,0xf0]
-          vcvt2ps2phx ymm22, ymm23, ymm24, {rn-sae}
-
 // CHECK: vcvt2ps2phx ymm22 {k7}, ymm23, ymm24
 // CHECK: encoding: [0x62,0x82,0x45,0x27,0x67,0xf0]
          vcvt2ps2phx ymm22 {k7}, ymm23, ymm24

-// CHECK: vcvt2ps2phx ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
-// CHECK: encoding: [0x62,0x82,0x41,0xf7,0x67,0xf0]
-          vcvt2ps2phx ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
-
 // CHECK: vcvt2ps2phx zmm22, zmm23, zmm24
 // CHECK: encoding: [0x62,0x82,0x45,0x40,0x67,0xf0]
          vcvt2ps2phx zmm22, zmm23, zmm24