From ab60910e01eaa8bcf993cdb59a95f882b3859fd1 Mon Sep 17 00:00:00 2001 From: "A. Jiang" Date: Tue, 13 May 2025 04:49:25 +0800 Subject: [PATCH] [libc++][format] Discard contents since null-terminator in character arrays in formatting (#116571) Currently, built-in `char`/`wchar_t` arrays are assumed to be null-terminated sequence with the terminator being the last element in formatting. This doesn't conform to [format.arg]/6.9. > otherwise, if `decay_t` is `char_type*` or `const char_type*`, > initializes value with `static_cast(v)`; The standard wording specifies that character arrays are decayed to pointers. When the null terminator is not the last element or there's no null terminator (the latter case is UB), libc++ currently produces different results. Also fixes and hardens `formatter::format` in `<__format/formatter_string.h>`. These specializations are rarely used. Fixes #115935. Also checks the preconditions in this case, which fixes #116570. --- libcxx/include/__format/format_arg_store.h | 28 ++++++++++----- libcxx/include/__format/formatter_string.h | 6 +++- .../format.arg/assert.array.pass.cpp | 33 ++++++++++++++++++ .../formatter.char_array.pass.cpp | 34 +++++++++++-------- .../format/format.functions/format_tests.h | 9 +++++ 5 files changed, 86 insertions(+), 24 deletions(-) create mode 100644 libcxx/test/libcxx/utilities/format/format.arguments/format.arg/assert.array.pass.cpp diff --git a/libcxx/include/__format/format_arg_store.h b/libcxx/include/__format/format_arg_store.h index dba2dfd6df08..87557aa4da7b 100644 --- a/libcxx/include/__format/format_arg_store.h +++ b/libcxx/include/__format/format_arg_store.h @@ -17,6 +17,7 @@ #include <__concepts/arithmetic.h> #include <__concepts/same_as.h> #include <__config> +#include <__cstddef/size_t.h> #include <__format/concepts.h> #include <__format/format_arg.h> #include <__type_traits/conditional.h> @@ -32,6 +33,12 @@ _LIBCPP_BEGIN_NAMESPACE_STD namespace __format { +template +inline constexpr bool __is_bounded_array_of = false; + +template +inline constexpr bool __is_bounded_array_of<_Elem[_Len], _Elem> = true; + /// \returns The @c __arg_t based on the type of the formatting argument. /// /// \pre \c __formattable<_Tp, typename _Context::char_type> @@ -110,7 +117,7 @@ consteval __arg_t __determine_arg_t() { // Char array template - requires(is_array_v<_Tp> && same_as<_Tp, typename _Context::char_type[extent_v<_Tp>]>) + requires __is_bounded_array_of<_Tp, typename _Context::char_type> consteval __arg_t __determine_arg_t() { return __arg_t::__string_view; } @@ -168,13 +175,14 @@ _LIBCPP_HIDE_FROM_ABI basic_format_arg<_Context> __create_format_arg(_Tp& __valu static_assert(__arg != __arg_t::__none, "the supplied type is not formattable"); static_assert(__formattable_with<_Tp, _Context>); + using __context_char_type = _Context::char_type; // Not all types can be used to directly initialize the // __basic_format_arg_value. First handle all types needing adjustment, the // final else requires no adjustment. if constexpr (__arg == __arg_t::__char_type) # if _LIBCPP_HAS_WIDE_CHARACTERS - if constexpr (same_as && same_as<_Dp, char>) + if constexpr (same_as<__context_char_type, wchar_t> && same_as<_Dp, char>) return basic_format_arg<_Context>{__arg, static_cast(static_cast(__value))}; else # endif @@ -189,14 +197,16 @@ _LIBCPP_HIDE_FROM_ABI basic_format_arg<_Context> __create_format_arg(_Tp& __valu return basic_format_arg<_Context>{__arg, static_cast(__value)}; else if constexpr (__arg == __arg_t::__string_view) // Using std::size on a character array will add the NUL-terminator to the size. - if constexpr (is_array_v<_Dp>) + if constexpr (__is_bounded_array_of<_Dp, __context_char_type>) { + const __context_char_type* const __pbegin = std::begin(__value); + const __context_char_type* const __pzero = + char_traits<__context_char_type>::find(__pbegin, extent_v<_Dp>, __context_char_type{}); + _LIBCPP_ASSERT_VALID_INPUT_RANGE(__pzero != nullptr, "formatting a non-null-terminated array"); return basic_format_arg<_Context>{ - __arg, basic_string_view{__value, extent_v<_Dp> - 1}}; - else - // When the _Traits or _Allocator are different an implicit conversion will - // fail. - return basic_format_arg<_Context>{ - __arg, basic_string_view{__value.data(), __value.size()}}; + __arg, basic_string_view<__context_char_type>{__pbegin, static_cast(__pzero - __pbegin)}}; + } else + // When the _Traits or _Allocator are different an implicit conversion will fail. + return basic_format_arg<_Context>{__arg, basic_string_view<__context_char_type>{__value.data(), __value.size()}}; else if constexpr (__arg == __arg_t::__ptr) return basic_format_arg<_Context>{__arg, static_cast(__value)}; else if constexpr (__arg == __arg_t::__handle) diff --git a/libcxx/include/__format/formatter_string.h b/libcxx/include/__format/formatter_string.h index d71d19a4970a..bad6a4d2bb89 100644 --- a/libcxx/include/__format/formatter_string.h +++ b/libcxx/include/__format/formatter_string.h @@ -10,6 +10,7 @@ #ifndef _LIBCPP___FORMAT_FORMATTER_STRING_H #define _LIBCPP___FORMAT_FORMATTER_STRING_H +#include <__assert> #include <__config> #include <__format/concepts.h> #include <__format/format_parse_context.h> @@ -17,6 +18,7 @@ #include <__format/formatter_output.h> #include <__format/parser_std_format_spec.h> #include <__format/write_escaped.h> +#include #include #include @@ -94,7 +96,9 @@ struct formatter<_CharT[_Size], _CharT> : public __formatter_string<_CharT> { template _LIBCPP_HIDE_FROM_ABI typename _FormatContext::iterator format(const _CharT (&__str)[_Size], _FormatContext& __ctx) const { - return _Base::format(basic_string_view<_CharT>(__str, _Size), __ctx); + const _CharT* const __pzero = char_traits<_CharT>::find(__str, _Size, _CharT{}); + _LIBCPP_ASSERT_VALID_INPUT_RANGE(__pzero != nullptr, "formatting a non-null-terminated array"); + return _Base::format(basic_string_view<_CharT>(__str, static_cast(__pzero - __str)), __ctx); } }; diff --git a/libcxx/test/libcxx/utilities/format/format.arguments/format.arg/assert.array.pass.cpp b/libcxx/test/libcxx/utilities/format/format.arguments/format.arg/assert.array.pass.cpp new file mode 100644 index 000000000000..1e9b1d93eb06 --- /dev/null +++ b/libcxx/test/libcxx/utilities/format/format.arguments/format.arg/assert.array.pass.cpp @@ -0,0 +1,33 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// Formatting non-null-terminated character arrays. + +// REQUIRES: std-at-least-c++20, has-unix-headers, libcpp-hardening-mode={{extensive|debug}} +// XFAIL: libcpp-hardening-mode=debug && availability-verbose_abort-missing + +#include + +#include "check_assertion.h" + +int main(int, char**) { + { + const char non_null_terminated[3]{'1', '2', '3'}; + TEST_LIBCPP_ASSERT_FAILURE(std::format("{}", non_null_terminated), "formatting a non-null-terminated array"); + } +#ifndef TEST_HAS_NO_WIDE_CHARACTERS + { + const wchar_t non_null_terminated[3]{L'1', L'2', L'3'}; + TEST_LIBCPP_ASSERT_FAILURE(std::format(L"{}", non_null_terminated), "formatting a non-null-terminated array"); + } +#endif + + return 0; +} diff --git a/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.char_array.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.char_array.pass.cpp index 1b3ff52d22d5..bc056db9e254 100644 --- a/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.char_array.pass.cpp +++ b/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.char_array.pass.cpp @@ -41,13 +41,10 @@ struct Tester { constexpr Tester(const char (&r)[N]) { __builtin_memcpy(text, r, N); } char text[N]; - // The size of the array shouldn't include the NUL character. - static const std::size_t size = N - 1; - template void test(const std::basic_string& expected, const std::basic_string_view& fmt, std::size_t offset) const { - using Str = CharT[size]; + using Str = CharT[N]; std::basic_format_parse_context parse_ctx{fmt}; std::formatter formatter; static_assert(std::semiregular); @@ -57,16 +54,25 @@ struct Tester { assert(std::to_address(it) == std::to_address(fmt.end()) - offset); std::basic_string result; - auto out = std::back_inserter(result); + auto out = std::back_inserter(result); using FormatCtxT = std::basic_format_context; - std::basic_string buffer{text, text + N}; - // Note not too found of this hack - Str* data = reinterpret_cast(const_cast(buffer.c_str())); - - FormatCtxT format_ctx = - test_format_context_create(out, std::make_format_args(*data)); - formatter.format(*data, format_ctx); + if constexpr (std::is_same_v) { + FormatCtxT format_ctx = + test_format_context_create(out, std::make_format_args(text)); + formatter.format(text, format_ctx); + } +#ifndef TEST_HAS_NO_WIDE_CHARACTERS + else { + Str buffer; + for (std::size_t i = 0; i != N; ++i) { + buffer[i] = static_cast(text[i]); + } + FormatCtxT format_ctx = + test_format_context_create(out, std::make_format_args(buffer)); + formatter.format(buffer, format_ctx); + } +#endif assert(result == expected); } @@ -118,8 +124,8 @@ template void test_array() { test_helper_wrapper<" azAZ09,./<>?">(STR(" azAZ09,./<>?"), STR("}")); - std::basic_string s(CSTR("abc\0abc"), 7); - test_helper_wrapper<"abc\0abc">(s, STR("}")); + // Contents after embedded null terminator are not formatted. + test_helper_wrapper<"abc\0abc">(STR("abc"), STR("}")); test_helper_wrapper<"world">(STR("world"), STR("}")); test_helper_wrapper<"world">(STR("world"), STR("_>}")); diff --git a/libcxx/test/std/utilities/format/format.functions/format_tests.h b/libcxx/test/std/utilities/format/format.functions/format_tests.h index 3969b341cb14..60abd4ac4e22 100644 --- a/libcxx/test/std/utilities/format/format.functions/format_tests.h +++ b/libcxx/test/std/utilities/format/format.functions/format_tests.h @@ -3189,6 +3189,15 @@ void format_tests(TestFunction check, ExceptionTest check_exception) { const CharT* data = buffer; check(SV("hello 09azAZ!"), SV("hello {}"), data); } + { + // https://github.com/llvm/llvm-project/issues/115935 + // Contents after the embedded null character are discarded. + CharT buffer[] = {CharT('a'), CharT('b'), CharT('c'), 0, CharT('d'), CharT('e'), CharT('f'), 0}; + check(SV("hello abc"), SV("hello {}"), buffer); + // Even when the last element of the array is not null character. + CharT buffer2[] = {CharT('a'), CharT('b'), CharT('c'), 0, CharT('d'), CharT('e'), CharT('f')}; + check(SV("hello abc"), SV("hello {}"), buffer2); + } { std::basic_string data = STR("world"); check(SV("hello world"), SV("hello {}"), data);