Files
clang-p2996/libcxx/include/__format/write_escaped.h
Louis Dionne 7b4622514d [libc++] Fix missing and incorrect push/pop macros (#79204)
We recently noticed that the unwrap_iter.h file was pushing macros, but
it was pushing them again instead of popping them at the end of the
file. This led to libc++ basically swallowing any custom definition of
these macros in user code:

    #define min HELLO
    #include <algorithm>
    // min is not HELLO anymore, it's not defined

While investigating this issue, I noticed that our push/pop pragmas were
actually entirely wrong too. Indeed, instead of pushing macros like
`move`, we'd push `move(int, int)` in the pragma, which is not a valid
macro name. As a result, we would not actually push macros like `move`
-- instead we'd simply undefine them. This led to the following code not
working:

    #define move HELLO
    #include <algorithm>
    // move is not HELLO anymore

Fixing the pragma push/pop incantations led to a cascade of issues
because we use identifiers like `move` in a large number of places, and
all of these headers would now need to do the push/pop dance.

This patch fixes all these issues. First, it adds a check that we don't
swallow important names like min, max, move or refresh as explained
above. This is done by augmenting the existing
system_reserved_names.gen.py test to also check that the macros are what
we expect after including each header.

Second, it fixes the push/pop pragmas to work properly and adds missing
pragmas to all the files I could detect a failure in via the newly added
test.

rdar://121365472
2024-01-25 15:48:46 -05:00

227 lines
8.0 KiB
C++

// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _LIBCPP___FORMAT_WRITE_ESCAPED_H
#define _LIBCPP___FORMAT_WRITE_ESCAPED_H
#include <__algorithm/ranges_copy.h>
#include <__algorithm/ranges_for_each.h>
#include <__charconv/to_chars_integral.h>
#include <__charconv/to_chars_result.h>
#include <__chrono/statically_widen.h>
#include <__format/escaped_output_table.h>
#include <__format/formatter_output.h>
#include <__format/parser_std_format_spec.h>
#include <__format/unicode.h>
#include <__iterator/back_insert_iterator.h>
#include <__memory/addressof.h>
#include <__system_error/errc.h>
#include <__type_traits/make_unsigned.h>
#include <__utility/move.h>
#include <string_view>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
#endif
_LIBCPP_PUSH_MACROS
#include <__undef_macros>
_LIBCPP_BEGIN_NAMESPACE_STD
namespace __formatter {
#if _LIBCPP_STD_VER >= 20
/// Writes a string using format's width estimation algorithm.
///
/// \note When \c _LIBCPP_HAS_NO_UNICODE is defined the function assumes the
/// input is ASCII.
template <class _CharT>
_LIBCPP_HIDE_FROM_ABI auto
__write_string(basic_string_view<_CharT> __str,
output_iterator<const _CharT&> auto __out_it,
__format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {
if (!__specs.__has_precision())
return __formatter::__write_string_no_precision(__str, std::move(__out_it), __specs);
int __size = __formatter::__truncate(__str, __specs.__precision_);
return __formatter::__write(__str.begin(), __str.end(), std::move(__out_it), __specs, __size);
}
#endif // _LIBCPP_STD_VER >= 20
#if _LIBCPP_STD_VER >= 23
struct __nul_terminator {};
template <class _CharT>
_LIBCPP_HIDE_FROM_ABI bool operator==(const _CharT* __cstr, __nul_terminator) {
return *__cstr == _CharT('\0');
}
template <class _CharT>
_LIBCPP_HIDE_FROM_ABI void
__write_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value, const _CharT* __prefix) {
back_insert_iterator __out_it{__str};
std::ranges::copy(__prefix, __nul_terminator{}, __out_it);
char __buffer[8];
to_chars_result __r = std::to_chars(std::begin(__buffer), std::end(__buffer), __value, 16);
_LIBCPP_ASSERT_INTERNAL(__r.ec == errc(0), "Internal buffer too small");
std::ranges::copy(std::begin(__buffer), __r.ptr, __out_it);
__str += _CharT('}');
}
// [format.string.escaped]/2.2.1.2
// ...
// then the sequence \u{hex-digit-sequence} is appended to E, where
// hex-digit-sequence is the shortest hexadecimal representation of C using
// lower-case hexadecimal digits.
template <class _CharT>
_LIBCPP_HIDE_FROM_ABI void __write_well_formed_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value) {
__formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\u{"));
}
// [format.string.escaped]/2.2.3
// Otherwise (X is a sequence of ill-formed code units), each code unit U is
// appended to E in order as the sequence \x{hex-digit-sequence}, where
// hex-digit-sequence is the shortest hexadecimal representation of U using
// lower-case hexadecimal digits.
template <class _CharT>
_LIBCPP_HIDE_FROM_ABI void __write_escape_ill_formed_code_unit(basic_string<_CharT>& __str, char32_t __value) {
__formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\x{"));
}
template <class _CharT>
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool __is_escaped_sequence_written(basic_string<_CharT>& __str, char32_t __value) {
# ifdef _LIBCPP_HAS_NO_UNICODE
// For ASCII assume everything above 127 is printable.
if (__value > 127)
return false;
# endif
if (!__escaped_output_table::__needs_escape(__value))
return false;
__formatter::__write_well_formed_escaped_code_unit(__str, __value);
return true;
}
template <class _CharT>
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr char32_t __to_char32(_CharT __value) {
return static_cast<make_unsigned_t<_CharT>>(__value);
}
enum class __escape_quotation_mark { __apostrophe, __double_quote };
// [format.string.escaped]/2
template <class _CharT>
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool
__is_escaped_sequence_written(basic_string<_CharT>& __str, char32_t __value, __escape_quotation_mark __mark) {
// 2.2.1.1 - Mapped character in [tab:format.escape.sequences]
switch (__value) {
case _CharT('\t'):
__str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\t");
return true;
case _CharT('\n'):
__str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\n");
return true;
case _CharT('\r'):
__str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\r");
return true;
case _CharT('\''):
if (__mark == __escape_quotation_mark::__apostrophe)
__str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\')");
else
__str += __value;
return true;
case _CharT('"'):
if (__mark == __escape_quotation_mark::__double_quote)
__str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\")");
else
__str += __value;
return true;
case _CharT('\\'):
__str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\\)");
return true;
// 2.2.1.2 - Space
case _CharT(' '):
__str += __value;
return true;
}
// 2.2.2
// Otherwise, if X is a shift sequence, the effect on E and further
// decoding of S is unspecified.
// For now shift sequences are ignored and treated as Unicode. Other parts
// of the format library do the same. It's unknown how ostream treats them.
// TODO FMT determine what to do with shift sequences.
// 2.2.1.2.1 and 2.2.1.2.2 - Escape
return __formatter::__is_escaped_sequence_written(__str, __formatter::__to_char32(__value));
}
template <class _CharT>
_LIBCPP_HIDE_FROM_ABI void
__escape(basic_string<_CharT>& __str, basic_string_view<_CharT> __values, __escape_quotation_mark __mark) {
__unicode::__code_point_view<_CharT> __view{__values.begin(), __values.end()};
while (!__view.__at_end()) {
auto __first = __view.__position();
typename __unicode::__consume_result __result = __view.__consume();
if (__result.__status == __unicode::__consume_result::__ok) {
if (!__formatter::__is_escaped_sequence_written(__str, __result.__code_point, __mark))
// 2.2.1.3 - Add the character
ranges::copy(__first, __view.__position(), std::back_insert_iterator(__str));
} else {
// 2.2.3 sequence of ill-formed code units
ranges::for_each(__first, __view.__position(), [&](_CharT __value) {
__formatter::__write_escape_ill_formed_code_unit(__str, __formatter::__to_char32(__value));
});
}
}
}
template <class _CharT>
_LIBCPP_HIDE_FROM_ABI auto
__format_escaped_char(_CharT __value,
output_iterator<const _CharT&> auto __out_it,
__format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {
basic_string<_CharT> __str;
__str += _CharT('\'');
__formatter::__escape(__str, basic_string_view{std::addressof(__value), 1}, __escape_quotation_mark::__apostrophe);
__str += _CharT('\'');
return __formatter::__write(__str.data(), __str.data() + __str.size(), std::move(__out_it), __specs, __str.size());
}
template <class _CharT>
_LIBCPP_HIDE_FROM_ABI auto
__format_escaped_string(basic_string_view<_CharT> __values,
output_iterator<const _CharT&> auto __out_it,
__format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {
basic_string<_CharT> __str;
__str += _CharT('"');
__formatter::__escape(__str, __values, __escape_quotation_mark::__double_quote);
__str += _CharT('"');
return __formatter::__write_string(basic_string_view{__str}, std::move(__out_it), __specs);
}
#endif // _LIBCPP_STD_VER >= 23
} // namespace __formatter
_LIBCPP_END_NAMESPACE_STD
_LIBCPP_POP_MACROS
#endif // _LIBCPP___FORMAT_WRITE_ESCAPED_H