Files
clang-p2996/libcxx/include/format
Mark de Wever e3dea5e341 [libc++][format] Improves escaping performance. (#88533)
The previous patch implemented
- P2713R1 Escaping improvements in std::format
- LWG3965 Incorrect example in [format.string.escaped] p3 for formatting
of combining characters

These changes were correct, but had a size and performance penalty. This
patch improves the size and performance of the previous patch. The
performance is still worse than before since the lookups may require two
property lookups instead of one before implementing the paper. The
changes give a tighter coupling between the Unicode data and the
algorithm. Additional tests are added to notify about changes in future
Unicode updates.

Before
```
-----------------------------------------------------------------------
Benchmark                             Time             CPU   Iterations
-----------------------------------------------------------------------
BM_ascii_escaped<char>           110704 ns       110696 ns         6206
BM_unicode_escaped<char>         101371 ns       101374 ns         6862
BM_cyrillic_escaped<char>         63329 ns        63327 ns        11013
BM_japanese_escaped<char>         41223 ns        41225 ns        16938
BM_emoji_escaped<char>           111022 ns       111021 ns         6304
BM_ascii_escaped<wchar_t>        112441 ns       112443 ns         6231
BM_unicode_escaped<wchar_t>      102776 ns       102779 ns         6813
BM_cyrillic_escaped<wchar_t>      58977 ns        58975 ns        11868
BM_japanese_escaped<wchar_t>      36885 ns        36886 ns        18975
BM_emoji_escaped<wchar_t>        115885 ns       115881 ns         6051
```

The first change is to manually encode the entire last area and make a
manual exception for the 240 excluded entries. This reduced the table
from 1077 to 729 entries and gave the following benchmark results.
```
-----------------------------------------------------------------------
Benchmark                             Time             CPU   Iterations
-----------------------------------------------------------------------
BM_ascii_escaped<char>           104777 ns       104776 ns         6550
BM_unicode_escaped<char>          96980 ns        96982 ns         7238
BM_cyrillic_escaped<char>         60254 ns        60251 ns        11670
BM_japanese_escaped<char>         44452 ns        44452 ns        15734
BM_emoji_escaped<char>           104557 ns       104551 ns         6685
BM_ascii_escaped<wchar_t>        107456 ns       107454 ns         6505
BM_unicode_escaped<wchar_t>       96219 ns        96216 ns         7301
BM_cyrillic_escaped<wchar_t>      56921 ns        56904 ns        12288
BM_japanese_escaped<wchar_t>      39530 ns        39529 ns        17492
BM_emoji_escaped<wchar_t>        108494 ns       108496 ns         6408
```

An entry in the table can only contain 2048 code points. For larger
ranges there are multiple entries split in chunks with a maximum size of
2048 entries. To encode the entire Unicode code point range 21 bits are
required. The manual part starts at 0x323B0 this means all entries in
the table fit in 18 bits. This allows to allocate 3 additional bits for
the range. This allows entries to have 16384 elements. This range always
avoids splitting the range in multiple chunks.

This reduces the number of table elements from 729 to 711 and gives the
following benchmark results.
```
-----------------------------------------------------------------------
Benchmark                             Time             CPU   Iterations
-----------------------------------------------------------------------
BM_ascii_escaped<char>           104289 ns       104289 ns         6619
BM_unicode_escaped<char>          96682 ns        96681 ns         7215
BM_cyrillic_escaped<char>         59673 ns        59673 ns        11732
BM_japanese_escaped<char>         41983 ns        41982 ns        16646
BM_emoji_escaped<char>           104119 ns       104120 ns         6683
BM_ascii_escaped<wchar_t>        104503 ns       104505 ns         6693
BM_unicode_escaped<wchar_t>       93426 ns        93423 ns         7489
BM_cyrillic_escaped<wchar_t>      54858 ns        54859 ns        12742
BM_japanese_escaped<wchar_t>      36385 ns        36384 ns        19259
BM_emoji_escaped<wchar_t>        105608 ns       105610 ns         6592
```
2024-04-28 12:15:25 +02:00

233 lines
9.2 KiB
C++

// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _LIBCPP_FORMAT
#define _LIBCPP_FORMAT
/*
namespace std {
// [format.context], class template basic_format_context
template<class Out, class charT> class basic_format_context;
using format_context = basic_format_context<unspecified, char>;
using wformat_context = basic_format_context<unspecified, wchar_t>;
// [format.args], class template basic_format_args
template<class Context> class basic_format_args;
using format_args = basic_format_args<format_context>;
using wformat_args = basic_format_args<wformat_context>;
// [format.fmt.string], class template basic_format_string
template<class charT, class... Args>
struct basic_format_string { // since C++23, exposition only before C++23
private:
basic_string_view<charT> str; // exposition only
public:
template<class T> consteval basic_format_string(const T& s);
basic_format_string(runtime-format-string<charT> s) noexcept : str(s.str) {} // since C++26
constexpr basic_string_view<charT> get() const noexcept { return str; }
};
template<class... Args>
using format_string = // since C++23, exposition only before C++23
basic_format_string<char, type_identity_t<Args>...>;
template<class... Args>
using wformat_string = // since C++23, exposition only before C++23
basic_format_string<wchar_t, type_identity_t<Args>...>;
template<class charT> struct runtime-format-string { // since C++26, exposition-only
private:
basic_string_view<charT> str; // exposition-only
public:
runtime-format-string(basic_string_view<charT> s) noexcept : str(s) {}
runtime-format-string(const runtime-format-string&) = delete;
runtime-format-string& operator=(const runtime-format-string&) = delete;
};
runtime-format-string<char> runtime_format(string_view fmt) noexcept {
return fmt;
}
runtime-format-string<wchar_t> runtime_format(wstring_view fmt) noexcept {
return fmt;
}
// [format.functions], formatting functions
template<class... Args>
string format(format-string<Args...> fmt, Args&&... args);
template<class... Args>
wstring format(wformat-string<Args...> fmt, Args&&... args);
template<class... Args>
string format(const locale& loc, format-string<Args...> fmt, Args&&... args);
template<class... Args>
wstring format(const locale& loc, wformat-string<Args...> fmt, Args&&... args);
string vformat(string_view fmt, format_args args);
wstring vformat(wstring_view fmt, wformat_args args);
string vformat(const locale& loc, string_view fmt, format_args args);
wstring vformat(const locale& loc, wstring_view fmt, wformat_args args);
template<class Out, class... Args>
Out format_to(Out out, format-string<Args...> fmt, Args&&... args);
template<class Out, class... Args>
Out format_to(Out out, wformat-string<Args...> fmt, Args&&... args);
template<class Out, class... Args>
Out format_to(Out out, const locale& loc, format-string<Args...> fmt, Args&&... args);
template<class Out, class... Args>
Out format_to(Out out, const locale& loc, wformat-string<Args...> fmt, Args&&... args);
template<class Out>
Out vformat_to(Out out, string_view fmt, format_args args);
template<class Out>
Out vformat_to(Out out, wstring_view fmt, wformat_args args);
template<class Out>
Out vformat_to(Out out, const locale& loc, string_view fmt,
format_args char> args);
template<class Out>
Out vformat_to(Out out, const locale& loc, wstring_view fmt,
wformat_args args);
template<class Out> struct format_to_n_result {
Out out;
iter_difference_t<Out> size;
};
template<class Out, class... Args>
format_to_n_result<Out> format_to_n(Out out, iter_difference_t<Out> n,
format-string<Args...> fmt, Args&&... args);
template<class Out, class... Args>
format_to_n_result<Out> format_to_n(Out out, iter_difference_t<Out> n,
wformat-string<Args...> fmt, Args&&... args);
template<class Out, class... Args>
format_to_n_result<Out> format_to_n(Out out, iter_difference_t<Out> n,
const locale& loc, format-string<Args...> fmt,
Args&&... args);
template<class Out, class... Args>
format_to_n_result<Out> format_to_n(Out out, iter_difference_t<Out> n,
const locale& loc, wformat-string<Args...> fmt,
Args&&... args);
template<class... Args>
size_t formatted_size(format-string<Args...> fmt, Args&&... args);
template<class... Args>
size_t formatted_size(wformat-string<Args...> fmt, Args&&... args);
template<class... Args>
size_t formatted_size(const locale& loc, format-string<Args...> fmt, Args&&... args);
template<class... Args>
size_t formatted_size(const locale& loc, wformat-string<Args...> fmt, Args&&... args);
// [format.formatter], formatter
template<class T, class charT = char> struct formatter;
// [format.parse.ctx], class template basic_format_parse_context
template<class charT> class basic_format_parse_context;
using format_parse_context = basic_format_parse_context<char>;
using wformat_parse_context = basic_format_parse_context<wchar_t>;
// [format.range], formatting of ranges
// [format.range.fmtkind], variable template format_kind
enum class range_format { // since C++23
disabled,
map,
set,
sequence,
string,
debug_string
};
template<class R>
constexpr unspecified format_kind = unspecified; // since C++23
template<ranges::input_range R>
requires same_as<R, remove_cvref_t<R>>
constexpr range_format format_kind<R> = see below; // since C++23
// [format.range.formatter], class template range_formatter
template<class T, class charT = char>
requires same_as<remove_cvref_t<T>, T> && formattable<T, charT>
class range_formatter; // since C++23
// [format.range.fmtdef], class template range-default-formatter
template<range_format K, ranges::input_range R, class charT>
struct range-default-formatter; // exposition only, since C++23
// [format.range.fmtmap], [format.range.fmtset], [format.range.fmtstr],
// specializations for maps, sets, and strings
template<ranges::input_range R, class charT>
requires (format_kind<R> != range_format::disabled) &&
formattable<ranges::range_reference_t<R>, charT>
struct formatter<R, charT> : range-default-formatter<format_kind<R>, R, charT> { }; // since C++23
// [format.arguments], arguments
// [format.arg], class template basic_format_arg
template<class Context> class basic_format_arg;
template<class Visitor, class Context>
see below visit_format_arg(Visitor&& vis, basic_format_arg<Context> arg); // Deprecated in C++26
// [format.arg.store], class template format-arg-store
template<class Context, class... Args> struct format-arg-store; // exposition only
template<class Context = format_context, class... Args>
format-arg-store<Context, Args...>
make_format_args(Args&... args);
template<class... Args>
format-arg-store<wformat_context, Args...>
make_wformat_args(Args&... args);
// [format.error], class format_error
class format_error;
}
*/
#include <__config>
#include <__format/buffer.h>
#include <__format/concepts.h>
#include <__format/container_adaptor.h>
#include <__format/enable_insertable.h>
#include <__format/escaped_output_table.h>
#include <__format/extended_grapheme_cluster_table.h>
#include <__format/format_arg.h>
#include <__format/format_arg_store.h>
#include <__format/format_args.h>
#include <__format/format_context.h>
#include <__format/format_error.h>
#include <__format/format_functions.h>
#include <__format/format_parse_context.h>
#include <__format/format_string.h>
#include <__format/format_to_n_result.h>
#include <__format/formatter.h>
#include <__format/formatter_bool.h>
#include <__format/formatter_char.h>
#include <__format/formatter_floating_point.h>
#include <__format/formatter_integer.h>
#include <__format/formatter_pointer.h>
#include <__format/formatter_string.h>
#include <__format/formatter_tuple.h>
#include <__format/parser_std_format_spec.h>
#include <__format/range_default_formatter.h>
#include <__format/range_formatter.h>
#include <__format/unicode.h>
#include <__fwd/format.h>
#include <version>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
#endif
#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
# include <locale>
# include <queue>
# include <stack>
#endif
#endif // _LIBCPP_FORMAT