The previous patch implemented: - P2713R1 Escaping improvements in std::format - LWG3965 Incorrect example in [format.string.escaped] p3 for formatting of combining characters. These changes were correct, but had a size and performance penalty. This patch improves the size and performance of the previous patch. The performance is still worse than it was before the paper was implemented, since a lookup may now require two property lookups instead of one. The changes give a tighter coupling between the Unicode data and the algorithm. Additional tests are added to notify about changes in future Unicode updates. Before: ``` ----------------------------------------------------------------------- Benchmark Time CPU Iterations ----------------------------------------------------------------------- BM_ascii_escaped<char> 110704 ns 110696 ns 6206 BM_unicode_escaped<char> 101371 ns 101374 ns 6862 BM_cyrillic_escaped<char> 63329 ns 63327 ns 11013 BM_japanese_escaped<char> 41223 ns 41225 ns 16938 BM_emoji_escaped<char> 111022 ns 111021 ns 6304 BM_ascii_escaped<wchar_t> 112441 ns 112443 ns 6231 BM_unicode_escaped<wchar_t> 102776 ns 102779 ns 6813 BM_cyrillic_escaped<wchar_t> 58977 ns 58975 ns 11868 BM_japanese_escaped<wchar_t> 36885 ns 36886 ns 18975 BM_emoji_escaped<wchar_t> 115885 ns 115881 ns 6051 ``` The first change is to manually encode the entire last area and make a manual exception for the 240 excluded entries. This reduced the table from 1077 to 729 entries and gave the following benchmark results. 
``` ----------------------------------------------------------------------- Benchmark Time CPU Iterations ----------------------------------------------------------------------- BM_ascii_escaped<char> 104777 ns 104776 ns 6550 BM_unicode_escaped<char> 96980 ns 96982 ns 7238 BM_cyrillic_escaped<char> 60254 ns 60251 ns 11670 BM_japanese_escaped<char> 44452 ns 44452 ns 15734 BM_emoji_escaped<char> 104557 ns 104551 ns 6685 BM_ascii_escaped<wchar_t> 107456 ns 107454 ns 6505 BM_unicode_escaped<wchar_t> 96219 ns 96216 ns 7301 BM_cyrillic_escaped<wchar_t> 56921 ns 56904 ns 12288 BM_japanese_escaped<wchar_t> 39530 ns 39529 ns 17492 BM_emoji_escaped<wchar_t> 108494 ns 108496 ns 6408 ``` An entry in the table can only contain 2048 code points. Larger ranges are split into multiple entries, each covering a chunk of at most 2048 code points. To encode the entire Unicode code point range, 21 bits are required. The manual part starts at 0x323B0; this means all entries in the table fit in 18 bits. This frees 3 additional bits for the range size, which allows an entry to cover 16384 code points. With this size, a range never needs to be split into multiple chunks. This reduces the number of table elements from 729 to 711 and gives the following benchmark results. ``` ----------------------------------------------------------------------- Benchmark Time CPU Iterations ----------------------------------------------------------------------- BM_ascii_escaped<char> 104289 ns 104289 ns 6619 BM_unicode_escaped<char> 96682 ns 96681 ns 7215 BM_cyrillic_escaped<char> 59673 ns 59673 ns 11732 BM_japanese_escaped<char> 41983 ns 41982 ns 16646 BM_emoji_escaped<char> 104119 ns 104120 ns 6683 BM_ascii_escaped<wchar_t> 104503 ns 104505 ns 6693 BM_unicode_escaped<wchar_t> 93426 ns 93423 ns 7489 BM_cyrillic_escaped<wchar_t> 54858 ns 54859 ns 12742 BM_japanese_escaped<wchar_t> 36385 ns 36384 ns 19259 BM_emoji_escaped<wchar_t> 105608 ns 105610 ns 6592 ```
233 lines
9.2 KiB
C++
233 lines
9.2 KiB
C++
// -*- C++ -*-
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef _LIBCPP_FORMAT
|
|
#define _LIBCPP_FORMAT
|
|
|
|
/*
|
|
|
|
namespace std {
|
|
// [format.context], class template basic_format_context
|
|
template<class Out, class charT> class basic_format_context;
|
|
using format_context = basic_format_context<unspecified, char>;
|
|
using wformat_context = basic_format_context<unspecified, wchar_t>;
|
|
|
|
// [format.args], class template basic_format_args
|
|
template<class Context> class basic_format_args;
|
|
using format_args = basic_format_args<format_context>;
|
|
using wformat_args = basic_format_args<wformat_context>;
|
|
|
|
// [format.fmt.string], class template basic_format_string
|
|
template<class charT, class... Args>
|
|
struct basic_format_string { // since C++23, exposition only before C++23
|
|
private:
|
|
basic_string_view<charT> str; // exposition only
|
|
|
|
public:
|
|
template<class T> consteval basic_format_string(const T& s);
|
|
basic_format_string(runtime-format-string<charT> s) noexcept : str(s.str) {} // since C++26
|
|
|
|
constexpr basic_string_view<charT> get() const noexcept { return str; }
|
|
};
|
|
template<class... Args>
|
|
using format_string = // since C++23, exposition only before C++23
|
|
basic_format_string<char, type_identity_t<Args>...>;
|
|
template<class... Args>
|
|
using wformat_string = // since C++23, exposition only before C++23
|
|
basic_format_string<wchar_t, type_identity_t<Args>...>;
|
|
|
|
template<class charT> struct runtime-format-string { // since C++26, exposition-only
|
|
private:
|
|
basic_string_view<charT> str; // exposition-only
|
|
|
|
public:
|
|
runtime-format-string(basic_string_view<charT> s) noexcept : str(s) {}
|
|
|
|
runtime-format-string(const runtime-format-string&) = delete;
|
|
runtime-format-string& operator=(const runtime-format-string&) = delete;
|
|
};
|
|
|
|
runtime-format-string<char> runtime_format(string_view fmt) noexcept {
|
|
return fmt;
|
|
}
|
|
runtime-format-string<wchar_t> runtime_format(wstring_view fmt) noexcept {
|
|
return fmt;
|
|
}
|
|
|
|
// [format.functions], formatting functions
|
|
template<class... Args>
|
|
string format(format-string<Args...> fmt, Args&&... args);
|
|
template<class... Args>
|
|
wstring format(wformat-string<Args...> fmt, Args&&... args);
|
|
template<class... Args>
|
|
string format(const locale& loc, format-string<Args...> fmt, Args&&... args);
|
|
template<class... Args>
|
|
wstring format(const locale& loc, wformat-string<Args...> fmt, Args&&... args);
|
|
|
|
string vformat(string_view fmt, format_args args);
|
|
wstring vformat(wstring_view fmt, wformat_args args);
|
|
string vformat(const locale& loc, string_view fmt, format_args args);
|
|
wstring vformat(const locale& loc, wstring_view fmt, wformat_args args);
|
|
|
|
template<class Out, class... Args>
|
|
Out format_to(Out out, format-string<Args...> fmt, Args&&... args);
|
|
template<class Out, class... Args>
|
|
Out format_to(Out out, wformat-string<Args...> fmt, Args&&... args);
|
|
template<class Out, class... Args>
|
|
Out format_to(Out out, const locale& loc, format-string<Args...> fmt, Args&&... args);
|
|
template<class Out, class... Args>
|
|
Out format_to(Out out, const locale& loc, wformat-string<Args...> fmt, Args&&... args);
|
|
|
|
template<class Out>
|
|
Out vformat_to(Out out, string_view fmt, format_args args);
|
|
template<class Out>
|
|
Out vformat_to(Out out, wstring_view fmt, wformat_args args);
|
|
template<class Out>
|
|
Out vformat_to(Out out, const locale& loc, string_view fmt,
|
|
format_args args);
|
|
template<class Out>
|
|
Out vformat_to(Out out, const locale& loc, wstring_view fmt,
|
|
wformat_args args);
|
|
|
|
template<class Out> struct format_to_n_result {
|
|
Out out;
|
|
iter_difference_t<Out> size;
|
|
};
|
|
template<class Out, class... Args>
|
|
format_to_n_result<Out> format_to_n(Out out, iter_difference_t<Out> n,
|
|
format-string<Args...> fmt, Args&&... args);
|
|
template<class Out, class... Args>
|
|
format_to_n_result<Out> format_to_n(Out out, iter_difference_t<Out> n,
|
|
wformat-string<Args...> fmt, Args&&... args);
|
|
template<class Out, class... Args>
|
|
format_to_n_result<Out> format_to_n(Out out, iter_difference_t<Out> n,
|
|
const locale& loc, format-string<Args...> fmt,
|
|
Args&&... args);
|
|
template<class Out, class... Args>
|
|
format_to_n_result<Out> format_to_n(Out out, iter_difference_t<Out> n,
|
|
const locale& loc, wformat-string<Args...> fmt,
|
|
Args&&... args);
|
|
|
|
template<class... Args>
|
|
size_t formatted_size(format-string<Args...> fmt, Args&&... args);
|
|
template<class... Args>
|
|
size_t formatted_size(wformat-string<Args...> fmt, Args&&... args);
|
|
template<class... Args>
|
|
size_t formatted_size(const locale& loc, format-string<Args...> fmt, Args&&... args);
|
|
template<class... Args>
|
|
size_t formatted_size(const locale& loc, wformat-string<Args...> fmt, Args&&... args);
|
|
|
|
// [format.formatter], formatter
|
|
template<class T, class charT = char> struct formatter;
|
|
|
|
// [format.parse.ctx], class template basic_format_parse_context
|
|
template<class charT> class basic_format_parse_context;
|
|
using format_parse_context = basic_format_parse_context<char>;
|
|
using wformat_parse_context = basic_format_parse_context<wchar_t>;
|
|
|
|
// [format.range], formatting of ranges
|
|
// [format.range.fmtkind], variable template format_kind
|
|
enum class range_format { // since C++23
|
|
disabled,
|
|
map,
|
|
set,
|
|
sequence,
|
|
string,
|
|
debug_string
|
|
};
|
|
|
|
template<class R>
|
|
constexpr unspecified format_kind = unspecified; // since C++23
|
|
|
|
template<ranges::input_range R>
|
|
requires same_as<R, remove_cvref_t<R>>
|
|
constexpr range_format format_kind<R> = see below; // since C++23
|
|
|
|
// [format.range.formatter], class template range_formatter
|
|
template<class T, class charT = char>
|
|
requires same_as<remove_cvref_t<T>, T> && formattable<T, charT>
|
|
class range_formatter; // since C++23
|
|
|
|
// [format.range.fmtdef], class template range-default-formatter
|
|
template<range_format K, ranges::input_range R, class charT>
|
|
struct range-default-formatter; // exposition only, since C++23
|
|
|
|
// [format.range.fmtmap], [format.range.fmtset], [format.range.fmtstr],
|
|
// specializations for maps, sets, and strings
|
|
template<ranges::input_range R, class charT>
|
|
requires (format_kind<R> != range_format::disabled) &&
|
|
formattable<ranges::range_reference_t<R>, charT>
|
|
struct formatter<R, charT> : range-default-formatter<format_kind<R>, R, charT> { }; // since C++23
|
|
|
|
// [format.arguments], arguments
|
|
// [format.arg], class template basic_format_arg
|
|
template<class Context> class basic_format_arg;
|
|
|
|
template<class Visitor, class Context>
|
|
see below visit_format_arg(Visitor&& vis, basic_format_arg<Context> arg); // Deprecated in C++26
|
|
|
|
// [format.arg.store], class template format-arg-store
|
|
template<class Context, class... Args> struct format-arg-store; // exposition only
|
|
|
|
template<class Context = format_context, class... Args>
|
|
format-arg-store<Context, Args...>
|
|
make_format_args(Args&... args);
|
|
template<class... Args>
|
|
format-arg-store<wformat_context, Args...>
|
|
make_wformat_args(Args&... args);
|
|
|
|
// [format.error], class format_error
|
|
class format_error;
|
|
}
|
|
|
|
*/
|
|
|
|
#include <__config>
|
|
#include <__format/buffer.h>
|
|
#include <__format/concepts.h>
|
|
#include <__format/container_adaptor.h>
|
|
#include <__format/enable_insertable.h>
|
|
#include <__format/escaped_output_table.h>
|
|
#include <__format/extended_grapheme_cluster_table.h>
|
|
#include <__format/format_arg.h>
|
|
#include <__format/format_arg_store.h>
|
|
#include <__format/format_args.h>
|
|
#include <__format/format_context.h>
|
|
#include <__format/format_error.h>
|
|
#include <__format/format_functions.h>
|
|
#include <__format/format_parse_context.h>
|
|
#include <__format/format_string.h>
|
|
#include <__format/format_to_n_result.h>
|
|
#include <__format/formatter.h>
|
|
#include <__format/formatter_bool.h>
|
|
#include <__format/formatter_char.h>
|
|
#include <__format/formatter_floating_point.h>
|
|
#include <__format/formatter_integer.h>
|
|
#include <__format/formatter_pointer.h>
|
|
#include <__format/formatter_string.h>
|
|
#include <__format/formatter_tuple.h>
|
|
#include <__format/parser_std_format_spec.h>
|
|
#include <__format/range_default_formatter.h>
|
|
#include <__format/range_formatter.h>
|
|
#include <__format/unicode.h>
|
|
#include <__fwd/format.h>
|
|
#include <version>
|
|
|
|
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
|
|
# pragma GCC system_header
|
|
#endif
|
|
|
|
#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
|
|
# include <locale>
|
|
# include <queue>
|
|
# include <stack>
|
|
#endif
|
|
|
|
#endif // _LIBCPP_FORMAT
|