[libc] Templatize the scanf Reader interface (#131037)
This allows the implementation to be specialized for different targets without including unnecessary logic. It is similar to #111559, which did the same for the printf Writer interface.
This commit is contained in:
@@ -117,8 +117,8 @@ add_entrypoint_object(
|
||||
sscanf.h
|
||||
DEPENDS
|
||||
libc.src.__support.arg_list
|
||||
libc.src.stdio.scanf_core.reader
|
||||
libc.src.stdio.scanf_core.scanf_main
|
||||
libc.src.stdio.scanf_core.string_reader
|
||||
)
|
||||
|
||||
add_entrypoint_object(
|
||||
@@ -129,8 +129,8 @@ add_entrypoint_object(
|
||||
vsscanf.h
|
||||
DEPENDS
|
||||
libc.src.__support.arg_list
|
||||
libc.src.stdio.scanf_core.reader
|
||||
libc.src.stdio.scanf_core.scanf_main
|
||||
libc.src.stdio.scanf_core.string_reader
|
||||
)
|
||||
|
||||
add_entrypoint_object(
|
||||
|
||||
@@ -61,10 +61,8 @@ if(NOT(TARGET libc.src.__support.File.file) AND LLVM_LIBC_FULL_BUILD AND
|
||||
return()
|
||||
endif()
|
||||
|
||||
add_object_library(
|
||||
add_header_library(
|
||||
scanf_main
|
||||
SRCS
|
||||
scanf_main.cpp
|
||||
HDRS
|
||||
scanf_main.h
|
||||
DEPENDS
|
||||
@@ -83,18 +81,19 @@ add_header_library(
|
||||
reader.h
|
||||
DEPENDS
|
||||
libc.src.__support.macros.attributes
|
||||
${file_deps}
|
||||
${use_system_file}
|
||||
)
|
||||
|
||||
add_object_library(
|
||||
add_header_library(
|
||||
string_reader
|
||||
HDRS
|
||||
string_reader.h
|
||||
DEPENDS
|
||||
.reader
|
||||
libc.src.__support.macros.attributes
|
||||
)
|
||||
|
||||
add_header_library(
|
||||
converter
|
||||
SRCS
|
||||
converter.cpp
|
||||
string_converter.cpp
|
||||
int_converter.cpp
|
||||
float_converter.cpp
|
||||
ptr_converter.cpp
|
||||
HDRS
|
||||
converter.h
|
||||
converter_utils.h
|
||||
|
||||
@@ -1,103 +0,0 @@
|
||||
//===-- Format specifier converter implementation for scanf -----*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "src/stdio/scanf_core/converter.h"
|
||||
|
||||
#include "src/__support/ctype_utils.h"
|
||||
#include "src/__support/macros/config.h"
|
||||
#include "src/stdio/scanf_core/core_structs.h"
|
||||
#include "src/stdio/scanf_core/reader.h"
|
||||
|
||||
#ifndef LIBC_COPT_SCANF_DISABLE_FLOAT
|
||||
#include "src/stdio/scanf_core/float_converter.h"
|
||||
#endif // LIBC_COPT_SCANF_DISABLE_FLOAT
|
||||
#include "src/stdio/scanf_core/current_pos_converter.h"
|
||||
#include "src/stdio/scanf_core/int_converter.h"
|
||||
#include "src/stdio/scanf_core/ptr_converter.h"
|
||||
#include "src/stdio/scanf_core/string_converter.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
namespace LIBC_NAMESPACE_DECL {
|
||||
namespace scanf_core {
|
||||
|
||||
int convert(Reader *reader, const FormatSection &to_conv) {
|
||||
int ret_val = 0;
|
||||
switch (to_conv.conv_name) {
|
||||
case '%':
|
||||
return raw_match(reader, "%");
|
||||
case 's':
|
||||
ret_val = raw_match(reader, " ");
|
||||
if (ret_val != READ_OK)
|
||||
return ret_val;
|
||||
return convert_string(reader, to_conv);
|
||||
case 'c':
|
||||
case '[':
|
||||
return convert_string(reader, to_conv);
|
||||
case 'd':
|
||||
case 'i':
|
||||
case 'u':
|
||||
case 'o':
|
||||
case 'x':
|
||||
case 'X':
|
||||
ret_val = raw_match(reader, " ");
|
||||
if (ret_val != READ_OK)
|
||||
return ret_val;
|
||||
return convert_int(reader, to_conv);
|
||||
#ifndef LIBC_COPT_SCANF_DISABLE_FLOAT
|
||||
case 'f':
|
||||
case 'F':
|
||||
case 'e':
|
||||
case 'E':
|
||||
case 'a':
|
||||
case 'A':
|
||||
case 'g':
|
||||
case 'G':
|
||||
ret_val = raw_match(reader, " ");
|
||||
if (ret_val != READ_OK)
|
||||
return ret_val;
|
||||
return convert_float(reader, to_conv);
|
||||
#endif // LIBC_COPT_SCANF_DISABLE_FLOAT
|
||||
case 'n':
|
||||
return convert_current_pos(reader, to_conv);
|
||||
case 'p':
|
||||
ret_val = raw_match(reader, " ");
|
||||
if (ret_val != READ_OK)
|
||||
return ret_val;
|
||||
return convert_pointer(reader, to_conv);
|
||||
default:
|
||||
return raw_match(reader, to_conv.raw_string);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
// raw_string is assumed to have a positive size.
|
||||
int raw_match(Reader *reader, cpp::string_view raw_string) {
|
||||
char cur_char = reader->getc();
|
||||
int ret_val = READ_OK;
|
||||
for (size_t i = 0; i < raw_string.size(); ++i) {
|
||||
// Any space character matches any number of space characters.
|
||||
if (internal::isspace(raw_string[i])) {
|
||||
while (internal::isspace(cur_char)) {
|
||||
cur_char = reader->getc();
|
||||
}
|
||||
} else {
|
||||
if (raw_string[i] == cur_char) {
|
||||
cur_char = reader->getc();
|
||||
} else {
|
||||
ret_val = MATCHING_FAILURE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
reader->ungetc(cur_char);
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
} // namespace scanf_core
|
||||
} // namespace LIBC_NAMESPACE_DECL
|
||||
@@ -10,10 +10,19 @@
|
||||
#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_CONVERTER_H
|
||||
|
||||
#include "src/__support/CPP/string_view.h"
|
||||
#include "src/__support/ctype_utils.h"
|
||||
#include "src/__support/macros/config.h"
|
||||
#include "src/stdio/scanf_core/core_structs.h"
|
||||
#include "src/stdio/scanf_core/reader.h"
|
||||
|
||||
#ifndef LIBC_COPT_SCANF_DISABLE_FLOAT
|
||||
#include "src/stdio/scanf_core/float_converter.h"
|
||||
#endif // LIBC_COPT_SCANF_DISABLE_FLOAT
|
||||
#include "src/stdio/scanf_core/current_pos_converter.h"
|
||||
#include "src/stdio/scanf_core/int_converter.h"
|
||||
#include "src/stdio/scanf_core/ptr_converter.h"
|
||||
#include "src/stdio/scanf_core/string_converter.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
namespace LIBC_NAMESPACE_DECL {
|
||||
@@ -22,11 +31,81 @@ namespace scanf_core {
|
||||
// convert selects a conversion function based on the conversion name in the
|
||||
// FormatSection. That function takes its input from the reader and parses it
|
||||
// into the converted value.
|
||||
int convert(Reader *reader, const FormatSection &to_conv);
|
||||
template <typename T>
|
||||
int convert(Reader<T> *reader, const FormatSection &to_conv) {
|
||||
int ret_val = 0;
|
||||
switch (to_conv.conv_name) {
|
||||
case '%':
|
||||
return raw_match(reader, "%");
|
||||
case 's':
|
||||
ret_val = raw_match(reader, " ");
|
||||
if (ret_val != READ_OK)
|
||||
return ret_val;
|
||||
return convert_string(reader, to_conv);
|
||||
case 'c':
|
||||
case '[':
|
||||
return convert_string(reader, to_conv);
|
||||
case 'd':
|
||||
case 'i':
|
||||
case 'u':
|
||||
case 'o':
|
||||
case 'x':
|
||||
case 'X':
|
||||
ret_val = raw_match(reader, " ");
|
||||
if (ret_val != READ_OK)
|
||||
return ret_val;
|
||||
return convert_int(reader, to_conv);
|
||||
#ifndef LIBC_COPT_SCANF_DISABLE_FLOAT
|
||||
case 'f':
|
||||
case 'F':
|
||||
case 'e':
|
||||
case 'E':
|
||||
case 'a':
|
||||
case 'A':
|
||||
case 'g':
|
||||
case 'G':
|
||||
ret_val = raw_match(reader, " ");
|
||||
if (ret_val != READ_OK)
|
||||
return ret_val;
|
||||
return convert_float(reader, to_conv);
|
||||
#endif // LIBC_COPT_SCANF_DISABLE_FLOAT
|
||||
case 'n':
|
||||
return convert_current_pos(reader, to_conv);
|
||||
case 'p':
|
||||
ret_val = raw_match(reader, " ");
|
||||
if (ret_val != READ_OK)
|
||||
return ret_val;
|
||||
return convert_pointer(reader, to_conv);
|
||||
default:
|
||||
return raw_match(reader, to_conv.raw_string);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
// raw_match takes a raw string and matches it to the characters obtained from
|
||||
// the reader.
|
||||
int raw_match(Reader *reader, cpp::string_view raw_string);
|
||||
template <typename T>
|
||||
int raw_match(Reader<T> *reader, cpp::string_view raw_string) {
|
||||
char cur_char = reader->getc();
|
||||
int ret_val = READ_OK;
|
||||
for (size_t i = 0; i < raw_string.size(); ++i) {
|
||||
// Any space character matches any number of space characters.
|
||||
if (internal::isspace(raw_string[i])) {
|
||||
while (internal::isspace(cur_char)) {
|
||||
cur_char = reader->getc();
|
||||
}
|
||||
} else {
|
||||
if (raw_string[i] == cur_char) {
|
||||
cur_char = reader->getc();
|
||||
} else {
|
||||
ret_val = MATCHING_FAILURE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
reader->ungetc(cur_char);
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
} // namespace scanf_core
|
||||
} // namespace LIBC_NAMESPACE_DECL
|
||||
|
||||
@@ -19,7 +19,8 @@
|
||||
namespace LIBC_NAMESPACE_DECL {
|
||||
namespace scanf_core {
|
||||
|
||||
LIBC_INLINE int convert_current_pos(Reader *reader,
|
||||
template <typename T>
|
||||
LIBC_INLINE int convert_current_pos(Reader<T> *reader,
|
||||
const FormatSection &to_conv) {
|
||||
write_int_with_length(reader->chars_read(), to_conv);
|
||||
return READ_OK;
|
||||
|
||||
@@ -1,229 +0,0 @@
|
||||
//===-- Float type specifier converters for scanf ---------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "src/stdio/scanf_core/float_converter.h"
|
||||
|
||||
#include "src/__support/CPP/limits.h"
|
||||
#include "src/__support/char_vector.h"
|
||||
#include "src/__support/ctype_utils.h"
|
||||
#include "src/__support/macros/config.h"
|
||||
#include "src/stdio/scanf_core/converter_utils.h"
|
||||
#include "src/stdio/scanf_core/core_structs.h"
|
||||
#include "src/stdio/scanf_core/reader.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
namespace LIBC_NAMESPACE_DECL {
|
||||
namespace scanf_core {
|
||||
|
||||
// All of the floating point conversions are the same for scanf, every name will
|
||||
// accept every style.
|
||||
int convert_float(Reader *reader, const FormatSection &to_conv) {
|
||||
// %a/A/e/E/f/F/g/G "Matches an optionally signed floating-point number,
|
||||
// infinity, or NaN, whose format is the same as expected for the subject
|
||||
// sequence of the strtod function. The corresponding argument shall be a
|
||||
// pointer to floating."
|
||||
|
||||
CharVector out_str = CharVector();
|
||||
bool is_number = false;
|
||||
|
||||
size_t max_width = cpp::numeric_limits<size_t>::max();
|
||||
if (to_conv.max_width > 0) {
|
||||
max_width = to_conv.max_width;
|
||||
}
|
||||
|
||||
char cur_char = reader->getc();
|
||||
// Handle the sign.
|
||||
if (cur_char == '+' || cur_char == '-') {
|
||||
if (!out_str.append(cur_char)) {
|
||||
return ALLOCATION_FAILURE;
|
||||
}
|
||||
if (out_str.length() == max_width) {
|
||||
return MATCHING_FAILURE;
|
||||
} else {
|
||||
cur_char = reader->getc();
|
||||
}
|
||||
}
|
||||
|
||||
static constexpr char DECIMAL_POINT = '.';
|
||||
static const char inf_string[] = "infinity";
|
||||
|
||||
// Handle inf
|
||||
|
||||
if (internal::tolower(cur_char) == inf_string[0]) {
|
||||
size_t inf_index = 0;
|
||||
|
||||
for (;
|
||||
inf_index < (sizeof(inf_string) - 1) && out_str.length() < max_width &&
|
||||
internal::tolower(cur_char) == inf_string[inf_index];
|
||||
++inf_index) {
|
||||
if (!out_str.append(cur_char)) {
|
||||
return ALLOCATION_FAILURE;
|
||||
}
|
||||
cur_char = reader->getc();
|
||||
}
|
||||
|
||||
if (inf_index == 3 || inf_index == sizeof(inf_string) - 1) {
|
||||
write_float_with_length(out_str.c_str(), to_conv);
|
||||
return READ_OK;
|
||||
} else {
|
||||
return MATCHING_FAILURE;
|
||||
}
|
||||
}
|
||||
|
||||
static const char nan_string[] = "nan";
|
||||
|
||||
// Handle nan
|
||||
if (internal::tolower(cur_char) == nan_string[0]) {
|
||||
size_t nan_index = 0;
|
||||
|
||||
for (;
|
||||
nan_index < (sizeof(nan_string) - 1) && out_str.length() < max_width &&
|
||||
internal::tolower(cur_char) == nan_string[nan_index];
|
||||
++nan_index) {
|
||||
if (!out_str.append(cur_char)) {
|
||||
return ALLOCATION_FAILURE;
|
||||
}
|
||||
cur_char = reader->getc();
|
||||
}
|
||||
|
||||
if (nan_index == sizeof(nan_string) - 1) {
|
||||
write_float_with_length(out_str.c_str(), to_conv);
|
||||
return READ_OK;
|
||||
} else {
|
||||
return MATCHING_FAILURE;
|
||||
}
|
||||
}
|
||||
|
||||
// Assume base of 10 by default but check if it is actually base 16.
|
||||
int base = 10;
|
||||
|
||||
// If the string starts with 0 it might be in hex.
|
||||
if (cur_char == '0') {
|
||||
is_number = true;
|
||||
// Read the next character to check.
|
||||
if (!out_str.append(cur_char)) {
|
||||
return ALLOCATION_FAILURE;
|
||||
}
|
||||
// If we've hit the end, then this is "0", which is valid.
|
||||
if (out_str.length() == max_width) {
|
||||
write_float_with_length(out_str.c_str(), to_conv);
|
||||
return READ_OK;
|
||||
} else {
|
||||
cur_char = reader->getc();
|
||||
}
|
||||
|
||||
// If that next character is an 'x' then this is a hexadecimal number.
|
||||
if (internal::tolower(cur_char) == 'x') {
|
||||
base = 16;
|
||||
|
||||
if (!out_str.append(cur_char)) {
|
||||
return ALLOCATION_FAILURE;
|
||||
}
|
||||
// If we've hit the end here, we have "0x" which is a valid prefix to a
|
||||
// floating point number, and will be evaluated to 0.
|
||||
if (out_str.length() == max_width) {
|
||||
write_float_with_length(out_str.c_str(), to_conv);
|
||||
return READ_OK;
|
||||
} else {
|
||||
cur_char = reader->getc();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const char exponent_mark = ((base == 10) ? 'e' : 'p');
|
||||
bool after_decimal = false;
|
||||
|
||||
// The format for the remaining characters at this point is DD.DDe+/-DD for
|
||||
// base 10 and XX.XXp+/-DD for base 16
|
||||
|
||||
// This handles the digits before and after the decimal point, but not the
|
||||
// exponent.
|
||||
while (out_str.length() < max_width) {
|
||||
if (internal::isalnum(cur_char) &&
|
||||
internal::b36_char_to_int(cur_char) < base) {
|
||||
is_number = true;
|
||||
if (!out_str.append(cur_char)) {
|
||||
return ALLOCATION_FAILURE;
|
||||
}
|
||||
cur_char = reader->getc();
|
||||
} else if (cur_char == DECIMAL_POINT && !after_decimal) {
|
||||
after_decimal = true;
|
||||
if (!out_str.append(cur_char)) {
|
||||
return ALLOCATION_FAILURE;
|
||||
}
|
||||
cur_char = reader->getc();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Handle the exponent, which has an exponent mark, an optional sign, and
|
||||
// decimal digits.
|
||||
if (internal::tolower(cur_char) == exponent_mark) {
|
||||
if (!out_str.append(cur_char)) {
|
||||
return ALLOCATION_FAILURE;
|
||||
}
|
||||
if (out_str.length() == max_width) {
|
||||
// This is laid out in the standard as being a matching error (100e is not
|
||||
// a valid float) but may conflict with existing implementations.
|
||||
return MATCHING_FAILURE;
|
||||
} else {
|
||||
cur_char = reader->getc();
|
||||
}
|
||||
|
||||
if (cur_char == '+' || cur_char == '-') {
|
||||
if (!out_str.append(cur_char)) {
|
||||
return ALLOCATION_FAILURE;
|
||||
}
|
||||
if (out_str.length() == max_width) {
|
||||
return MATCHING_FAILURE;
|
||||
} else {
|
||||
cur_char = reader->getc();
|
||||
}
|
||||
}
|
||||
|
||||
// It is specified by the standard that "100er" is a matching failure since
|
||||
// the longest prefix of a possibly valid floating-point number (which is
|
||||
// "100e") is not a valid floating-point number. If there is an exponent
|
||||
// mark then there must be a digit after it else the number is not valid.
|
||||
// Some implementations will roll back two characters (to just "100") and
|
||||
// accept that since the prefix is not valid, and some will interpret an
|
||||
// exponent mark followed by no digits as an additional exponent of 0
|
||||
// (accepting "100e" and returning 100.0). Both of these behaviors are wrong
|
||||
// by the standard, but they may be used in real code, see Hyrum's law. This
|
||||
// code follows the standard, but may be incompatible due to code expecting
|
||||
// these bugs.
|
||||
if (!internal::isdigit(cur_char)) {
|
||||
return MATCHING_FAILURE;
|
||||
}
|
||||
|
||||
while (internal::isdigit(cur_char) && out_str.length() < max_width) {
|
||||
if (!out_str.append(cur_char)) {
|
||||
return ALLOCATION_FAILURE;
|
||||
}
|
||||
cur_char = reader->getc();
|
||||
}
|
||||
}
|
||||
|
||||
// We always read one more character than will be used, so we have to put the
|
||||
// last one back.
|
||||
reader->ungetc(cur_char);
|
||||
|
||||
// If we haven't actually found any digits, this is a matching failure (this
|
||||
// catches cases like "+.")
|
||||
if (!is_number) {
|
||||
return MATCHING_FAILURE;
|
||||
}
|
||||
write_float_with_length(out_str.c_str(), to_conv);
|
||||
|
||||
return READ_OK;
|
||||
}
|
||||
|
||||
} // namespace scanf_core
|
||||
} // namespace LIBC_NAMESPACE_DECL
|
||||
@@ -9,7 +9,11 @@
|
||||
#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_FLOAT_CONVERTER_H
|
||||
#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_FLOAT_CONVERTER_H
|
||||
|
||||
#include "src/__support/CPP/limits.h"
|
||||
#include "src/__support/char_vector.h"
|
||||
#include "src/__support/ctype_utils.h"
|
||||
#include "src/__support/macros/config.h"
|
||||
#include "src/stdio/scanf_core/converter_utils.h"
|
||||
#include "src/stdio/scanf_core/core_structs.h"
|
||||
#include "src/stdio/scanf_core/reader.h"
|
||||
|
||||
@@ -18,7 +22,210 @@
|
||||
namespace LIBC_NAMESPACE_DECL {
|
||||
namespace scanf_core {
|
||||
|
||||
int convert_float(Reader *reader, const FormatSection &to_conv);
|
||||
// All of the floating point conversions are the same for scanf, every name will
|
||||
// accept every style.
|
||||
template <typename T>
|
||||
int convert_float(Reader<T> *reader, const FormatSection &to_conv) {
|
||||
// %a/A/e/E/f/F/g/G "Matches an optionally signed floating-point number,
|
||||
// infinity, or NaN, whose format is the same as expected for the subject
|
||||
// sequence of the strtod function. The corresponding argument shall be a
|
||||
// pointer to floating."
|
||||
|
||||
CharVector out_str = CharVector();
|
||||
bool is_number = false;
|
||||
|
||||
size_t max_width = cpp::numeric_limits<size_t>::max();
|
||||
if (to_conv.max_width > 0) {
|
||||
max_width = to_conv.max_width;
|
||||
}
|
||||
|
||||
char cur_char = reader->getc();
|
||||
// Handle the sign.
|
||||
if (cur_char == '+' || cur_char == '-') {
|
||||
if (!out_str.append(cur_char)) {
|
||||
return ALLOCATION_FAILURE;
|
||||
}
|
||||
if (out_str.length() == max_width) {
|
||||
return MATCHING_FAILURE;
|
||||
} else {
|
||||
cur_char = reader->getc();
|
||||
}
|
||||
}
|
||||
|
||||
static constexpr char DECIMAL_POINT = '.';
|
||||
static const char inf_string[] = "infinity";
|
||||
|
||||
// Handle inf
|
||||
|
||||
if (internal::tolower(cur_char) == inf_string[0]) {
|
||||
size_t inf_index = 0;
|
||||
|
||||
for (;
|
||||
inf_index < (sizeof(inf_string) - 1) && out_str.length() < max_width &&
|
||||
internal::tolower(cur_char) == inf_string[inf_index];
|
||||
++inf_index) {
|
||||
if (!out_str.append(cur_char)) {
|
||||
return ALLOCATION_FAILURE;
|
||||
}
|
||||
cur_char = reader->getc();
|
||||
}
|
||||
|
||||
if (inf_index == 3 || inf_index == sizeof(inf_string) - 1) {
|
||||
write_float_with_length(out_str.c_str(), to_conv);
|
||||
return READ_OK;
|
||||
} else {
|
||||
return MATCHING_FAILURE;
|
||||
}
|
||||
}
|
||||
|
||||
static const char nan_string[] = "nan";
|
||||
|
||||
// Handle nan
|
||||
if (internal::tolower(cur_char) == nan_string[0]) {
|
||||
size_t nan_index = 0;
|
||||
|
||||
for (;
|
||||
nan_index < (sizeof(nan_string) - 1) && out_str.length() < max_width &&
|
||||
internal::tolower(cur_char) == nan_string[nan_index];
|
||||
++nan_index) {
|
||||
if (!out_str.append(cur_char)) {
|
||||
return ALLOCATION_FAILURE;
|
||||
}
|
||||
cur_char = reader->getc();
|
||||
}
|
||||
|
||||
if (nan_index == sizeof(nan_string) - 1) {
|
||||
write_float_with_length(out_str.c_str(), to_conv);
|
||||
return READ_OK;
|
||||
} else {
|
||||
return MATCHING_FAILURE;
|
||||
}
|
||||
}
|
||||
|
||||
// Assume base of 10 by default but check if it is actually base 16.
|
||||
int base = 10;
|
||||
|
||||
// If the string starts with 0 it might be in hex.
|
||||
if (cur_char == '0') {
|
||||
is_number = true;
|
||||
// Read the next character to check.
|
||||
if (!out_str.append(cur_char)) {
|
||||
return ALLOCATION_FAILURE;
|
||||
}
|
||||
// If we've hit the end, then this is "0", which is valid.
|
||||
if (out_str.length() == max_width) {
|
||||
write_float_with_length(out_str.c_str(), to_conv);
|
||||
return READ_OK;
|
||||
} else {
|
||||
cur_char = reader->getc();
|
||||
}
|
||||
|
||||
// If that next character is an 'x' then this is a hexadecimal number.
|
||||
if (internal::tolower(cur_char) == 'x') {
|
||||
base = 16;
|
||||
|
||||
if (!out_str.append(cur_char)) {
|
||||
return ALLOCATION_FAILURE;
|
||||
}
|
||||
// If we've hit the end here, we have "0x" which is a valid prefix to a
|
||||
// floating point number, and will be evaluated to 0.
|
||||
if (out_str.length() == max_width) {
|
||||
write_float_with_length(out_str.c_str(), to_conv);
|
||||
return READ_OK;
|
||||
} else {
|
||||
cur_char = reader->getc();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const char exponent_mark = ((base == 10) ? 'e' : 'p');
|
||||
bool after_decimal = false;
|
||||
|
||||
// The format for the remaining characters at this point is DD.DDe+/-DD for
|
||||
// base 10 and XX.XXp+/-DD for base 16
|
||||
|
||||
// This handles the digits before and after the decimal point, but not the
|
||||
// exponent.
|
||||
while (out_str.length() < max_width) {
|
||||
if (internal::isalnum(cur_char) &&
|
||||
internal::b36_char_to_int(cur_char) < base) {
|
||||
is_number = true;
|
||||
if (!out_str.append(cur_char)) {
|
||||
return ALLOCATION_FAILURE;
|
||||
}
|
||||
cur_char = reader->getc();
|
||||
} else if (cur_char == DECIMAL_POINT && !after_decimal) {
|
||||
after_decimal = true;
|
||||
if (!out_str.append(cur_char)) {
|
||||
return ALLOCATION_FAILURE;
|
||||
}
|
||||
cur_char = reader->getc();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Handle the exponent, which has an exponent mark, an optional sign, and
|
||||
// decimal digits.
|
||||
if (internal::tolower(cur_char) == exponent_mark) {
|
||||
if (!out_str.append(cur_char)) {
|
||||
return ALLOCATION_FAILURE;
|
||||
}
|
||||
if (out_str.length() == max_width) {
|
||||
// This is laid out in the standard as being a matching error (100e is not
|
||||
// a valid float) but may conflict with existing implementations.
|
||||
return MATCHING_FAILURE;
|
||||
} else {
|
||||
cur_char = reader->getc();
|
||||
}
|
||||
|
||||
if (cur_char == '+' || cur_char == '-') {
|
||||
if (!out_str.append(cur_char)) {
|
||||
return ALLOCATION_FAILURE;
|
||||
}
|
||||
if (out_str.length() == max_width) {
|
||||
return MATCHING_FAILURE;
|
||||
} else {
|
||||
cur_char = reader->getc();
|
||||
}
|
||||
}
|
||||
|
||||
// It is specified by the standard that "100er" is a matching failure since
|
||||
// the longest prefix of a possibly valid floating-point number (which is
|
||||
// "100e") is not a valid floating-point number. If there is an exponent
|
||||
// mark then there must be a digit after it else the number is not valid.
|
||||
// Some implementations will roll back two characters (to just "100") and
|
||||
// accept that since the prefix is not valid, and some will interpret an
|
||||
// exponent mark followed by no digits as an additional exponent of 0
|
||||
// (accepting "100e" and returning 100.0). Both of these behaviors are wrong
|
||||
// by the standard, but they may be used in real code, see Hyrum's law. This
|
||||
// code follows the standard, but may be incompatible due to code expecting
|
||||
// these bugs.
|
||||
if (!internal::isdigit(cur_char)) {
|
||||
return MATCHING_FAILURE;
|
||||
}
|
||||
|
||||
while (internal::isdigit(cur_char) && out_str.length() < max_width) {
|
||||
if (!out_str.append(cur_char)) {
|
||||
return ALLOCATION_FAILURE;
|
||||
}
|
||||
cur_char = reader->getc();
|
||||
}
|
||||
}
|
||||
|
||||
// We always read one more character than will be used, so we have to put the
|
||||
// last one back.
|
||||
reader->ungetc(cur_char);
|
||||
|
||||
// If we haven't actually found any digits, this is a matching failure (this
|
||||
// catches cases like "+.")
|
||||
if (!is_number) {
|
||||
return MATCHING_FAILURE;
|
||||
}
|
||||
write_float_with_length(out_str.c_str(), to_conv);
|
||||
|
||||
return READ_OK;
|
||||
}
|
||||
|
||||
} // namespace scanf_core
|
||||
} // namespace LIBC_NAMESPACE_DECL
|
||||
|
||||
@@ -1,230 +0,0 @@
|
||||
//===-- Int type specifier converters for scanf -----------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "src/stdio/scanf_core/int_converter.h"
|
||||
|
||||
#include "src/__support/CPP/limits.h"
|
||||
#include "src/__support/ctype_utils.h"
|
||||
#include "src/__support/macros/config.h"
|
||||
#include "src/stdio/scanf_core/converter_utils.h"
|
||||
#include "src/stdio/scanf_core/core_structs.h"
|
||||
#include "src/stdio/scanf_core/reader.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
namespace LIBC_NAMESPACE_DECL {
|
||||
namespace scanf_core {
|
||||
|
||||
// This code is very similar to the code in __support/str_to_integer.h but is
|
||||
// not quite the same. Here is the list of differences and why they exist:
|
||||
// 1) This takes a reader and a format section instead of a char* and the base.
|
||||
// This should be fairly self explanatory. While the char* could be adapted
|
||||
// to a reader and the base could be calculated ahead of time, the
|
||||
// semantics are slightly different, specifically a char* can be indexed
|
||||
// freely (I can read str[2] and then str[0]) whereas a File (which the
|
||||
// reader may contain) cannot.
|
||||
// 2) Because this uses a Reader, this function can only unget once.
|
||||
// This is relevant because scanf specifies it reads the "longest sequence
|
||||
// of input characters which does not exceed any specified field width and
|
||||
// which is, or is a prefix of, a matching input sequence." Whereas the
|
||||
// strtol function accepts "the longest initial subsequence of the input
|
||||
// string (...) that is of the expected form." This is demonstrated by the
|
||||
// differences in how they deal with the string "0xZZZ" when parsing as
|
||||
// hexadecimal. Scanf will read the "0x" as a valid prefix and return 0,
|
||||
// since it reads the first 'Z', sees that it's not a valid hex digit, and
|
||||
// reverses one character. The strtol function on the other hand only
|
||||
// accepts the "0" since that's the longest valid hexadecimal sequence. It
|
||||
// sees the 'Z' after the "0x" and determines that this is not the prefix
|
||||
// to a valid hex string.
|
||||
// 3) This conversion may have a maximum width.
|
||||
// If a maximum width is specified, this conversion is only allowed to
|
||||
// accept a certain number of characters. Strtol doesn't have any such
|
||||
// limitation.
|
||||
int convert_int(Reader *reader, const FormatSection &to_conv) {
|
||||
// %d "Matches an optionally signed decimal integer [...] with the value 10
|
||||
// for the base argument. The corresponding argument shall be a pointer to
|
||||
// signed integer."
|
||||
|
||||
// %i "Matches an optionally signed integer [...] with the value 0 for the
|
||||
// base argument. The corresponding argument shall be a pointer to signed
|
||||
// integer."
|
||||
|
||||
// %u "Matches an optionally signed decimal integer [...] with the value 10
|
||||
// for the base argument. The corresponding argument shall be a pointer to
|
||||
// unsigned integer"
|
||||
|
||||
// %o "Matches an optionally signed octal integer [...] with the value 8 for
|
||||
// the base argument. The corresponding argument shall be a pointer to
|
||||
// unsigned integer"
|
||||
|
||||
// %x/X "Matches an optionally signed hexadecimal integer [...] with the value
|
||||
// 16 for the base argument. The corresponding argument shall be a pointer to
|
||||
// unsigned integer"
|
||||
|
||||
size_t max_width = cpp::numeric_limits<size_t>::max();
|
||||
if (to_conv.max_width > 0) {
|
||||
max_width = to_conv.max_width;
|
||||
}
|
||||
|
||||
uintmax_t result = 0;
|
||||
bool is_number = false;
|
||||
bool is_signed = false;
|
||||
int base = 0;
|
||||
if (to_conv.conv_name == 'i') {
|
||||
base = 0;
|
||||
is_signed = true;
|
||||
} else if (to_conv.conv_name == 'o') {
|
||||
base = 8;
|
||||
} else if (internal::tolower(to_conv.conv_name) == 'x' ||
|
||||
to_conv.conv_name == 'p') {
|
||||
base = 16;
|
||||
} else if (to_conv.conv_name == 'd') {
|
||||
base = 10;
|
||||
is_signed = true;
|
||||
} else { // conv_name must be 'u'
|
||||
base = 10;
|
||||
}
|
||||
|
||||
char cur_char = reader->getc();
|
||||
|
||||
char result_sign = '+';
|
||||
if (cur_char == '+' || cur_char == '-') {
|
||||
result_sign = cur_char;
|
||||
if (max_width > 1) {
|
||||
--max_width;
|
||||
cur_char = reader->getc();
|
||||
} else {
|
||||
// If the max width has been hit already, then the stored value must be 0
|
||||
// since no actual digits of the number have been parsed yet.
|
||||
write_int_with_length(0, to_conv);
|
||||
return MATCHING_FAILURE;
|
||||
}
|
||||
}
|
||||
const bool is_negative = result_sign == '-';
|
||||
|
||||
// Base of 0 means automatically determine the base. Base of 16 may have a
|
||||
// prefix of "0x"
|
||||
if (base == 0 || base == 16) {
|
||||
// If the first character is 0, then it could be octal or hex.
|
||||
if (cur_char == '0') {
|
||||
is_number = true;
|
||||
|
||||
// Read the next character to check.
|
||||
if (max_width > 1) {
|
||||
--max_width;
|
||||
cur_char = reader->getc();
|
||||
} else {
|
||||
write_int_with_length(0, to_conv);
|
||||
return READ_OK;
|
||||
}
|
||||
|
||||
if (internal::tolower(cur_char) == 'x') {
|
||||
// This is a valid hex prefix.
|
||||
|
||||
is_number = false;
|
||||
// A valid hex prefix is not necessarily a valid number. For the
|
||||
// conversion to be valid it needs to use all of the characters it
|
||||
// consumes. From the standard:
|
||||
// 7.23.6.2 paragraph 9: "An input item is defined as the longest
|
||||
// sequence of input characters which does not exceed any specified
|
||||
// field width and which is, or is a prefix of, a matching input
|
||||
// sequence."
|
||||
// 7.23.6.2 paragraph 10: "If the input item is not a matching sequence,
|
||||
// the execution of the directive fails: this condition is a matching
|
||||
// failure"
|
||||
base = 16;
|
||||
if (max_width > 1) {
|
||||
--max_width;
|
||||
cur_char = reader->getc();
|
||||
} else {
|
||||
return MATCHING_FAILURE;
|
||||
}
|
||||
|
||||
} else {
|
||||
if (base == 0) {
|
||||
base = 8;
|
||||
}
|
||||
}
|
||||
} else if (base == 0) {
|
||||
if (internal::isdigit(cur_char)) {
|
||||
// If the first character is a different number, then it's 10.
|
||||
base = 10;
|
||||
} else {
|
||||
// If the first character isn't a valid digit, then there are no valid
|
||||
// digits at all. The number is 0.
|
||||
reader->ungetc(cur_char);
|
||||
write_int_with_length(0, to_conv);
|
||||
return MATCHING_FAILURE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
constexpr uintmax_t UNSIGNED_MAX = cpp::numeric_limits<uintmax_t>::max();
|
||||
constexpr uintmax_t SIGNED_MAX =
|
||||
static_cast<uintmax_t>(cpp::numeric_limits<intmax_t>::max());
|
||||
constexpr uintmax_t NEGATIVE_SIGNED_MAX =
|
||||
static_cast<uintmax_t>(cpp::numeric_limits<intmax_t>::max()) + 1;
|
||||
|
||||
const uintmax_t MAX =
|
||||
(is_signed ? (is_negative ? NEGATIVE_SIGNED_MAX : SIGNED_MAX)
|
||||
: UNSIGNED_MAX);
|
||||
|
||||
const uintmax_t max_div_by_base = MAX / base;
|
||||
|
||||
if (internal::isalnum(cur_char) &&
|
||||
internal::b36_char_to_int(cur_char) < base) {
|
||||
is_number = true;
|
||||
}
|
||||
|
||||
bool has_overflow = false;
|
||||
size_t i = 0;
|
||||
for (; i < max_width && internal::isalnum(cur_char) &&
|
||||
internal::b36_char_to_int(cur_char) < base;
|
||||
++i, cur_char = reader->getc()) {
|
||||
|
||||
uintmax_t cur_digit = internal::b36_char_to_int(cur_char);
|
||||
|
||||
if (result == MAX) {
|
||||
has_overflow = true;
|
||||
continue;
|
||||
} else if (result > max_div_by_base) {
|
||||
result = MAX;
|
||||
has_overflow = true;
|
||||
} else {
|
||||
result = result * base;
|
||||
}
|
||||
|
||||
if (result > MAX - cur_digit) {
|
||||
result = MAX;
|
||||
has_overflow = true;
|
||||
} else {
|
||||
result = result + cur_digit;
|
||||
}
|
||||
}
|
||||
|
||||
// We always read one more character than will be used, so we have to put the
|
||||
// last one back.
|
||||
reader->ungetc(cur_char);
|
||||
|
||||
if (!is_number)
|
||||
return MATCHING_FAILURE;
|
||||
|
||||
if (has_overflow) {
|
||||
write_int_with_length(MAX, to_conv);
|
||||
} else {
|
||||
if (is_negative)
|
||||
result = -result;
|
||||
|
||||
write_int_with_length(result, to_conv);
|
||||
}
|
||||
|
||||
return READ_OK;
|
||||
}
|
||||
|
||||
} // namespace scanf_core
|
||||
} // namespace LIBC_NAMESPACE_DECL
|
||||
@@ -9,7 +9,10 @@
|
||||
#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_INT_CONVERTER_H
|
||||
#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_INT_CONVERTER_H
|
||||
|
||||
#include "src/__support/CPP/limits.h"
|
||||
#include "src/__support/ctype_utils.h"
|
||||
#include "src/__support/macros/config.h"
|
||||
#include "src/stdio/scanf_core/converter_utils.h"
|
||||
#include "src/stdio/scanf_core/core_structs.h"
|
||||
#include "src/stdio/scanf_core/reader.h"
|
||||
|
||||
@@ -18,7 +21,212 @@
|
||||
namespace LIBC_NAMESPACE_DECL {
|
||||
namespace scanf_core {
|
||||
|
||||
int convert_int(Reader *reader, const FormatSection &to_conv);
|
||||
// This code is very similar to the code in __support/str_to_integer.h but is
|
||||
// not quite the same. Here is the list of differences and why they exist:
|
||||
// 1) This takes a reader and a format section instead of a char* and the base.
|
||||
// This should be fairly self explanatory. While the char* could be adapted
|
||||
// to a reader and the base could be calculated ahead of time, the
|
||||
// semantics are slightly different, specifically a char* can be indexed
|
||||
// freely (I can read str[2] and then str[0]) whereas a File (which the
|
||||
// reader may contain) cannot.
|
||||
// 2) Because this uses a Reader, this function can only unget once.
|
||||
// This is relevant because scanf specifies it reads the "longest sequence
|
||||
// of input characters which does not exceed any specified field width and
|
||||
// which is, or is a prefix of, a matching input sequence." Whereas the
|
||||
// strtol function accepts "the longest initial subsequence of the input
|
||||
// string (...) that is of the expected form." This is demonstrated by the
|
||||
// differences in how they deal with the string "0xZZZ" when parsing as
|
||||
// hexadecimal. Scanf will read the "0x" as a valid prefix and return 0,
|
||||
// since it reads the first 'Z', sees that it's not a valid hex digit, and
|
||||
// reverses one character. The strtol function on the other hand only
|
||||
// accepts the "0" since that's the longest valid hexadecimal sequence. It
|
||||
// sees the 'Z' after the "0x" and determines that this is not the prefix
|
||||
// to a valid hex string.
|
||||
// 3) This conversion may have a maximum width.
|
||||
// If a maximum width is specified, this conversion is only allowed to
|
||||
// accept a certain number of characters. Strtol doesn't have any such
|
||||
// limitation.
|
||||
template <typename T>
|
||||
int convert_int(Reader<T> *reader, const FormatSection &to_conv) {
|
||||
// %d "Matches an optionally signed decimal integer [...] with the value 10
|
||||
// for the base argument. The corresponding argument shall be a pointer to
|
||||
// signed integer."
|
||||
|
||||
// %i "Matches an optionally signed integer [...] with the value 0 for the
|
||||
// base argument. The corresponding argument shall be a pointer to signed
|
||||
// integer."
|
||||
|
||||
// %u "Matches an optionally signed decimal integer [...] with the value 10
|
||||
// for the base argument. The corresponding argument shall be a pointer to
|
||||
// unsigned integer"
|
||||
|
||||
// %o "Matches an optionally signed octal integer [...] with the value 8 for
|
||||
// the base argument. The corresponding argument shall be a pointer to
|
||||
// unsigned integer"
|
||||
|
||||
// %x/X "Matches an optionally signed hexadecimal integer [...] with the value
|
||||
// 16 for the base argument. The corresponding argument shall be a pointer to
|
||||
// unsigned integer"
|
||||
|
||||
size_t max_width = cpp::numeric_limits<size_t>::max();
|
||||
if (to_conv.max_width > 0) {
|
||||
max_width = to_conv.max_width;
|
||||
}
|
||||
|
||||
uintmax_t result = 0;
|
||||
bool is_number = false;
|
||||
bool is_signed = false;
|
||||
int base = 0;
|
||||
if (to_conv.conv_name == 'i') {
|
||||
base = 0;
|
||||
is_signed = true;
|
||||
} else if (to_conv.conv_name == 'o') {
|
||||
base = 8;
|
||||
} else if (internal::tolower(to_conv.conv_name) == 'x' ||
|
||||
to_conv.conv_name == 'p') {
|
||||
base = 16;
|
||||
} else if (to_conv.conv_name == 'd') {
|
||||
base = 10;
|
||||
is_signed = true;
|
||||
} else { // conv_name must be 'u'
|
||||
base = 10;
|
||||
}
|
||||
|
||||
char cur_char = reader->getc();
|
||||
|
||||
char result_sign = '+';
|
||||
if (cur_char == '+' || cur_char == '-') {
|
||||
result_sign = cur_char;
|
||||
if (max_width > 1) {
|
||||
--max_width;
|
||||
cur_char = reader->getc();
|
||||
} else {
|
||||
// If the max width has been hit already, then the return value must be 0
|
||||
// since no actual digits of the number have been parsed yet.
|
||||
write_int_with_length(0, to_conv);
|
||||
return MATCHING_FAILURE;
|
||||
}
|
||||
}
|
||||
const bool is_negative = result_sign == '-';
|
||||
|
||||
// Base of 0 means automatically determine the base. Base of 16 may have a
|
||||
// prefix of "0x"
|
||||
if (base == 0 || base == 16) {
|
||||
// If the first character is 0, then it could be octal or hex.
|
||||
if (cur_char == '0') {
|
||||
is_number = true;
|
||||
|
||||
// Read the next character to check.
|
||||
if (max_width > 1) {
|
||||
--max_width;
|
||||
cur_char = reader->getc();
|
||||
} else {
|
||||
write_int_with_length(0, to_conv);
|
||||
return READ_OK;
|
||||
}
|
||||
|
||||
if (internal::tolower(cur_char) == 'x') {
|
||||
// This is a valid hex prefix.
|
||||
|
||||
is_number = false;
|
||||
// A valid hex prefix is not necessarily a valid number. For the
|
||||
// conversion to be valid it needs to use all of the characters it
|
||||
// consumes. From the standard:
|
||||
// 7.23.6.2 paragraph 9: "An input item is defined as the longest
|
||||
// sequence of input characters which does not exceed any specified
|
||||
// field width and which is, or is a prefix of, a matching input
|
||||
// sequence."
|
||||
// 7.23.6.2 paragraph 10: "If the input item is not a matching sequence,
|
||||
// the execution of the directive fails: this condition is a matching
|
||||
// failure"
|
||||
base = 16;
|
||||
if (max_width > 1) {
|
||||
--max_width;
|
||||
cur_char = reader->getc();
|
||||
} else {
|
||||
return MATCHING_FAILURE;
|
||||
}
|
||||
|
||||
} else {
|
||||
if (base == 0) {
|
||||
base = 8;
|
||||
}
|
||||
}
|
||||
} else if (base == 0) {
|
||||
if (internal::isdigit(cur_char)) {
|
||||
// If the first character is a different number, then it's 10.
|
||||
base = 10;
|
||||
} else {
|
||||
// If the first character isn't a valid digit, then there are no valid
|
||||
// digits at all. The number is 0.
|
||||
reader->ungetc(cur_char);
|
||||
write_int_with_length(0, to_conv);
|
||||
return MATCHING_FAILURE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
constexpr uintmax_t UNSIGNED_MAX = cpp::numeric_limits<uintmax_t>::max();
|
||||
constexpr uintmax_t SIGNED_MAX =
|
||||
static_cast<uintmax_t>(cpp::numeric_limits<intmax_t>::max());
|
||||
constexpr uintmax_t NEGATIVE_SIGNED_MAX =
|
||||
static_cast<uintmax_t>(cpp::numeric_limits<intmax_t>::max()) + 1;
|
||||
|
||||
const uintmax_t MAX =
|
||||
(is_signed ? (is_negative ? NEGATIVE_SIGNED_MAX : SIGNED_MAX)
|
||||
: UNSIGNED_MAX);
|
||||
|
||||
const uintmax_t max_div_by_base = MAX / base;
|
||||
|
||||
if (internal::isalnum(cur_char) &&
|
||||
internal::b36_char_to_int(cur_char) < base) {
|
||||
is_number = true;
|
||||
}
|
||||
|
||||
bool has_overflow = false;
|
||||
size_t i = 0;
|
||||
for (; i < max_width && internal::isalnum(cur_char) &&
|
||||
internal::b36_char_to_int(cur_char) < base;
|
||||
++i, cur_char = reader->getc()) {
|
||||
|
||||
uintmax_t cur_digit = internal::b36_char_to_int(cur_char);
|
||||
|
||||
if (result == MAX) {
|
||||
has_overflow = true;
|
||||
continue;
|
||||
} else if (result > max_div_by_base) {
|
||||
result = MAX;
|
||||
has_overflow = true;
|
||||
} else {
|
||||
result = result * base;
|
||||
}
|
||||
|
||||
if (result > MAX - cur_digit) {
|
||||
result = MAX;
|
||||
has_overflow = true;
|
||||
} else {
|
||||
result = result + cur_digit;
|
||||
}
|
||||
}
|
||||
|
||||
// We always read one more character than will be used, so we have to put the
|
||||
// last one back.
|
||||
reader->ungetc(cur_char);
|
||||
|
||||
if (!is_number)
|
||||
return MATCHING_FAILURE;
|
||||
|
||||
if (has_overflow) {
|
||||
write_int_with_length(MAX, to_conv);
|
||||
} else {
|
||||
if (is_negative)
|
||||
result = -result;
|
||||
|
||||
write_int_with_length(result, to_conv);
|
||||
}
|
||||
|
||||
return READ_OK;
|
||||
}
|
||||
|
||||
} // namespace scanf_core
|
||||
} // namespace LIBC_NAMESPACE_DECL
|
||||
|
||||
@@ -1,46 +0,0 @@
|
||||
//===-- Pointer type specifier converter for scanf --------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "src/stdio/scanf_core/ptr_converter.h"
|
||||
|
||||
#include "src/__support/ctype_utils.h"
|
||||
#include "src/__support/macros/config.h"
|
||||
#include "src/stdio/scanf_core/converter_utils.h"
|
||||
#include "src/stdio/scanf_core/core_structs.h"
|
||||
#include "src/stdio/scanf_core/int_converter.h"
|
||||
#include "src/stdio/scanf_core/reader.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
namespace LIBC_NAMESPACE_DECL {
|
||||
namespace scanf_core {
|
||||
// Converts a %p directive. The literal "(nullptr)" (matched case-insensitively)
// yields a null pointer; any other input is handed to convert_int, which parses
// the 'p' conversion as a hexadecimal integer.
//
// Returns READ_OK when a pointer was matched, MATCHING_FAILURE when the input
// starts like "(nullptr)" but does not complete it, or whatever convert_int
// returns for the hexadecimal path.
int convert_pointer(Reader *reader, const FormatSection &to_conv) {
  static const char nullptr_string[] = "(nullptr)";
  constexpr size_t NULLPTR_LEN = sizeof(nullptr_string) - 1;

  // Check if it's exactly the nullptr string, if so then it's a nullptr.
  char cur_char = reader->getc();
  size_t i = 0;
  for (; i < NULLPTR_LEN && internal::tolower(cur_char) == nullptr_string[i];
       ++i) {
    cur_char = reader->getc();
  }

  // The loop always holds one character that was read but not accepted (the
  // one after ")" on a full match, the mismatching one otherwise). Put it back
  // on every path so the next directive and %n see the correct position.
  reader->ungetc(cur_char);

  if (i == NULLPTR_LEN) {
    // With assignment suppression (%*p) there is no destination to write to.
    if ((to_conv.flags & NO_WRITE) == 0)
      *reinterpret_cast<void **>(to_conv.output_ptr) = nullptr;
    return READ_OK;
  }
  if (i > 0) {
    // Some, but not all, of "(nullptr)" matched: the input item is only a
    // prefix of a matching sequence.
    return MATCHING_FAILURE;
  }

  // Else treat it as a hex int
  return convert_int(reader, to_conv);
}
|
||||
} // namespace scanf_core
|
||||
} // namespace LIBC_NAMESPACE_DECL
|
||||
@@ -9,8 +9,10 @@
|
||||
#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_PTR_CONVERTER_H
|
||||
#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_PTR_CONVERTER_H
|
||||
|
||||
#include "src/__support/ctype_utils.h"
|
||||
#include "src/__support/macros/config.h"
|
||||
#include "src/stdio/scanf_core/core_structs.h"
|
||||
#include "src/stdio/scanf_core/int_converter.h"
|
||||
#include "src/stdio/scanf_core/reader.h"
|
||||
|
||||
#include <stddef.h>
|
||||
@@ -18,7 +20,30 @@
|
||||
namespace LIBC_NAMESPACE_DECL {
|
||||
namespace scanf_core {
|
||||
|
||||
int convert_pointer(Reader *reader, const FormatSection &to_conv);
|
||||
template <typename T>
|
||||
int convert_pointer(Reader<T> *reader, const FormatSection &to_conv) {
|
||||
static const char nullptr_string[] = "(nullptr)";
|
||||
|
||||
// Check if it's exactly the nullptr string, if so then it's a nullptr.
|
||||
char cur_char = reader->getc();
|
||||
size_t i = 0;
|
||||
for (; i < (sizeof(nullptr_string) - 1) &&
|
||||
internal::tolower(cur_char) == nullptr_string[i];
|
||||
++i) {
|
||||
cur_char = reader->getc();
|
||||
}
|
||||
if (i == (sizeof(nullptr_string) - 1)) {
|
||||
*reinterpret_cast<void **>(to_conv.output_ptr) = nullptr;
|
||||
return READ_OK;
|
||||
} else if (i > 0) {
|
||||
return MATCHING_FAILURE;
|
||||
}
|
||||
|
||||
reader->ungetc(cur_char);
|
||||
|
||||
// Else treat it as a hex int
|
||||
return convert_int(reader, to_conv);
|
||||
}
|
||||
|
||||
} // namespace scanf_core
|
||||
} // namespace LIBC_NAMESPACE_DECL
|
||||
|
||||
@@ -9,17 +9,6 @@
|
||||
#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_READER_H
|
||||
#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_READER_H
|
||||
|
||||
#include "hdr/types/FILE.h"
|
||||
|
||||
#ifndef LIBC_COPT_STDIO_USE_SYSTEM_FILE
|
||||
#include "src/__support/File/file.h"
|
||||
#endif
|
||||
|
||||
#if defined(LIBC_TARGET_ARCH_IS_GPU)
|
||||
#include "src/stdio/getc.h"
|
||||
#include "src/stdio/ungetc.h"
|
||||
#endif
|
||||
|
||||
#include "src/__support/macros/attributes.h" // For LIBC_INLINE
|
||||
#include "src/__support/macros/config.h"
|
||||
|
||||
@@ -27,103 +16,24 @@
|
||||
|
||||
namespace LIBC_NAMESPACE_DECL {
|
||||
namespace scanf_core {
|
||||
// We use the name "reader_internal" over "internal" because
|
||||
// "internal" causes name lookups in files that include the current header to be
|
||||
// ambiguous i.e. `internal::foo` in those files, will try to lookup in
|
||||
// `LIBC_NAMESPACE::scanf_core::internal` over `LIBC_NAMESPACE::internal` for
|
||||
// e.g., `internal::ArgList` in `libc/src/stdio/scanf_core/scanf_main.h`
|
||||
namespace reader_internal {
|
||||
|
||||
#if defined(LIBC_TARGET_ARCH_IS_GPU)
|
||||
// The GPU build provides FILE access through the host operating system's
|
||||
// library. So here we simply use the public entrypoints like in the SYSTEM_FILE
|
||||
// interface. Entrypoints should normally not call others, this is an exception.
|
||||
// FIXME: We do not acquire any locks here, so this is not thread safe.
|
||||
// Reads one character by reinterpreting the opaque handle as a ::FILE and
// forwarding to the LIBC_NAMESPACE::getc entrypoint.
LIBC_INLINE int getc(void *f) {
  auto *file = reinterpret_cast<::FILE *>(f);
  return LIBC_NAMESPACE::getc(file);
}
|
||||
|
||||
// Pushes `c` back by reinterpreting the opaque handle as a ::FILE and
// forwarding to the LIBC_NAMESPACE::ungetc entrypoint. The entrypoint's
// return value is discarded.
LIBC_INLINE void ungetc(int c, void *f) {
  auto *file = reinterpret_cast<::FILE *>(f);
  LIBC_NAMESPACE::ungetc(c, file);
}
|
||||
|
||||
#elif !defined(LIBC_COPT_STDIO_USE_SYSTEM_FILE)
|
||||
|
||||
// Reads a single byte from the File behind the opaque handle using
// read_unlocked. Returns the byte as an unsigned value, or '\0' when the read
// reports an error or does not deliver exactly one byte.
LIBC_INLINE int getc(void *f) {
  auto *file = reinterpret_cast<LIBC_NAMESPACE::File *>(f);
  unsigned char byte;
  auto outcome = file->read_unlocked(&byte, 1);
  size_t bytes_read = outcome.value;
  const bool got_one_byte = !outcome.has_error() && bytes_read == 1;
  if (!got_one_byte)
    return '\0';
  return byte;
}
|
||||
|
||||
// Pushes `c` back onto the File behind the opaque handle via ungetc_unlocked.
LIBC_INLINE void ungetc(int c, void *f) {
  auto *file = reinterpret_cast<LIBC_NAMESPACE::File *>(f);
  file->ungetc_unlocked(c);
}
|
||||
|
||||
#else // defined(LIBC_COPT_STDIO_USE_SYSTEM_FILE)
|
||||
|
||||
// Since ungetc_unlocked isn't always available, we don't acquire the lock for
|
||||
// system files.
|
||||
// Reads one character from the system ::FILE behind the opaque handle.
LIBC_INLINE int getc(void *f) {
  auto *file = reinterpret_cast<::FILE *>(f);
  return ::getc(file);
}
|
||||
|
||||
// Pushes `c` back onto the system ::FILE behind the opaque handle. The result
// of ::ungetc is discarded.
LIBC_INLINE void ungetc(int c, void *f) {
  auto *file = reinterpret_cast<::FILE *>(f);
  ::ungetc(c, file);
}
|
||||
#endif // LIBC_COPT_STDIO_USE_SYSTEM_FILE
|
||||
|
||||
} // namespace reader_internal
|
||||
|
||||
// This is intended to be either a raw string or a buffer synchronized with the
|
||||
// file's internal buffer.
|
||||
struct ReadBuffer {
  // Start of the characters to read from.
  const char *buffer;
  // Length associated with `buffer`, as supplied by whoever builds the struct.
  size_t buff_len;
  // Index of the next character to hand out; starts at the beginning.
  size_t buff_cur{0};
};
|
||||
|
||||
class Reader {
|
||||
ReadBuffer *rb;
|
||||
void *input_stream = nullptr;
|
||||
template <typename Derived> class Reader {
|
||||
size_t cur_chars_read = 0;
|
||||
|
||||
public:
|
||||
// TODO: Set buff_len with a proper constant
|
||||
LIBC_INLINE Reader(ReadBuffer *string_buffer) : rb(string_buffer) {}
|
||||
|
||||
LIBC_INLINE Reader(void *stream, ReadBuffer *stream_buffer = nullptr)
|
||||
: rb(stream_buffer), input_stream(stream) {}
|
||||
|
||||
// This returns the next character from the input and advances it by one
|
||||
// character. When it hits the end of the string or file it returns '\0' to
|
||||
// signal to stop parsing.
|
||||
LIBC_INLINE char getc() {
|
||||
++cur_chars_read;
|
||||
if (rb != nullptr) {
|
||||
char output = rb->buffer[rb->buff_cur];
|
||||
++(rb->buff_cur);
|
||||
return output;
|
||||
}
|
||||
// This should reset the buffer if applicable.
|
||||
return static_cast<char>(reader_internal::getc(input_stream));
|
||||
return static_cast<Derived *>(this)->getc();
|
||||
}
|
||||
|
||||
// This moves the input back by one character, placing c into the buffer if
|
||||
// this is a file reader, else c is ignored.
|
||||
LIBC_INLINE void ungetc(char c) {
|
||||
LIBC_INLINE void ungetc(int c) {
|
||||
--cur_chars_read;
|
||||
if (rb != nullptr && rb->buff_cur > 0) {
|
||||
// While technically c should be written back to the buffer, in scanf we
|
||||
// always write the character that was already there. Additionally, the
|
||||
// buffer is most likely to contain a string that isn't part of a file,
|
||||
// which may not be writable.
|
||||
--(rb->buff_cur);
|
||||
return;
|
||||
}
|
||||
reader_internal::ungetc(static_cast<int>(c), input_stream);
|
||||
static_cast<Derived *>(this)->ungetc(c);
|
||||
}
|
||||
|
||||
LIBC_INLINE size_t chars_read() { return cur_chars_read; }
|
||||
|
||||
@@ -1,46 +0,0 @@
|
||||
//===-- Starting point for scanf --------------------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "src/stdio/scanf_core/scanf_main.h"
|
||||
|
||||
#include "src/__support/arg_list.h"
|
||||
#include "src/__support/macros/config.h"
|
||||
#include "src/stdio/scanf_core/converter.h"
|
||||
#include "src/stdio/scanf_core/core_structs.h"
|
||||
#include "src/stdio/scanf_core/parser.h"
|
||||
#include "src/stdio/scanf_core/reader.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
namespace LIBC_NAMESPACE_DECL {
|
||||
namespace scanf_core {
|
||||
|
||||
// Drives a whole scanf call: walks the format string `str` section by section,
// running a conversion or a raw match against `reader` for each one, and stops
// at the end of the format or at the first section that does not return
// READ_OK. Returns the number of conversions that returned READ_OK, not
// counting %n.
int scanf_main(Reader *reader, const char *__restrict str,
               internal::ArgList &args) {
  Parser<internal::ArgList> parser(str, args);
  int status = READ_OK;
  int successful_conversions = 0;

  FormatSection section = parser.get_next_section();
  while (!section.raw_string.empty() && status == READ_OK) {
    if (!section.has_conv) {
      // Literal text (or whitespace) from the format string.
      status = raw_match(reader, section.raw_string);
    } else {
      status = convert(reader, section);
      // The %n (current position) conversion doesn't increment the number of
      // assignments.
      const bool counts_as_assignment = section.conv_name != 'n';
      if (counts_as_assignment && status == READ_OK)
        ++successful_conversions;
    }
    section = parser.get_next_section();
  }

  return successful_conversions;
}
|
||||
|
||||
} // namespace scanf_core
|
||||
} // namespace LIBC_NAMESPACE_DECL
|
||||
@@ -11,6 +11,9 @@
|
||||
|
||||
#include "src/__support/arg_list.h"
|
||||
#include "src/__support/macros/config.h"
|
||||
#include "src/stdio/scanf_core/converter.h"
|
||||
#include "src/stdio/scanf_core/core_structs.h"
|
||||
#include "src/stdio/scanf_core/parser.h"
|
||||
#include "src/stdio/scanf_core/reader.h"
|
||||
|
||||
#include <stddef.h>
|
||||
@@ -18,8 +21,28 @@
|
||||
namespace LIBC_NAMESPACE_DECL {
|
||||
namespace scanf_core {
|
||||
|
||||
int scanf_main(Reader *reader, const char *__restrict str,
|
||||
internal::ArgList &args);
|
||||
template <typename T>
|
||||
int scanf_main(Reader<T> *reader, const char *__restrict str,
|
||||
internal::ArgList &args) {
|
||||
Parser<internal::ArgList> parser(str, args);
|
||||
int ret_val = READ_OK;
|
||||
int conversions = 0;
|
||||
for (FormatSection cur_section = parser.get_next_section();
|
||||
!cur_section.raw_string.empty() && ret_val == READ_OK;
|
||||
cur_section = parser.get_next_section()) {
|
||||
if (cur_section.has_conv) {
|
||||
ret_val = convert(reader, cur_section);
|
||||
// The %n (current position) conversion doesn't increment the number of
|
||||
// assignments.
|
||||
if (cur_section.conv_name != 'n')
|
||||
conversions += ret_val == READ_OK ? 1 : 0;
|
||||
} else {
|
||||
ret_val = raw_match(reader, cur_section.raw_string);
|
||||
}
|
||||
}
|
||||
|
||||
return conversions;
|
||||
}
|
||||
|
||||
} // namespace scanf_core
|
||||
} // namespace LIBC_NAMESPACE_DECL
|
||||
|
||||
@@ -1,77 +0,0 @@
|
||||
//===-- String type specifier converters for scanf --------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "src/stdio/scanf_core/string_converter.h"
|
||||
|
||||
#include "src/__support/CPP/limits.h"
|
||||
#include "src/__support/ctype_utils.h"
|
||||
#include "src/__support/macros/config.h"
|
||||
#include "src/stdio/scanf_core/core_structs.h"
|
||||
#include "src/stdio/scanf_core/reader.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
namespace LIBC_NAMESPACE_DECL {
|
||||
namespace scanf_core {
|
||||
|
||||
// Converts the %s, %c and %[ directives.
//
//   %s "Matches a sequence of non-white-space characters"
//   %c "Matches a sequence of characters of exactly the number specified by
//      the field width (1 if no field width is present in the directive)"
//   %[ "Matches a nonempty sequence of characters from a set of expected
//      characters (the scanset)."
//
// Characters are copied to to_conv.output_ptr unless NO_WRITE is set. Returns
// MATCHING_FAILURE when no character was accepted, READ_OK otherwise.
int convert_string(Reader *reader, const FormatSection &to_conv) {
  // Without an explicit width, %c reads exactly one character and the other
  // conversions are unbounded.
  size_t max_width = 0;
  if (to_conv.max_width > 0) {
    max_width = to_conv.max_width;
  } else if (to_conv.conv_name == 'c') {
    max_width = 1;
  } else {
    max_width = cpp::numeric_limits<size_t>::max();
  }

  char *output = reinterpret_cast<char *>(to_conv.output_ptr);

  char cur_char = reader->getc();
  size_t i = 0;
  for (; i < max_width && cur_char != '\0'; ++i) {
    // If this is %s and we've hit a space, or if this is %[] and we've found
    // something not in the scanset.
    // The scanset is indexed by character value; go through unsigned char so a
    // byte >= 0x80 on a signed-char target cannot become a huge index.
    // NOTE(review): whatever fills scan_set should index the same way; confirm
    // in the format parser.
    if ((to_conv.conv_name == 's' && internal::isspace(cur_char)) ||
        (to_conv.conv_name == '[' &&
         !to_conv.scan_set.test(static_cast<unsigned char>(cur_char)))) {
      break;
    }
    // if the NO_WRITE flag is not set, write to the output.
    if ((to_conv.flags & NO_WRITE) == 0)
      output[i] = cur_char;
    cur_char = reader->getc();
  }

  // We always read one more character than will be used, so we have to put the
  // last one back.
  reader->ungetc(cur_char);

  // If this is %s or %[]
  if (to_conv.conv_name != 'c' && (to_conv.flags & NO_WRITE) == 0) {
    // Always null terminate the string. This may cause a write to the
    // (max_width + 1) byte, which is correct. The max width describes the max
    // number of characters read from the input string, and doesn't necessarily
    // correspond to the output.
    output[i] = '\0';
  }

  if (i == 0)
    return MATCHING_FAILURE;
  return READ_OK;
}
|
||||
|
||||
} // namespace scanf_core
|
||||
} // namespace LIBC_NAMESPACE_DECL
|
||||
@@ -9,6 +9,8 @@
|
||||
#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_STRING_CONVERTER_H
|
||||
#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_STRING_CONVERTER_H
|
||||
|
||||
#include "src/__support/CPP/limits.h"
|
||||
#include "src/__support/ctype_utils.h"
|
||||
#include "src/__support/macros/config.h"
|
||||
#include "src/stdio/scanf_core/core_structs.h"
|
||||
#include "src/stdio/scanf_core/reader.h"
|
||||
@@ -18,7 +20,60 @@
|
||||
namespace LIBC_NAMESPACE_DECL {
|
||||
namespace scanf_core {
|
||||
|
||||
int convert_string(Reader *reader, const FormatSection &to_conv);
|
||||
template <typename T>
|
||||
int convert_string(Reader<T> *reader, const FormatSection &to_conv) {
|
||||
// %s "Matches a sequence of non-white-space characters"
|
||||
|
||||
// %c "Matches a sequence of characters of exactly the number specified by the
|
||||
// field width (1 if no field width is present in the directive)"
|
||||
|
||||
// %[ "Matches a nonempty sequence of characters from a set of expected
|
||||
// characters (the scanset)."
|
||||
size_t max_width = 0;
|
||||
if (to_conv.max_width > 0) {
|
||||
max_width = to_conv.max_width;
|
||||
} else {
|
||||
if (to_conv.conv_name == 'c') {
|
||||
max_width = 1;
|
||||
} else {
|
||||
max_width = cpp::numeric_limits<size_t>::max();
|
||||
}
|
||||
}
|
||||
|
||||
char *output = reinterpret_cast<char *>(to_conv.output_ptr);
|
||||
|
||||
char cur_char = reader->getc();
|
||||
size_t i = 0;
|
||||
for (; i < max_width && cur_char != '\0'; ++i) {
|
||||
// If this is %s and we've hit a space, or if this is %[] and we've found
|
||||
// something not in the scanset.
|
||||
if ((to_conv.conv_name == 's' && internal::isspace(cur_char)) ||
|
||||
(to_conv.conv_name == '[' && !to_conv.scan_set.test(cur_char))) {
|
||||
break;
|
||||
}
|
||||
// if the NO_WRITE flag is not set, write to the output.
|
||||
if ((to_conv.flags & NO_WRITE) == 0)
|
||||
output[i] = cur_char;
|
||||
cur_char = reader->getc();
|
||||
}
|
||||
|
||||
// We always read one more character than will be used, so we have to put the
|
||||
// last one back.
|
||||
reader->ungetc(cur_char);
|
||||
|
||||
// If this is %s or %[]
|
||||
if (to_conv.conv_name != 'c' && (to_conv.flags & NO_WRITE) == 0) {
|
||||
// Always null terminate the string. This may cause a write to the
|
||||
// (max_width + 1) byte, which is correct. The max width describes the max
|
||||
// number of characters read from the input string, and doesn't necessarily
|
||||
// correspond to the output.
|
||||
output[i] = '\0';
|
||||
}
|
||||
|
||||
if (i == 0)
|
||||
return MATCHING_FAILURE;
|
||||
return READ_OK;
|
||||
}
|
||||
|
||||
} // namespace scanf_core
|
||||
} // namespace LIBC_NAMESPACE_DECL
|
||||
|
||||
49
libc/src/stdio/scanf_core/string_reader.h
Normal file
49
libc/src/stdio/scanf_core/string_reader.h
Normal file
@@ -0,0 +1,49 @@
|
||||
//===-- Reader definition for scanf -----------------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_STRING_READER_H
|
||||
#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_STRING_READER_H
|
||||
|
||||
#include "src/__support/macros/attributes.h" // For LIBC_INLINE
|
||||
#include "src/__support/macros/config.h"
|
||||
#include "src/stdio/scanf_core/reader.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
namespace LIBC_NAMESPACE_DECL {
|
||||
namespace scanf_core {
|
||||
|
||||
// Reader implementation that pulls characters out of an in-memory character
// buffer, tracking the read position with an index.
class StringReader : public Reader<StringReader> {
  const char *buffer;
  // Stored from the constructor argument; not consulted by getc/ungetc.
  [[maybe_unused]] size_t buff_len;
  size_t buff_cur = 0;

public:
  LIBC_INLINE StringReader(const char *buffer, size_t buff_len)
      : buffer(buffer), buff_len(buff_len) {}

  // Returns the character at the current position and advances by one.
  LIBC_INLINE char getc() { return buffer[buff_cur++]; }

  // Steps the position back by one, unless it is already at the start. The
  // argument is ignored: while technically the character should be written
  // back to the buffer, scanf always puts back the character that was already
  // there, and the buffer most likely holds a string that isn't part of a
  // file, which may not be writable.
  LIBC_INLINE void ungetc(int) {
    if (buff_cur == 0)
      return;
    --buff_cur;
  }
};
|
||||
|
||||
} // namespace scanf_core
|
||||
} // namespace LIBC_NAMESPACE_DECL
|
||||
|
||||
#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_STRING_READER_H
|
||||
@@ -38,6 +38,10 @@ LIBC_INLINE void funlockfile(::FILE *) { return; }
|
||||
|
||||
// Reports the stream's error indicator by forwarding to LIBC_NAMESPACE::ferror.
LIBC_INLINE int ferror_unlocked(::FILE *f) {
  const int error_indicator = LIBC_NAMESPACE::ferror(f);
  return error_indicator;
}
|
||||
|
||||
// Reads one character from `f` by forwarding to LIBC_NAMESPACE::getc.
LIBC_INLINE int getc(::FILE *f) {
  const int next_char = LIBC_NAMESPACE::getc(f);
  return next_char;
}
|
||||
|
||||
// Pushes `c` back onto `f` by forwarding to LIBC_NAMESPACE::ungetc; the
// entrypoint's return value is discarded.
LIBC_INLINE void ungetc(int c, ::FILE *f) {
  LIBC_NAMESPACE::ungetc(c, f);
}
|
||||
|
||||
#elif !defined(LIBC_COPT_STDIO_USE_SYSTEM_FILE)
|
||||
|
||||
LIBC_INLINE void flockfile(FILE *f) {
|
||||
@@ -52,6 +56,21 @@ LIBC_INLINE int ferror_unlocked(FILE *f) {
|
||||
return reinterpret_cast<LIBC_NAMESPACE::File *>(f)->error_unlocked();
|
||||
}
|
||||
|
||||
// Reads a single byte from the File behind `f` using read_unlocked. Returns
// the byte as an unsigned value, or '\0' when the read reports an error or
// does not deliver exactly one byte.
LIBC_INLINE int getc(FILE *f) {
  auto *file = reinterpret_cast<LIBC_NAMESPACE::File *>(f);
  unsigned char byte;
  auto outcome = file->read_unlocked(&byte, 1);
  size_t bytes_read = outcome.value;
  const bool got_one_byte = !outcome.has_error() && bytes_read == 1;
  if (!got_one_byte)
    return '\0';
  return byte;
}
|
||||
|
||||
// Pushes `c` back onto the File behind `f` via ungetc_unlocked.
LIBC_INLINE void ungetc(int c, FILE *f) {
  auto *file = reinterpret_cast<LIBC_NAMESPACE::File *>(f);
  file->ungetc_unlocked(c);
}
|
||||
|
||||
#else // defined(LIBC_COPT_STDIO_USE_SYSTEM_FILE)
|
||||
|
||||
// Since ungetc_unlocked isn't always available, we don't acquire the lock for
|
||||
@@ -62,17 +81,35 @@ LIBC_INLINE void funlockfile(::FILE *) { return; }
|
||||
|
||||
// Reports the stream's error indicator by forwarding to the system ::ferror.
LIBC_INLINE int ferror_unlocked(::FILE *f) {
  const int error_indicator = ::ferror(f);
  return error_indicator;
}
|
||||
|
||||
// Reads one character from `f` by forwarding to the system ::getc.
LIBC_INLINE int getc(::FILE *f) {
  const int next_char = ::getc(f);
  return next_char;
}
|
||||
|
||||
// Pushes `c` back onto `f` by forwarding to the system ::ungetc; its return
// value is discarded.
LIBC_INLINE void ungetc(int c, ::FILE *f) {
  ::ungetc(c, f);
}
|
||||
|
||||
#endif // LIBC_COPT_STDIO_USE_SYSTEM_FILE
|
||||
|
||||
} // namespace internal
|
||||
|
||||
namespace scanf_core {
|
||||
|
||||
// Reader implementation backed by a FILE stream; every character goes through
// the internal::getc / internal::ungetc helpers.
class StreamReader : public Reader<StreamReader> {
  ::FILE *stream;

public:
  LIBC_INLINE StreamReader(::FILE *stream) : stream(stream) {}

  // Fetches the next character from the stream, narrowed to char.
  LIBC_INLINE char getc() {
    const int next_char = internal::getc(static_cast<FILE *>(stream));
    return static_cast<char>(next_char);
  }

  // Returns `c` to the stream.
  LIBC_INLINE void ungetc(int c) {
    FILE *target = static_cast<FILE *>(stream);
    internal::ungetc(c, target);
  }
};
|
||||
|
||||
LIBC_INLINE int vfscanf_internal(::FILE *__restrict stream,
|
||||
const char *__restrict format,
|
||||
internal::ArgList &args) {
|
||||
internal::flockfile(stream);
|
||||
scanf_core::Reader reader(stream);
|
||||
scanf_core::StreamReader reader(stream);
|
||||
int retval = scanf_core::scanf_main(&reader, format, args);
|
||||
if (retval == 0 && internal::ferror_unlocked(stream))
|
||||
retval = EOF;
|
||||
|
||||
@@ -11,8 +11,8 @@
|
||||
#include "src/__support/CPP/limits.h"
|
||||
#include "src/__support/arg_list.h"
|
||||
#include "src/__support/macros/config.h"
|
||||
#include "src/stdio/scanf_core/reader.h"
|
||||
#include "src/stdio/scanf_core/scanf_main.h"
|
||||
#include "src/stdio/scanf_core/string_reader.h"
|
||||
|
||||
#include "hdr/stdio_macros.h"
|
||||
#include "hdr/types/FILE.h"
|
||||
@@ -29,8 +29,7 @@ LLVM_LIBC_FUNCTION(int, sscanf,
|
||||
// and pointer semantics, as well as handling
|
||||
// destruction automatically.
|
||||
va_end(vlist);
|
||||
scanf_core::ReadBuffer rb{buffer, cpp::numeric_limits<size_t>::max()};
|
||||
scanf_core::Reader reader(&rb);
|
||||
scanf_core::StringReader reader(buffer, cpp::numeric_limits<size_t>::max());
|
||||
int ret_val = scanf_core::scanf_main(&reader, format, args);
|
||||
// This is done to avoid including stdio.h in the internals. On most systems
|
||||
// EOF is -1, so this will be transformed into just "return ret_val".
|
||||
|
||||
@@ -11,8 +11,8 @@
|
||||
#include "hdr/stdio_macros.h"
|
||||
#include "src/__support/CPP/limits.h"
|
||||
#include "src/__support/arg_list.h"
|
||||
#include "src/stdio/scanf_core/reader.h"
|
||||
#include "src/stdio/scanf_core/scanf_main.h"
|
||||
#include "src/stdio/scanf_core/string_reader.h"
|
||||
|
||||
#include <stdarg.h>
|
||||
|
||||
@@ -21,9 +21,7 @@ namespace LIBC_NAMESPACE_DECL {
|
||||
LLVM_LIBC_FUNCTION(int, vsscanf,
|
||||
(const char *buffer, const char *format, va_list vlist)) {
|
||||
internal::ArgList args(vlist);
|
||||
scanf_core::ReadBuffer rb{const_cast<char *>(buffer),
|
||||
cpp::numeric_limits<size_t>::max()};
|
||||
scanf_core::Reader reader(&rb);
|
||||
scanf_core::StringReader reader(buffer, cpp::numeric_limits<size_t>::max());
|
||||
int ret_val = scanf_core::scanf_main(&reader, format, args);
|
||||
// This is done to avoid including stdio.h in the internals. On most systems
|
||||
// EOF is -1, so this will be transformed into just "return ret_val".
|
||||
|
||||
@@ -32,7 +32,7 @@ add_libc_unittest(
|
||||
SRCS
|
||||
reader_test.cpp
|
||||
DEPENDS
|
||||
libc.src.stdio.scanf_core.reader
|
||||
libc.src.stdio.scanf_core.string_reader
|
||||
libc.src.__support.CPP.string_view
|
||||
COMPILE_OPTIONS
|
||||
${use_system_file}
|
||||
@@ -45,8 +45,8 @@ add_libc_unittest(
|
||||
SRCS
|
||||
converter_test.cpp
|
||||
DEPENDS
|
||||
libc.src.stdio.scanf_core.reader
|
||||
libc.src.stdio.scanf_core.converter
|
||||
libc.src.stdio.scanf_core.string_reader
|
||||
libc.src.__support.CPP.string_view
|
||||
COMPILE_OPTIONS
|
||||
${use_system_file}
|
||||
|
||||
@@ -9,14 +9,13 @@
|
||||
#include "src/__support/CPP/string_view.h"
|
||||
#include "src/stdio/scanf_core/converter.h"
|
||||
#include "src/stdio/scanf_core/core_structs.h"
|
||||
#include "src/stdio/scanf_core/reader.h"
|
||||
#include "src/stdio/scanf_core/string_reader.h"
|
||||
|
||||
#include "test/UnitTest/Test.h"
|
||||
|
||||
TEST(LlvmLibcScanfConverterTest, RawMatchBasic) {
|
||||
const char *str = "abcdef";
|
||||
LIBC_NAMESPACE::scanf_core::ReadBuffer str_reader{str, sizeof(str)};
|
||||
LIBC_NAMESPACE::scanf_core::Reader reader(&str_reader);
|
||||
LIBC_NAMESPACE::scanf_core::StringReader reader(str, sizeof(str));
|
||||
|
||||
// Reading "abc" should succeed.
|
||||
ASSERT_EQ(LIBC_NAMESPACE::scanf_core::raw_match(&reader, "abc"),
|
||||
@@ -51,8 +50,7 @@ TEST(LlvmLibcScanfConverterTest, RawMatchBasic) {
|
||||
|
||||
TEST(LlvmLibcScanfConverterTest, RawMatchSpaces) {
|
||||
const char *str = " a \t\n b cd";
|
||||
LIBC_NAMESPACE::scanf_core::ReadBuffer str_reader{str, sizeof(str)};
|
||||
LIBC_NAMESPACE::scanf_core::Reader reader(&str_reader);
|
||||
LIBC_NAMESPACE::scanf_core::StringReader reader(str, sizeof(str));
|
||||
|
||||
// Reading "a" should fail and not advance.
|
||||
// Since there's nothing in the format string (the second argument to
|
||||
@@ -98,8 +96,7 @@ TEST(LlvmLibcScanfConverterTest, RawMatchSpaces) {
|
||||
TEST(LlvmLibcScanfConverterTest, StringConvSimple) {
|
||||
const char *str = "abcDEF123 654LKJihg";
|
||||
char result[20];
|
||||
LIBC_NAMESPACE::scanf_core::ReadBuffer str_reader{str, sizeof(str)};
|
||||
LIBC_NAMESPACE::scanf_core::Reader reader(&str_reader);
|
||||
LIBC_NAMESPACE::scanf_core::StringReader reader(str, sizeof(str));
|
||||
|
||||
LIBC_NAMESPACE::scanf_core::FormatSection conv;
|
||||
conv.has_conv = true;
|
||||
@@ -120,8 +117,7 @@ TEST(LlvmLibcScanfConverterTest, StringConvSimple) {
|
||||
|
||||
TEST(LlvmLibcScanfConverterTest, StringConvNoWrite) {
|
||||
const char *str = "abcDEF123 654LKJihg";
|
||||
LIBC_NAMESPACE::scanf_core::ReadBuffer str_reader{str, sizeof(str)};
|
||||
LIBC_NAMESPACE::scanf_core::Reader reader(&str_reader);
|
||||
LIBC_NAMESPACE::scanf_core::StringReader reader(str, sizeof(str));
|
||||
|
||||
LIBC_NAMESPACE::scanf_core::FormatSection conv;
|
||||
conv.has_conv = true;
|
||||
@@ -141,8 +137,7 @@ TEST(LlvmLibcScanfConverterTest, StringConvNoWrite) {
|
||||
TEST(LlvmLibcScanfConverterTest, StringConvWidth) {
|
||||
const char *str = "abcDEF123 654LKJihg";
|
||||
char result[6];
|
||||
LIBC_NAMESPACE::scanf_core::ReadBuffer str_reader{str, sizeof(str)};
|
||||
LIBC_NAMESPACE::scanf_core::Reader reader(&str_reader);
|
||||
LIBC_NAMESPACE::scanf_core::StringReader reader(str, sizeof(str));
|
||||
|
||||
LIBC_NAMESPACE::scanf_core::FormatSection conv;
|
||||
conv.has_conv = true;
|
||||
@@ -175,8 +170,7 @@ TEST(LlvmLibcScanfConverterTest, StringConvWidth) {
|
||||
TEST(LlvmLibcScanfConverterTest, CharsConv) {
|
||||
const char *str = "abcDEF123 654LKJihg MNOpqr&*(";
|
||||
char result[20];
|
||||
LIBC_NAMESPACE::scanf_core::ReadBuffer str_reader{str, sizeof(str)};
|
||||
LIBC_NAMESPACE::scanf_core::Reader reader(&str_reader);
|
||||
LIBC_NAMESPACE::scanf_core::StringReader reader(str, sizeof(str));
|
||||
|
||||
LIBC_NAMESPACE::scanf_core::FormatSection conv;
|
||||
conv.has_conv = true;
|
||||
@@ -230,8 +224,7 @@ TEST(LlvmLibcScanfConverterTest, CharsConv) {
|
||||
TEST(LlvmLibcScanfConverterTest, ScansetConv) {
|
||||
const char *str = "abcDEF[123] 654LKJihg";
|
||||
char result[20];
|
||||
LIBC_NAMESPACE::scanf_core::ReadBuffer str_reader{str, sizeof(str)};
|
||||
LIBC_NAMESPACE::scanf_core::Reader reader(&str_reader);
|
||||
LIBC_NAMESPACE::scanf_core::StringReader reader(str, sizeof(str));
|
||||
|
||||
LIBC_NAMESPACE::scanf_core::FormatSection conv;
|
||||
conv.has_conv = true;
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "src/__support/CPP/string_view.h"
|
||||
#include "src/stdio/scanf_core/reader.h"
|
||||
#include "src/stdio/scanf_core/string_reader.h"
|
||||
|
||||
#include "test/UnitTest/Test.h"
|
||||
|
||||
@@ -15,14 +15,14 @@ TEST(LlvmLibcScanfStringReaderTest, Constructor) {
|
||||
char str[10];
|
||||
// buff_len just needs to be a big number. The specific value isn't important
|
||||
// in the real world.
|
||||
LIBC_NAMESPACE::scanf_core::ReadBuffer rb{const_cast<char *>(str), 1000000};
|
||||
LIBC_NAMESPACE::scanf_core::Reader reader(&rb);
|
||||
LIBC_NAMESPACE::scanf_core::StringReader reader(const_cast<char *>(str),
|
||||
1000000);
|
||||
}
|
||||
|
||||
TEST(LlvmLibcScanfStringReaderTest, SimpleRead) {
|
||||
const char *str = "abc";
|
||||
LIBC_NAMESPACE::scanf_core::ReadBuffer rb{const_cast<char *>(str), 1000000};
|
||||
LIBC_NAMESPACE::scanf_core::Reader reader(&rb);
|
||||
LIBC_NAMESPACE::scanf_core::StringReader reader(const_cast<char *>(str),
|
||||
1000000);
|
||||
|
||||
for (size_t i = 0; i < sizeof("abc"); ++i) {
|
||||
ASSERT_EQ(str[i], reader.getc());
|
||||
@@ -31,8 +31,8 @@ TEST(LlvmLibcScanfStringReaderTest, SimpleRead) {
|
||||
|
||||
TEST(LlvmLibcScanfStringReaderTest, ReadAndReverse) {
|
||||
const char *str = "abcDEF123";
|
||||
LIBC_NAMESPACE::scanf_core::ReadBuffer rb{const_cast<char *>(str), 1000000};
|
||||
LIBC_NAMESPACE::scanf_core::Reader reader(&rb);
|
||||
LIBC_NAMESPACE::scanf_core::StringReader reader(const_cast<char *>(str),
|
||||
1000000);
|
||||
|
||||
for (size_t i = 0; i < 5; ++i) {
|
||||
ASSERT_EQ(str[i], reader.getc());
|
||||
|
||||
Reference in New Issue
Block a user