Files
clang-p2996/flang/runtime/connection.h
Peter Klausler bad5205595 [flang][runtime] Support internal I/O to CHARACTER(KIND/=1)
Allow internal I/O to support non-default kinds of CHARACTER.

The I/O runtime design anticipated this standard feature, but
this patch is somewhat larger than I thought it would be because
many code sites had to have assumptions about units (characters
vs. bytes) brought into harmony, and some encoding utilities
had to be pulled out of IoStatementState and templatized into
their own new header file so that they are available to formatted
output code without having to "thread" an IoStatementState reference
through many call chains.

Differential Revision: https://reviews.llvm.org/D131107
2022-08-09 08:46:21 -07:00

122 lines
4.6 KiB
C++

//===-- runtime/connection.h ------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Fortran I/O connection state (abstracted over internal & external units)
#ifndef FORTRAN_RUNTIME_IO_CONNECTION_H_
#define FORTRAN_RUNTIME_IO_CONNECTION_H_
#include "format.h"
#include <cinttypes>
#include <optional>
namespace Fortran::runtime::io {
class IoStatementState;
// The two I/O directions.
enum class Direction { Output, Input };
// Access methods selectable with ACCESS= (see ConnectionAttributes::access).
enum class Access { Sequential, Direct, Stream };

// Characteristics of a connection that an OPEN statement establishes and
// that remain fixed from then on.
struct ConnectionAttributes {
  Access access{Access::Sequential}; // ACCESS='SEQUENTIAL', 'DIRECT', 'STREAM'
  std::optional<bool> isUnformatted; // FORM='UNFORMATTED' if true
  bool isUTF8{false}; // ENCODING='UTF-8'
  unsigned char internalIoCharKind{0}; // 0->external, 1/2/4->internal
  std::optional<std::int64_t> openRecl; // RECL= on OPEN

  // Reports whether the connection is viewed as a sequence of records.
  // Every non-stream connection is; a stream connection is only when it
  // is known to be formatted (at least on input).
  bool IsRecordFile() const {
    if (access != Access::Stream) {
      return true;
    }
    // An unset FORM= is treated like unformatted here, so only an
    // explicit "formatted" setting makes a stream record-oriented.
    return isUnformatted.has_value() && !*isUnformatted;
  }

  // Reports whether characters of type CHAR are to be UTF-8 encoded.
  template <typename CHAR = char> constexpr bool useUTF8() const {
    if (internalIoCharKind != 0) {
      return false; // internal I/O: never UTF-8 encoded
    }
    // External I/O: wide CHARACTER kinds always use UTF-8 for formatted
    // I/O; single-byte CHARACTER encodes characters >= 0x80 with UTF-8
    // iff the ENCODING='UTF-8' mode is set.
    return sizeof(CHAR) > 1 || isUTF8;
  }
};
// A connection's fixed attributes together with its current record,
// position, and mode state.
struct ConnectionState : public ConnectionAttributes {
  bool IsAtEOF() const; // true when read has hit EOF or endfile record
  bool IsAfterEndfile() const; // true after ENDFILE until repositioned

  // Every position and measurement below is expressed in bytes, never in
  // characters. Multi-byte character encodings can arise both in internal
  // I/O (when the character kind of the variable is 2 or 4) and in
  // external formatted I/O (when the encoding is UTF-8).
  std::size_t RemainingSpaceInRecord() const;
  bool NeedAdvance(std::size_t) const;
  void HandleAbsolutePosition(std::int64_t);
  void HandleRelativePosition(std::int64_t);

  // Resets the per-record bookkeeping for the start of a record.
  void BeginRecord() {
    unterminatedRecord = false;
    furthestPositionInRecord = 0;
    positionInRecord = 0;
  }

  // Yields the record length that is in effect. When an input record is
  // longer than an explicit RECL= from OPEN, it is effectively truncated
  // on input, so the smaller RECL= value is returned in that case.
  std::optional<std::int64_t> EffectiveRecordLength() const {
    if (openRecl && recordLength && *openRecl < *recordLength) {
      return openRecl;
    }
    return recordLength;
  }

  std::optional<std::int64_t> recordLength;
  std::int64_t currentRecordNumber{1}; // 1 is first

  // positionInRecord is the 0-based byte offset in the current record
  // to/from which the next data transfer will occur. An X, T, or TR
  // control edit descriptor can move it past furthestPositionInRecord.
  std::int64_t positionInRecord{0};

  // furthestPositionInRecord is one more than the 0-based byte offset of
  // the greatest position in the current record to/from which any data
  // transfer has occurred; equivalently, a count of bytes processed.
  std::int64_t furthestPositionInRecord{0}; // max(position+bytes)

  // Set at end of non-advancing I/O data transfer
  std::optional<std::int64_t> leftTabLimit; // offset in current record

  // Value of currentRecordNumber captured after an ENDFILE/REWIND/BACKSPACE
  // statement, or after an end-of-file READ condition on a sequential
  // access file.
  std::optional<std::int64_t> endfileRecordNumber;

  // Mutable modes set at OPEN() that can be overridden in READ/WRITE & FORMAT
  MutableModes modes; // BLANK=, DECIMAL=, SIGN=, ROUND=, PAD=, DELIM=, kP

  // Set while repeated items are processed during list-directed & NAMELIST
  // input, to keep a span of records in frame on a non-positionable file;
  // backspacing to the beginning of the repeated item then does not need
  // to reposition the external storage medium when that is impossible.
  bool pinnedFrame{false};

  // Set when the last record of a file is not properly terminated,
  // so that a non-advancing READ will not signal EOR.
  bool unterminatedRecord{false};
};
// Utility class for capturing and restoring a position in an input stream.
// NOTE(review): only the declarations are visible in this header; presumably
// the constructor captures the connection state of the given I/O statement
// and the destructor restores it -- confirm against the definitions.
class SavedPosition {
public:
// Explicit, so an IoStatementState is never implicitly converted into a
// SavedPosition.
explicit SavedPosition(IoStatementState &);
~SavedPosition();
private:
// Reference to an I/O statement; presumably the one passed to the
// constructor, which must then outlive this object -- TODO confirm.
IoStatementState &io_;
// A ConnectionState held by value; presumably the captured snapshot.
ConnectionState saved_;
};
} // namespace Fortran::runtime::io
#endif // FORTRAN_RUNTIME_IO_CONNECTION_H_