[libc] Character converter skeleton class (#143619)

Made CharacterConverter class skeleton
This commit is contained in:
Uzair Nawaz
2025-06-11 20:11:31 +00:00
committed by GitHub
parent c70658e32d
commit 52583b3ed7
8 changed files with 258 additions and 0 deletions

22
libc/hdr/types/char32_t.h Normal file
View File

@@ -0,0 +1,22 @@
//===-- Definition of char32_t.h ------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Proxy header for char32_t: picks the header that provides the type
// according to the libc build mode.
#ifndef LLVM_LIBC_HDR_TYPES_CHAR32_T_H
#define LLVM_LIBC_HDR_TYPES_CHAR32_T_H
#ifdef LIBC_FULL_BUILD
// Full build: use the type header shipped with this libc.
#include "include/llvm-libc-types/char32_t.h"
#else // overlay mode
// Overlay mode: go through the overlay wrapper, which includes the system
// <uchar.h>.
#include "hdr/uchar_overlay.h"
#endif // LIBC_FULL_BUILD
#endif // LLVM_LIBC_HDR_TYPES_CHAR32_T_H

22
libc/hdr/types/char8_t.h Normal file
View File

@@ -0,0 +1,22 @@
//===-- Definition of char8_t.h -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Proxy header for char8_t: picks the header that provides the type
// according to the libc build mode.
#ifndef LLVM_LIBC_HDR_TYPES_CHAR8_T_H
#define LLVM_LIBC_HDR_TYPES_CHAR8_T_H
#ifdef LIBC_FULL_BUILD
// Full build: use the type header shipped with this libc.
#include "include/llvm-libc-types/char8_t.h"
#else // overlay mode
// Overlay mode: go through the overlay wrapper, which includes the system
// <uchar.h>.
// NOTE(review): this relies on the system <uchar.h> declaring char8_t for the
// language mode in use -- confirm for the supported toolchains.
#include "hdr/uchar_overlay.h"
#endif // LIBC_FULL_BUILD
#endif // LLVM_LIBC_HDR_TYPES_CHAR8_T_H

69
libc/hdr/uchar_overlay.h Normal file
View File

@@ -0,0 +1,69 @@
//===-- Including uchar.h in overlay mode ---------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_HDR_UCHAR_OVERLAY_H
#define LLVM_LIBC_HDR_UCHAR_OVERLAY_H

#ifdef LIBC_FULL_BUILD
#error "This header should only be included in overlay mode"
#endif

// Overlay mode
//
// glibc's <uchar.h> header might provide extern inline definitions for a few
// functions, causing external alias errors. They are guarded by the
// `__USE_EXTERN_INLINES` macro, so that macro is disabled and `__NO_INLINE__`
// is defined for the duration of the include. Fortified variants are
// suppressed the same way: `_FORTIFY_SOURCE` is undefined and
// `__USE_FORTIFY_LEVEL` is forced to 0 while <uchar.h> is processed.
//
// Macros that carry a value (`_FORTIFY_SOURCE`, `__USE_FORTIFY_LEVEL`) are
// saved with `#pragma push_macro` and restored with `#pragma pop_macro`.
// Saving them through a helper macro (`#define OLD X` ... `#define X OLD`
// followed by `#undef OLD`) does not work: a macro body is expanded when the
// macro is used, not when it is defined, so after `#undef OLD` the "restored"
// X would expand to the bare identifier OLD instead of its previous value.
// The LIBC_OLD_* / LIBC_SET_* helpers below are therefore only flags that
// record which macros have to be restored.

// Save and remove _FORTIFY_SOURCE, if the includer defined it.
#ifdef _FORTIFY_SOURCE
#pragma push_macro("_FORTIFY_SOURCE")
#undef _FORTIFY_SOURCE
#define LIBC_OLD_FORTIFY_SOURCE
#endif

// Define __NO_INLINE__ unless it is already defined; remember if we did.
#ifndef __NO_INLINE__
#define __NO_INLINE__ 1
#define LIBC_SET_NO_INLINE
#endif

// __USE_EXTERN_INLINES carries no value, so a flag is enough to restore it.
#ifdef __USE_EXTERN_INLINES
#define LIBC_OLD_USE_EXTERN_INLINES
#undef __USE_EXTERN_INLINES
#endif

// Save __USE_FORTIFY_LEVEL and force it to 0 while <uchar.h> is processed.
#ifdef __USE_FORTIFY_LEVEL
#pragma push_macro("__USE_FORTIFY_LEVEL")
#undef __USE_FORTIFY_LEVEL
#define __USE_FORTIFY_LEVEL 0
#define LIBC_OLD_USE_FORTIFY_LEVEL
#endif

#include <uchar.h>

// Restore everything that was changed above and drop the helper flags.
#ifdef LIBC_OLD_FORTIFY_SOURCE
#pragma pop_macro("_FORTIFY_SOURCE")
#undef LIBC_OLD_FORTIFY_SOURCE
#endif

#ifdef LIBC_SET_NO_INLINE
#undef __NO_INLINE__
#undef LIBC_SET_NO_INLINE
#endif

#ifdef LIBC_OLD_USE_FORTIFY_LEVEL
#pragma pop_macro("__USE_FORTIFY_LEVEL")
#undef LIBC_OLD_USE_FORTIFY_LEVEL
#endif

#ifdef LIBC_OLD_USE_EXTERN_INLINES
#define __USE_EXTERN_INLINES
#undef LIBC_OLD_USE_EXTERN_INLINES
#endif

#endif // LLVM_LIBC_HDR_UCHAR_OVERLAY_H

View File

@@ -0,0 +1,26 @@
# Header-only target exposing mbstate.h. That header includes the char32_t
# proxy header, hence the dependency on its target.
add_header_library(
  mbstate
  HDRS
    mbstate.h
  DEPENDS
    libc.hdr.types.char32_t
)
# Object library for the CharacterConverter class (character_converter.h /
# character_converter.cpp). It depends on the char8_t and char32_t proxy header
# targets and on the mbstate and utf_ret targets declared in this file
# (presumably the leading '.' refers to targets of the current directory).
add_object_library(
  character_converter
  HDRS
    character_converter.h
  SRCS
    character_converter.cpp
  DEPENDS
    libc.hdr.types.char8_t
    libc.hdr.types.char32_t
    .mbstate
    .utf_ret
)
# Header-only target exposing utf_ret.h (the utf_ret<T> result struct).
add_header_library(
  utf_ret
  HDRS
    utf_ret.h
)

View File

@@ -0,0 +1,32 @@
//===-- Implementation of a class for conversion --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "hdr/types/char32_t.h"
#include "hdr/types/char8_t.h"
#include "src/__support/wchar/mbstate.h"
#include "src/__support/wchar/utf_ret.h"
#include "character_converter.h"
namespace LIBC_NAMESPACE_DECL {
namespace internal {
CharacterConverter::CharacterConverter(mbstate_t *mbstate) { state = mbstate; }
bool CharacterConverter::isComplete() {}
int CharacterConverter::push(char8_t utf8_byte) {}
int CharacterConverter::push(char32_t utf32) {}
utf_ret<char8_t> CharacterConverter::pop_utf8() {}
utf_ret<char32_t> CharacterConverter::pop_utf32() {}
} // namespace internal
} // namespace LIBC_NAMESPACE_DECL

View File

@@ -0,0 +1,39 @@
//===-- Definition of a class for mbstate_t and conversion -----*-- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC___SUPPORT_CHARACTER_CONVERTER_H
#define LLVM_LIBC_SRC___SUPPORT_CHARACTER_CONVERTER_H
#include "hdr/types/char32_t.h"
#include "hdr/types/char8_t.h"
#include "src/__support/wchar/mbstate.h"
#include "src/__support/wchar/utf_ret.h"
namespace LIBC_NAMESPACE_DECL {
namespace internal {
// Skeleton of a converter that accepts UTF-8 bytes or UTF-32 values through
// push() and hands results back through pop_utf8() / pop_utf32().
//
// NOTE(review): mbstate.h declares `internal::mbstate`, while this class is
// written against `mbstate_t` -- confirm which type is intended.
// NOTE(review): LIBC_NAMESPACE_DECL is not defined by any header visible in
// this file's include list -- confirm it is provided elsewhere.
class CharacterConverter {
  // Conversion state handed in at construction. Stored as a raw pointer; the
  // class declares no destructor, so it is not released here.
  mbstate_t *state;

public:
  // Stores `mbstate` as the state this converter works on.
  CharacterConverter(mbstate_t *mbstate);

  // Presumably reports whether a whole character has been processed --
  // confirm once implemented.
  bool isComplete();

  // Feed one UTF-8 byte / one UTF-32 value. The meaning of the returned int
  // is not defined by this skeleton.
  int push(char8_t utf8_byte);
  int push(char32_t utf32);

  // Retrieve output as a UTF-8 byte / UTF-32 value together with an integer
  // status (see utf_ret).
  utf_ret<char8_t> pop_utf8();
  utf_ret<char32_t> pop_utf32();
};
} // namespace internal
} // namespace LIBC_NAMESPACE_DECL
#endif // LLVM_LIBC_SRC___SUPPORT_CHARACTER_CONVERTER_H

View File

@@ -0,0 +1,27 @@
//===-- Definition of mbstate-----------------------------------*-- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC___SUPPORT_MBSTATE_H
#define LLVM_LIBC_SRC___SUPPORT_MBSTATE_H
#include "hdr/types/char32_t.h"
#include <stdint.h>
namespace LIBC_NAMESPACE_DECL {
namespace internal {
// Plain aggregate holding the state of a character conversion.
// NOTE(review): the field descriptions below are inferred from the field
// names only; nothing in this file reads or writes them yet -- confirm.
struct mbstate {
  // Presumably the partially converted character accumulated so far.
  char32_t partial;

  // Presumably how much of the current character has been handled.
  uint8_t bits_processed;

  // Presumably the total number of bytes of the current character.
  uint8_t total_bytes;
};
} // namespace internal
} // namespace LIBC_NAMESPACE_DECL
#endif // LLVM_LIBC_SRC___SUPPORT_MBSTATE_H

View File

@@ -0,0 +1,21 @@
//===-- Definition of utf_ret ----------------------------------*-- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC___SUPPORT_UTF_RET_H
#define LLVM_LIBC_SRC___SUPPORT_UTF_RET_H
namespace LIBC_NAMESPACE_DECL {
// Result pair used by the conversion routines: a value of type T plus an
// integer status. The meaning of individual `error` values is not defined in
// this header.
template <class T> struct utf_ret {
  // The produced value.
  T out;

  // Integer status accompanying `out`.
  int error;
};
} // namespace LIBC_NAMESPACE_DECL
#endif // LLVM_LIBC_SRC___SUPPORT_UTF_RET_H