[libc] wcstok implementation (#145989)

Implemented wcstok and added tests
This commit is contained in:
Uzair Nawaz
2025-06-30 10:41:00 -07:00
committed by GitHub
parent 790bc5bc72
commit 7a33b709b1
7 changed files with 282 additions and 0 deletions

View File

@@ -387,6 +387,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.wchar.wmemchr
libc.src.wchar.wcpcpy
libc.src.wchar.wcpncpy
libc.src.wchar.wcstok
# sys/uio.h entrypoints
libc.src.sys.uio.writev

View File

@@ -196,6 +196,14 @@ functions:
arguments:
- type: wchar_t *__restrict
- type: const wchar_t *__restrict
- name: wcstok
  standards:
    - stdc
  return_type: wchar_t *
  arguments:
    - type: wchar_t *__restrict
    - type: const wchar_t *__restrict
    # Save pointer; declarator spacing matches the entries above.
    - type: wchar_t **__restrict
- name: wcpcpy
standards:
- stdc

View File

@@ -45,6 +45,16 @@ add_entrypoint_object(
libc.src.__support.wctype_utils
)
# wcstok entrypoint: built from wcstok.cpp with wcstok.h as its header; the
# only declared dependency is the wchar_t type header.
add_entrypoint_object(
wcstok
SRCS
wcstok.cpp
HDRS
wcstok.h
DEPENDS
libc.hdr.types.wchar_t
)
add_entrypoint_object(
wcrtomb
SRCS

50
libc/src/wchar/wcstok.cpp Normal file
View File

@@ -0,0 +1,50 @@
//===-- Implementation of wcstok ------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "src/wchar/wcstok.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/common.h"
namespace LIBC_NAMESPACE_DECL {
// Returns true if `wc` equals any character in the null-terminated string
// `delimiters`; the terminating L'\0' itself is never treated as a delimiter.
//
// Declared `static` so this file-local helper has internal linkage and does
// not leak a symbol out of the libc namespace (or collide with a same-named
// helper in another translation unit).
static bool isADelimeter(wchar_t wc, const wchar_t *delimiters) {
  for (const wchar_t *delim_ptr = delimiters; *delim_ptr != L'\0'; ++delim_ptr)
    if (wc == *delim_ptr)
      return true;
  return false;
}
// Extracts the next token from a wide string.
//
// If `str` is non-null, scanning starts there; otherwise it resumes from the
// position saved in `*context`. When both `str` and `*context` are null the
// function returns nullptr without touching `*context`.
//
// Leading characters found in `delim` are skipped. The token then runs up to
// the next delimiter (which is overwritten with L'\0') or the end of the
// string. `*context` is updated to where the next call should resume.
// Returns the start of the token, or nullptr if no token was found.
LLVM_LIBC_FUNCTION(wchar_t *, wcstok,
                   (wchar_t *__restrict str, const wchar_t *__restrict delim,
                    wchar_t **__restrict context)) {
  wchar_t *cursor = str;
  if (cursor == nullptr) {
    // No new string supplied: continue from the saved position, if any.
    cursor = *context;
    if (cursor == nullptr)
      return nullptr;
  }

  // Skip any delimiters preceding the token.
  while (*cursor != L'\0' && isADelimeter(*cursor, delim))
    ++cursor;
  wchar_t *const token = cursor;

  // Advance to the first delimiter (or terminator) after the token.
  while (*cursor != L'\0' && !isADelimeter(*cursor, delim))
    ++cursor;

  // Terminate the token in place and step past the consumed delimiter. When
  // the scan stopped on the string's own terminator, stay on it so later
  // calls keep finding nothing.
  if (*cursor != L'\0') {
    *cursor = L'\0';
    ++cursor;
  }

  *context = cursor;
  if (*token == L'\0')
    return nullptr;
  return token;
}
} // namespace LIBC_NAMESPACE_DECL

22
libc/src/wchar/wcstok.h Normal file
View File

@@ -0,0 +1,22 @@
//===-- Implementation header for wcstok ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC_WCHAR_WCSTOK_H
#define LLVM_LIBC_SRC_WCHAR_WCSTOK_H
#include "hdr/types/wchar_t.h"
#include "src/__support/macros/config.h"
namespace LIBC_NAMESPACE_DECL {
// Tokenizes a wide string in place.
//   str:     string to start tokenizing, or nullptr to continue from the
//            position stored in *context.
//   delim:   null-terminated set of delimiter characters.
//   context: caller-owned save pointer, updated on each call.
// Returns a pointer to the next token, or nullptr when none remains.
wchar_t *wcstok(wchar_t *__restrict str, const wchar_t *__restrict delim,
wchar_t **__restrict context);
} // namespace LIBC_NAMESPACE_DECL
#endif // LLVM_LIBC_SRC_WCHAR_WCSTOK_H

View File

@@ -123,6 +123,16 @@ add_libc_test(
libc.src.wchar.wcschr
)
# Unit tests for wcstok, registered in the libc_wchar_unittests suite and
# depending on the wcstok entrypoint.
add_libc_test(
wcstok_test
SUITE
libc_wchar_unittests
SRCS
wcstok_test.cpp
DEPENDS
libc.src.wchar.wcstok
)
add_libc_test(
wcsncmp_test
SUITE

View File

@@ -0,0 +1,181 @@
//===-- Unittests for wcstok ----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/wchar/wcstok.h"
#include "test/UnitTest/Test.h"
// Inputs in which no delimiter splits the string: either there is no token at
// all (nullptr expected) or the whole string comes back as one token.
TEST(LlvmLibcWCSTokReentrantTest, NoTokenFound) {
  { // Empty source and delimiter string.
    wchar_t empty[] = L"";
    wchar_t *reserve = nullptr;
    ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"", &reserve), nullptr);
    // Another call to ensure that 'reserve' is not in a bad state.
    ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"", &reserve), nullptr);
    ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"", &reserve), nullptr);
  }
  { // Empty source and single character delimiter string.
    wchar_t empty[] = L"";
    wchar_t *reserve = nullptr;
    ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"_", &reserve), nullptr);
    // Another call to ensure that 'reserve' is not in a bad state.
    ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"_", &reserve), nullptr);
    ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"_", &reserve), nullptr);
  }
  { // Same character source and delimiter string.
    wchar_t single[] = L"_";
    wchar_t *reserve = nullptr;
    ASSERT_EQ(LIBC_NAMESPACE::wcstok(single, L"_", &reserve), nullptr);
    // Another call to ensure that 'reserve' is not in a bad state.
    ASSERT_EQ(LIBC_NAMESPACE::wcstok(single, L"_", &reserve), nullptr);
    ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"_", &reserve), nullptr);
  }
  { // Multiple character source and single character delimiter string.
    wchar_t multiple[] = L"1,2";
    wchar_t *reserve = nullptr;
    wchar_t *tok = LIBC_NAMESPACE::wcstok(multiple, L":", &reserve);
    // Guard the dereferences below so a regression fails cleanly instead of
    // crashing the test binary.
    ASSERT_TRUE(tok != nullptr);
    ASSERT_TRUE(tok[0] == L'1');
    ASSERT_TRUE(tok[1] == L',');
    ASSERT_TRUE(tok[2] == L'2');
    ASSERT_TRUE(tok[3] == L'\0');
    // Another call to ensure that 'reserve' is not in a bad state.
    tok = LIBC_NAMESPACE::wcstok(multiple, L":", &reserve);
    ASSERT_TRUE(tok != nullptr);
    ASSERT_TRUE(tok[0] == L'1');
    ASSERT_TRUE(tok[1] == L',');
    ASSERT_TRUE(tok[2] == L'2');
    ASSERT_TRUE(tok[3] == L'\0');
    ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L":", &reserve), nullptr);
  }
}
// A leading delimiter must be skipped rather than producing an empty token.
TEST(LlvmLibcWCSTokReentrantTest, DelimiterAsFirstCharacterShouldBeIgnored) {
  wchar_t src[] = L".123";
  wchar_t *reserve = nullptr;
  wchar_t *tok = LIBC_NAMESPACE::wcstok(src, L".", &reserve);
  // Fail cleanly (not crash) if no token is returned.
  ASSERT_TRUE(tok != nullptr);
  ASSERT_TRUE(tok[0] == L'1');
  ASSERT_TRUE(tok[1] == L'2');
  ASSERT_TRUE(tok[2] == L'3');
  ASSERT_TRUE(tok[3] == L'\0');
  // Another call to ensure that 'reserve' is not in a bad state.
  tok = LIBC_NAMESPACE::wcstok(src, L".", &reserve);
  ASSERT_TRUE(tok != nullptr);
  ASSERT_TRUE(tok[0] == L'1');
  ASSERT_TRUE(tok[1] == L'2');
  ASSERT_TRUE(tok[2] == L'3');
  ASSERT_TRUE(tok[3] == L'\0');
  ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L".", &reserve), nullptr);
}
// A delimiter in the middle ends the first token. Restarting on the same
// buffer sees only that first token, because the delimiter was overwritten
// with a terminator.
TEST(LlvmLibcWCSTokReentrantTest, DelimiterIsMiddleCharacter) {
  wchar_t src[] = L"12,34";
  wchar_t *reserve = nullptr;
  wchar_t *tok = LIBC_NAMESPACE::wcstok(src, L",", &reserve);
  // Fail cleanly (not crash) if no token is returned.
  ASSERT_TRUE(tok != nullptr);
  ASSERT_TRUE(tok[0] == L'1');
  ASSERT_TRUE(tok[1] == L'2');
  ASSERT_TRUE(tok[2] == L'\0');
  // Another call to ensure that 'reserve' is not in a bad state.
  tok = LIBC_NAMESPACE::wcstok(src, L",", &reserve);
  ASSERT_TRUE(tok != nullptr);
  ASSERT_TRUE(tok[0] == L'1');
  ASSERT_TRUE(tok[1] == L'2');
  ASSERT_TRUE(tok[2] == L'\0');
  ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L",", &reserve), nullptr);
}
// A trailing delimiter must not produce an extra, empty token.
TEST(LlvmLibcWCSTokReentrantTest, DelimiterAsLastCharacterShouldBeIgnored) {
  wchar_t src[] = L"1234:";
  wchar_t *reserve = nullptr;
  wchar_t *tok = LIBC_NAMESPACE::wcstok(src, L":", &reserve);
  // Fail cleanly (not crash) if no token is returned.
  ASSERT_TRUE(tok != nullptr);
  ASSERT_TRUE(tok[0] == L'1');
  ASSERT_TRUE(tok[1] == L'2');
  ASSERT_TRUE(tok[2] == L'3');
  ASSERT_TRUE(tok[3] == L'4');
  ASSERT_TRUE(tok[4] == L'\0');
  // Another call to ensure that 'reserve' is not in a bad state.
  tok = LIBC_NAMESPACE::wcstok(src, L":", &reserve);
  ASSERT_TRUE(tok != nullptr);
  ASSERT_TRUE(tok[0] == L'1');
  ASSERT_TRUE(tok[1] == L'2');
  ASSERT_TRUE(tok[2] == L'3');
  ASSERT_TRUE(tok[3] == L'4');
  ASSERT_TRUE(tok[4] == L'\0');
  ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L":", &reserve), nullptr);
}
// Characters stored after an embedded terminator must never be scanned or
// returned as tokens.
TEST(LlvmLibcWCSTokReentrantTest, ShouldNotGoPastNullTerminator) {
  wchar_t src[] = {L'1', L'2', L'\0', L',', L'3'};
  wchar_t *reserve = nullptr;
  wchar_t *tok = LIBC_NAMESPACE::wcstok(src, L",", &reserve);
  // Fail cleanly (not crash) if no token is returned.
  ASSERT_TRUE(tok != nullptr);
  ASSERT_TRUE(tok[0] == L'1');
  ASSERT_TRUE(tok[1] == L'2');
  ASSERT_TRUE(tok[2] == L'\0');
  // Another call to ensure that 'reserve' is not in a bad state.
  tok = LIBC_NAMESPACE::wcstok(src, L",", &reserve);
  ASSERT_TRUE(tok != nullptr);
  ASSERT_TRUE(tok[0] == L'1');
  ASSERT_TRUE(tok[1] == L'2');
  ASSERT_TRUE(tok[2] == L'\0');
  ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L",", &reserve), nullptr);
}
// With neither an input string nor a saved position there is nothing to scan:
// the call must return nullptr rather than crash.
TEST(LlvmLibcWCSTokReentrantTest,
     ShouldReturnNullptrWhenBothSrcAndSaveptrAreNull) {
  wchar_t *input = nullptr;
  wchar_t *saveptr = nullptr;

  wchar_t *result = LIBC_NAMESPACE::wcstok(input, L",", &saveptr);
  ASSERT_EQ(result, nullptr);

  // Neither pointer may have been modified by the failed call.
  ASSERT_EQ(input, nullptr);
  ASSERT_EQ(saveptr, nullptr);
}
// Continuation calls (str == nullptr) walk through the remaining tokens, and
// keep returning nullptr once the string is exhausted.
TEST(LlvmLibcWCSTokReentrantTest,
     SubsequentCallsShouldFindFollowingDelimiters) {
  wchar_t src[] = L"12,34.56";
  wchar_t *reserve = nullptr;
  wchar_t *token = LIBC_NAMESPACE::wcstok(src, L",.", &reserve);
  // Each dereference is guarded so a missing token is reported as a test
  // failure instead of a crash.
  ASSERT_TRUE(token != nullptr);
  ASSERT_TRUE(token[0] == L'1');
  ASSERT_TRUE(token[1] == L'2');
  ASSERT_TRUE(token[2] == L'\0');
  token = LIBC_NAMESPACE::wcstok(nullptr, L",.", &reserve);
  ASSERT_TRUE(token != nullptr);
  ASSERT_TRUE(token[0] == L'3');
  ASSERT_TRUE(token[1] == L'4');
  ASSERT_TRUE(token[2] == L'\0');
  token = LIBC_NAMESPACE::wcstok(nullptr, L",.", &reserve);
  ASSERT_TRUE(token != nullptr);
  ASSERT_TRUE(token[0] == L'5');
  ASSERT_TRUE(token[1] == L'6');
  ASSERT_TRUE(token[2] == L'\0');
  token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &reserve);
  ASSERT_EQ(token, nullptr);
  // Subsequent calls after hitting the end of the string should also return
  // nullptr.
  token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &reserve);
  ASSERT_EQ(token, nullptr);
}
// Runs of delimiters around tokens are dropped entirely, and the delimiter
// set may change between calls.
TEST(LlvmLibcWCSTokReentrantTest, DelimitersShouldNotBeIncludedInToken) {
  wchar_t src[] = L"__ab__:_cd__:__ef__:__";
  wchar_t *reserve = nullptr;
  wchar_t *token = LIBC_NAMESPACE::wcstok(src, L"_:", &reserve);
  // Each dereference is guarded so a missing token is reported as a test
  // failure instead of a crash.
  ASSERT_TRUE(token != nullptr);
  ASSERT_TRUE(token[0] == L'a');
  ASSERT_TRUE(token[1] == L'b');
  ASSERT_TRUE(token[2] == L'\0');
  token = LIBC_NAMESPACE::wcstok(nullptr, L":_", &reserve);
  ASSERT_TRUE(token != nullptr);
  ASSERT_TRUE(token[0] == L'c');
  ASSERT_TRUE(token[1] == L'd');
  ASSERT_TRUE(token[2] == L'\0');
  token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,", &reserve);
  ASSERT_TRUE(token != nullptr);
  ASSERT_TRUE(token[0] == L'e');
  ASSERT_TRUE(token[1] == L'f');
  ASSERT_TRUE(token[2] == L'\0');
  token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &reserve);
  ASSERT_EQ(token, nullptr);
}