diff --git a/CMakeLists.txt b/CMakeLists.txt index 1239d3a..e47e6f1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,19 +15,14 @@ set(PROJECT_URL "lang.artichoke.dev") set(PROJECT_AUTHOR "erick-alcachofa") set(PROJECT_AUTHOR_GITHUB "@erick-alcachofa") -include(cmake/testing.cmake) +option(ENABLE_TESTING "Enable build of tests for library" OFF) add_subdirectory(lib) add_subdirectory(frontend) install( - TARGETS frontend library - EXPORT artichokeTargets - FILE_SET HEADERS - LIBRARY DESTINATION lib - ARCHIVE DESTINATION lib + TARGETS frontend RUNTIME DESTINATION bin - INCLUDES DESTINATION include ) get_target_property( @@ -43,6 +38,16 @@ install( )" ) +install( + TARGETS library library_static + EXPORT artichokeTargets + FILE_SET HEADERS + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib + RUNTIME DESTINATION bin + INCLUDES DESTINATION include +) + install( EXPORT artichokeTargets FILE artichokeTargets.cmake @@ -62,3 +67,7 @@ install( "${CMAKE_CURRENT_BINARY_DIR}/artichokeConfigVersion.cmake" DESTINATION lib/cmake/artichoke ) + +if(ENABLE_TESTING) + add_subdirectory(tests) +endif() diff --git a/README.md b/README.md index a4e5e44..a5d9dfd 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,7 @@ grammar is stable, and the next step is the implementation of a compiler git clone https://git.artichoke.dev/me/artichoke-lang.git # Configure cmake +# Optionally add -DENABLE_TESTING=ON for building tests cmake -DCMAKE_BUILD_TYPE=Release -S . -B build # Build the project @@ -54,6 +55,9 @@ cmake --build build # Run the binary ./build/frontend/artichoke-c +# Run the tests if enabled +ctest --test-dir build/tests --output-on-failure + # Install if wanted cmake --install build --prefix=/usr/local diff --git a/cmake/testing.cmake b/cmake/testing.cmake deleted file mode 100644 index e69de29..0000000 diff --git a/frontend/CMakeLists.txt b/frontend/CMakeLists.txt index 2bacfdb..14091df 100644 --- a/frontend/CMakeLists.txt +++ b/frontend/CMakeLists.txt @@ -34,5 +34,5 @@ target_include_directories( target_link_libraries( frontend PUBLIC - library + artichoke::library_static ) diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index a29bf00..938a2c4 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -5,17 +5,17 @@ file(GLOB_RECURSE ARTI_LIB_HEADERS "include/**.hpp") file(GLOB_RECURSE ARTI_LIB_GEN_HEADERS "${CMAKE_CURRENT_BINARY_DIR}/include/**.hpp") add_library( - library SHARED + objs OBJECT ${ARTI_LIB_SOURCES} ) set_target_properties( - library PROPERTIES - OUTPUT_NAME "artichoke" + objs PROPERTIES + POSITION_INDEPENDENT_CODE 1 ) target_compile_options( - library PRIVATE + objs PRIVATE -pedantic -Wall -Wextra @@ -30,24 +30,58 @@ target_compile_options( -Wno-unused ) -target_sources( - library PUBLIC - FILE_SET HEADERS - BASE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/include - FILES - ${ARTI_LIB_HEADERS} -) - -target_sources( - library PUBLIC - FILE_SET HEADERS - BASE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/include - FILES - ${ARTI_LIB_GEN_HEADERS} -) - target_include_directories( - library PUBLIC + objs PUBLIC $ $ ) + +add_library( + library SHARED + $ +) + +add_library( + artichoke::library ALIAS + library +) + +set_target_properties( + library PROPERTIES + OUTPUT_NAME "artichoke" +) + +add_library( + library_static STATIC + $ +) + +add_library( + artichoke::library_static ALIAS + library_static +) + +set_target_properties( + library_static PROPERTIES + OUTPUT_NAME "artichoke" +) + +set(LIB_TARGETS objs library library_static) + +foreach(TGET IN LISTS LIB_TARGETS) + target_sources( + ${TGET} INTERFACE + FILE_SET HEADERS + BASE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/include + FILES + ${ARTI_LIB_HEADERS} + ) + + target_sources( + ${TGET} INTERFACE + FILE_SET HEADERS + BASE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/include + FILES + ${ARTI_LIB_GEN_HEADERS} + ) +endforeach() diff --git a/lib/include/artichoke/Tokenizer/Tokenizer.hpp b/lib/include/artichoke/Tokenizer/Tokenizer.hpp index 3ea09a5..7269d57 100644 --- a/lib/include/artichoke/Tokenizer/Tokenizer.hpp +++ b/lib/include/artichoke/Tokenizer/Tokenizer.hpp @@ -1,12 +1,12 @@ #pragma once #include -#include #include #include #include +#include namespace arti::lang { @@ -30,6 +30,8 @@ namespace arti::lang { void swap(Tokenizer &other) noexcept; + TokenizerRange range() noexcept; + private: Generator> tokenize(); diff --git a/lib/include/artichoke/Tokenizer/TokenizerRange.hpp b/lib/include/artichoke/Tokenizer/TokenizerRange.hpp new file mode 100644 index 0000000..63891d5 --- /dev/null +++ b/lib/include/artichoke/Tokenizer/TokenizerRange.hpp @@ -0,0 +1,71 @@ +#pragma once + +#include +#include + +namespace arti::lang { + struct Tokenizer; + + struct [[nodiscard]] TokenizerRange { + friend struct Tokenizer; + struct Iterator; + struct Sentinel; + + using iterator_type = Iterator; + using sentinel_type = Sentinel; + + TokenizerRange(TokenizerRange &&) noexcept; + TokenizerRange &operator=(TokenizerRange &&) noexcept; + + TokenizerRange(const TokenizerRange &) noexcept = delete; + TokenizerRange &operator=(const TokenizerRange &) noexcept = delete; + + Iterator begin(); + Sentinel end() const noexcept; + + struct Iterator { + friend struct TokenizerRange; + using iterator_category = std::input_iterator_tag; + using difference_type = std::ptrdiff_t; + + using ValueType = Expected; + using ReferenceType = ValueType &; + using PointerType = ValueType *; + + using value_type = ValueType; + using pointer_type = PointerType; + + Iterator(Iterator &&) noexcept; + Iterator &operator=(Iterator &&) noexcept; + + Iterator(const Iterator &) noexcept = delete; + Iterator &operator=(const Iterator &) noexcept = delete; + + Iterator &operator++(); + + void operator++(int); + + ReferenceType operator*() const noexcept; + PointerType operator->() const noexcept; + + friend bool operator==(const Iterator &, Sentinel); + friend bool operator==(Sentinel, const Iterator &); + + friend bool operator!=(const Iterator &, Sentinel); + friend bool operator!=(Sentinel, const Iterator &); + + private: + Iterator(Tokenizer *tokenizer) noexcept; + + Tokenizer *tokenizer; + mutable Expected cvalue; + }; + + struct Sentinel {}; + + private: + TokenizerRange(Tokenizer *tokenizer); + + Tokenizer *tokenizer; + }; +} diff --git a/lib/src/Tokenizer/Tokenizer.cpp b/lib/src/Tokenizer/Tokenizer.cpp index 09f0fec..c74574c 100644 --- a/lib/src/Tokenizer/Tokenizer.cpp +++ b/lib/src/Tokenizer/Tokenizer.cpp @@ -1,8 +1,8 @@ #include -#include #include +#include #include #include #include @@ -36,8 +36,22 @@ namespace arti::lang { return *this; } + TokenizerRange Tokenizer::range() noexcept { + return TokenizerRange{ this }; + } + Expected Tokenizer::consume(std::size_t n) noexcept { - while (not tokensBuffer.empty()) { + while (n > 0) { + if (tokensBuffer.empty()) { + if (auto ok = peek(); ! ok) { + return Unexpected<>{ ok.error() }; + } + + if (finished()) { + break; + } + } + tokensBuffer.pop_front(); n -= 1; } @@ -94,7 +108,11 @@ namespace arti::lang { bool Tokenizer::finished() const noexcept { if (tokensGenerator.finished()) { - return tokensBuffer.empty(); + if (!tokensBuffer.empty()) { + return tokensBuffer.front().value == TokenV::tkEOF; + } + + return true; } return false; @@ -340,6 +358,7 @@ namespace arti::lang { } if (*iter == '.') { + auto dotIter = iter; forward(); while (iter != source.end()) { @@ -350,6 +369,21 @@ namespace arti::lang { forward(); } + if ((iter - dotIter) == 1) { + /* Revert to dot */ + --iter; + --column; + + return langException( + line, + column, + "digit", + iter == source.end() + ? "EOF" + : std::string{ *(iter + 1) } + ); + } + return Token{ TokenV::tkDecimal, cLine, @@ -390,7 +424,6 @@ namespace arti::lang { } if (*iter == '"') { - forward(); break; } @@ -412,12 +445,23 @@ namespace arti::lang { forward(); } - return Token{ - TokenV::tkString, - cLine, - cColumn, - { stIter, iter } - }; + if (*iter == '"') { + forward(); + + return Token{ + TokenV::tkString, + cLine, + cColumn, + { stIter, iter } + }; + } + + return langException( + line, + column, + "end of string (\")", + "EOF" + ); } Expected Tokenizer::readCharacter() { diff --git a/lib/src/Tokenizer/TokenizerRange.cpp b/lib/src/Tokenizer/TokenizerRange.cpp new file mode 100644 index 0000000..299c816 --- /dev/null +++ b/lib/src/Tokenizer/TokenizerRange.cpp @@ -0,0 +1,84 @@ +#include + +#include + +#include + +namespace arti::lang { + + using Iterator = TokenizerRange::Iterator; + using Sentinel = TokenizerRange::Sentinel; + + TokenizerRange::TokenizerRange(Tokenizer *tokenizer) + : tokenizer(tokenizer) { } + + TokenizerRange::TokenizerRange(TokenizerRange &&other) noexcept + : tokenizer(std::exchange(other.tokenizer, nullptr)) { } + + TokenizerRange &TokenizerRange::operator=(TokenizerRange &&other) noexcept { + this->tokenizer = std::exchange(other.tokenizer, nullptr); + return *this; + } + + Iterator TokenizerRange::begin() { + return Iterator{ this->tokenizer }; + } + + Sentinel TokenizerRange::end() const noexcept { + return Sentinel{}; + } + + Iterator::Iterator(Tokenizer *tokenizer) noexcept + : tokenizer(tokenizer) + , cvalue(tokenizer->peek()) { } + + Iterator::Iterator(Iterator &&other) noexcept + : tokenizer(std::exchange(other.tokenizer, nullptr)) + , cvalue(std::exchange(other.cvalue, {})) { } + + Iterator &Iterator::operator=(Iterator &&other) noexcept { + this->tokenizer = std::exchange(other.tokenizer, nullptr); + this->cvalue = std::exchange(other.cvalue, {}); + return *this; + } + + Iterator &Iterator::operator++() { + if (this->cvalue) { + std::ignore = tokenizer->consume(); + } + this->cvalue = tokenizer->peek(); + return *this; + } + + void Iterator::operator++(int) { + std::ignore = this->operator++(); + } + + Iterator::ReferenceType Iterator::operator*() const noexcept { + return this->cvalue; + } + + Iterator::PointerType Iterator::operator->() const noexcept { + return &this->cvalue; + } + + bool operator==(const Iterator &it, Sentinel) { + if (it.tokenizer->finished()) { + return true; + } + return !it.cvalue.has_value() || it.cvalue->value == TokenV::tkEOF; + } + + bool operator==(Sentinel, const Iterator &it) { + return it == Sentinel{}; + } + + bool operator!=(const Iterator &it, Sentinel) { + return !(it == Sentinel{}); + } + + bool operator!=(Sentinel, const Iterator &it) { + return !(it == Sentinel{}); + } + +} // namespace arti::lang diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt new file mode 100644 index 0000000..14b6ab3 --- /dev/null +++ b/tests/CMakeLists.txt @@ -0,0 +1,20 @@ +include(cmake/dependencies.cmake) + +enable_testing() + +add_library(test INTERFACE) + +target_include_directories( + test INTERFACE + ${CMAKE_CURRENT_SOURCE_DIR}/include +) + +target_link_libraries( + test INTERFACE + artichoke::library + Catch2::Catch2WithMain +) + +include(Catch) + +add_subdirectory(Tokenizer) diff --git a/tests/Tokenizer/CMakeLists.txt b/tests/Tokenizer/CMakeLists.txt new file mode 100644 index 0000000..7b30d1f --- /dev/null +++ b/tests/Tokenizer/CMakeLists.txt @@ -0,0 +1,16 @@ +file(GLOB_RECURSE TOKENIZER_TEST_SRC "src/**.cpp") + +add_executable( + test-tokenizer + ${TOKENIZER_TEST_SRC} +) + +target_link_libraries( + test-tokenizer PRIVATE + test +) + +catch_discover_tests( + test-tokenizer + TEST_PREFIX "Tokenizer." +) diff --git a/tests/Tokenizer/src/Api.cpp b/tests/Tokenizer/src/Api.cpp new file mode 100644 index 0000000..68b6f50 --- /dev/null +++ b/tests/Tokenizer/src/Api.cpp @@ -0,0 +1,99 @@ +#include + +#include +#include + +#include + +namespace lang = arti::lang; + +TEST_CASE("API_PeekOffset", "[api][peek]") { + const std::string source = "a b c"; + lang::Tokenizer tkz{ source }; + + auto t0 = tkz.peek(0); + REQUIRE(t0.has_value()); + REQUIRE(t0->value == lang::TokenV::tkIdentifier); + REQUIRE(t0->strValue == "a"); + + auto t1 = tkz.peek(1); + REQUIRE(t1.has_value()); + REQUIRE(t1->value == lang::TokenV::tkIdentifier); + REQUIRE(t1->strValue == "b"); + + auto t2 = tkz.peek(2); + REQUIRE(t2.has_value()); + REQUIRE(t2->value == lang::TokenV::tkIdentifier); + REQUIRE(t2->strValue == "c"); + + auto t3 = tkz.peek(3); + REQUIRE(t3.has_value()); + REQUIRE(t3->value == lang::TokenV::tkEOF); + + // Ensure nothing was consumed by peeks + REQUIRE_FALSE(tkz.finished()); + auto t0_again = tkz.peek(); + REQUIRE(t0_again.has_value()); + REQUIRE(t0_again->value == lang::TokenV::tkIdentifier); + REQUIRE(t0_again->strValue == "a"); +} + +TEST_CASE("API_ConsumeAndFinishedSemantics", "[api][consume][finished]") { + const std::string source = "x y z"; + lang::Tokenizer tkz{ source }; + + // consume 2 tokens: x, y + REQUIRE(tkz.consume(2).has_value()); + REQUIRE_FALSE(tkz.finished()); + + auto t = tkz.peek(); + REQUIRE(t.has_value()); + REQUIRE(t->value == lang::TokenV::tkIdentifier); + REQUIRE(t->strValue == "z"); + + // consume last token + REQUIRE(tkz.consume().has_value()); + // finished() returns true only after EOF token has been produced + REQUIRE_FALSE(tkz.finished()); + + auto eof = tkz.peek(); + REQUIRE(eof.has_value()); + REQUIRE(eof->value == lang::TokenV::tkEOF); + REQUIRE(tkz.finished()); +} + +TEST_CASE("API_RangeIterationStopsAtEOF", "[api][range]") { + const std::string source = + "let ident := 42 /* skip this */ + 1"; + lang::Tokenizer tkz{ source }; + + std::vector kinds; + std::vector lex; + + for (auto token : tkz.range()) { + REQUIRE(token.has_value()); + kinds.push_back(token->value); + lex.push_back(token->strValue); + } + + // Expected sequence: kwLet, tkIdentifier("ident"), opLabel, tkInteger("42"), opPlus, tkInteger("1") + REQUIRE(kinds.size() == 6); + REQUIRE(kinds[0] == lang::TokenV::kwLet); + REQUIRE(kinds[1] == lang::TokenV::tkIdentifier); + REQUIRE(kinds[2] == lang::TokenV::opLabel); + REQUIRE(kinds[3] == lang::TokenV::tkInteger); + REQUIRE(kinds[4] == lang::TokenV::opPlus); + REQUIRE(kinds[5] == lang::TokenV::tkInteger); + + REQUIRE(lex[0] == "let"); + REQUIRE(lex[1] == "ident"); + REQUIRE(lex[2] == ":="); + REQUIRE(lex[3] == "42"); + REQUIRE(lex[4] == "+"); + REQUIRE(lex[5] == "1"); + + // After iterating the range, peek should yield EOF + auto eof = tkz.peek(); + REQUIRE(eof.has_value()); + REQUIRE(eof->value == lang::TokenV::tkEOF); +} diff --git a/tests/Tokenizer/src/Comments.cpp b/tests/Tokenizer/src/Comments.cpp new file mode 100644 index 0000000..30cbdc6 --- /dev/null +++ b/tests/Tokenizer/src/Comments.cpp @@ -0,0 +1,87 @@ +#include + +#include + +#include +#include + +namespace lang = arti::lang; + +TEST_CASE("Comments_BlockSkipped", "[comments][block][skip]") { + // Ensure that block comments are ignored and do not emit tokens. + const std::string source = "foo /* a block comment with symbols 123 !@# */ bar"; + + lang::Tokenizer tkz{source}; + + auto t1 = tkz.peek(); + REQUIRE(t1.has_value()); + REQUIRE(t1->value == lang::TokenV::tkIdentifier); + REQUIRE(t1->strValue == "foo"); + + REQUIRE(tkz.consume().has_value()); + + auto t2 = tkz.peek(); + REQUIRE(t2.has_value()); + REQUIRE(t2->value == lang::TokenV::tkIdentifier); + REQUIRE(t2->strValue == "bar"); + + REQUIRE(tkz.consume().has_value()); + + auto eof = tkz.peek(); + REQUIRE(eof.has_value()); + REQUIRE(eof->value == lang::TokenV::tkEOF); +} + +TEST_CASE("Comments_BlockMultiline", "[comments][block][multiline]") { + const std::string source = + "alpha /* line1\n" + "line2\n" + "line3 */ beta"; + + lang::Tokenizer tkz{source}; + + auto t1 = tkz.peek(); + REQUIRE(t1.has_value()); + REQUIRE(t1->value == lang::TokenV::tkIdentifier); + REQUIRE(t1->strValue == "alpha"); + REQUIRE(tkz.consume().has_value()); + + auto t2 = tkz.peek(); + REQUIRE(t2.has_value()); + REQUIRE(t2->value == lang::TokenV::tkIdentifier); + REQUIRE(t2->strValue == "beta"); + REQUIRE(tkz.consume().has_value()); + + auto eof = tkz.peek(); + REQUIRE(eof.has_value()); + REQUIRE(eof->value == lang::TokenV::tkEOF); +} + +TEST_CASE("Comments_UnterminatedBlock_Error", "[comments][block][error]") { + // Unterminated block comments should surface an error from the tokenizer. + const std::string source = "foo /* this never ends..."; + + lang::Tokenizer tkz{source}; + + auto t1 = tkz.peek(); + REQUIRE(t1.has_value()); + REQUIRE(t1->value == lang::TokenV::tkIdentifier); + REQUIRE(t1->strValue == "foo"); + REQUIRE(tkz.consume().has_value()); + + auto errTok = tkz.peek(); + REQUIRE_FALSE(errTok.has_value()); + + // Check error message mentions invalid comment. + const auto &err = errTok.error(); + REQUIRE(err.message.find("Invalid comment") != std::string::npos); +} + +TEST_CASE("Comments_SingleLineUnsupported_Skip", "[comments][.line]") { + // The tokenizer currently does NOT support '//' comments. + // Keep this test as a placeholder and mark it skipped to avoid failures. + SKIP("Single-line '//' comments are not supported yet by the tokenizer"); + const std::string source = "foo // comment\n bar"; + lang::Tokenizer tkz{source}; + (void)tkz; // silence unused +} diff --git a/tests/Tokenizer/src/Identifiers.cpp b/tests/Tokenizer/src/Identifiers.cpp new file mode 100644 index 0000000..b34317d --- /dev/null +++ b/tests/Tokenizer/src/Identifiers.cpp @@ -0,0 +1,127 @@ +#include + +#include +#include +#include + +#include +#include + +namespace lang = arti::lang; + +template +static void CommonIdentifiersSuccess( + const std::array &ids +) { + const std::string source = SourceFromTokens(ids); + + std::size_t it = 0; + lang::Tokenizer tkz{ source }; + + for (auto token : tkz.range()) { + REQUIRE(token.has_value()); + REQUIRE(token->value == lang::TokenV::tkIdentifier); + REQUIRE(token->strValue == ids.at(it++)); + } + + REQUIRE(it == ids.size()); + REQUIRE(tkz.peek().has_value()); + REQUIRE(tkz.peek()->value == lang::TokenV::tkEOF); +} + +TEST_CASE("Identifiers_Basic", "[identifiers][valid]") { + constexpr std::array ids = { + "a", "abc", "a_b", "snake_case", "camelCase", "PascalCase", "_id", "with123" + }; + + CommonIdentifiersSuccess(ids); +} + +TEST_CASE("Identifiers_DigitsAfterFirst", "[identifiers][valid]") { + constexpr std::array ids = { + "a1", "abc123", "_a1_b2", "v2", "x9y8z7", "i18n" + }; + + CommonIdentifiersSuccess(ids); +} + +TEST_CASE("Identifiers_Long", "[identifiers][valid][long]") { + // Create a long identifier to ensure tokenizer handles large spans. + std::string longId(512, 'a'); + std::vector toks{ longId }; + + const std::string source = SourceFromTokens(toks); + lang::Tokenizer tkz{ source }; + + auto t = tkz.peek(); + REQUIRE(t.has_value()); + REQUIRE(t->value == lang::TokenV::tkIdentifier); + REQUIRE(t->strValue == longId); + + REQUIRE(tkz.consume().has_value()); + auto eof = tkz.peek(); + REQUIRE(eof.has_value()); + REQUIRE(eof->value == lang::TokenV::tkEOF); +} + +TEST_CASE("Identifiers_WithOperators", "[identifiers][operators]") { + // '$' and '?' are operators, not identifier characters. + // '$foo' -> opMut, tkIdentifier("foo") + // '?bar' -> opOpt, tkIdentifier("bar") + const std::string source = "$foo ?bar"; + + lang::Tokenizer tkz{ source }; + + auto t1 = tkz.peek(); + REQUIRE(t1.has_value()); + REQUIRE(t1->value == lang::TokenV::opMut); + REQUIRE(tkz.consume().has_value()); + + auto t2 = tkz.peek(); + REQUIRE(t2.has_value()); + REQUIRE(t2->value == lang::TokenV::tkIdentifier); + REQUIRE(t2->strValue == "foo"); + REQUIRE(tkz.consume().has_value()); + + auto t3 = tkz.peek(); + REQUIRE(t3.has_value()); + REQUIRE(t3->value == lang::TokenV::opOpt); + REQUIRE(tkz.consume().has_value()); + + auto t4 = tkz.peek(); + REQUIRE(t4.has_value()); + REQUIRE(t4->value == lang::TokenV::tkIdentifier); + REQUIRE(t4->strValue == "bar"); + REQUIRE(tkz.consume().has_value()); + + auto eof = tkz.peek(); + REQUIRE(eof.has_value()); + REQUIRE(eof->value == lang::TokenV::tkEOF); +} + +TEST_CASE("Identifiers_DotAccess", "[identifiers][dot]") { + const std::string source = "foo.bar"; + + lang::Tokenizer tkz{ source }; + + auto t1 = tkz.peek(); + REQUIRE(t1.has_value()); + REQUIRE(t1->value == lang::TokenV::tkIdentifier); + REQUIRE(t1->strValue == "foo"); + REQUIRE(tkz.consume().has_value()); + + auto t2 = tkz.peek(); + REQUIRE(t2.has_value()); + REQUIRE(t2->value == lang::TokenV::opDot); + REQUIRE(tkz.consume().has_value()); + + auto t3 = tkz.peek(); + REQUIRE(t3.has_value()); + REQUIRE(t3->value == lang::TokenV::tkIdentifier); + REQUIRE(t3->strValue == "bar"); + REQUIRE(tkz.consume().has_value()); + + auto eof = tkz.peek(); + REQUIRE(eof.has_value()); + REQUIRE(eof->value == lang::TokenV::tkEOF); +} diff --git a/tests/Tokenizer/src/Keywords.cpp b/tests/Tokenizer/src/Keywords.cpp new file mode 100644 index 0000000..fd85cec --- /dev/null +++ b/tests/Tokenizer/src/Keywords.cpp @@ -0,0 +1,93 @@ +#include + +#include + +#include + +#include + +namespace lang = arti::lang; + +template +static void CommonKeywordsSuccess( + const std::array &lexemes, + const std::array &kinds +) { + static_assert(N > 0, "Must provide at least one keyword"); + const std::string source = SourceFromTokens(lexemes); + + std::size_t it = 0; + lang::Tokenizer tkz{ source }; + + for (auto token : tkz.range()) { + REQUIRE(token.has_value()); + REQUIRE(token->value == kinds.at(it)); + REQUIRE(token->strValue == lexemes.at(it)); + ++it; + } + + REQUIRE(it == lexemes.size()); + REQUIRE(tkz.peek().has_value()); + REQUIRE(tkz.peek()->value == lang::TokenV::tkEOF); +} + +TEST_CASE("Keywords_AllRecognized", "[keywords][valid]") { + constexpr std::array lexemes = { + "_","or","not","and","if","else","fn","enum","struct","def","let","for", + "loop","break","continue","while","match","switch","return","unreachable", + "defer","errdefer","true","false","null","this","import","export","module","using","this" + }; + + constexpr std::array kinds = { + lang::TokenV::kwUnderscore, + lang::TokenV::kwOr, + lang::TokenV::kwNot, + lang::TokenV::kwAnd, + lang::TokenV::kwIf, + lang::TokenV::kwElse, + lang::TokenV::kwFn, + lang::TokenV::kwEnum, + lang::TokenV::kwStruct, + lang::TokenV::kwDef, + lang::TokenV::kwLet, + lang::TokenV::kwFor, + lang::TokenV::kwLoop, + lang::TokenV::kwBreak, + lang::TokenV::kwContinue, + lang::TokenV::kwWhile, + lang::TokenV::kwMatch, + lang::TokenV::kwSwitch, + lang::TokenV::kwReturn, + lang::TokenV::kwUnreachable, + lang::TokenV::kwDefer, + lang::TokenV::kwErrDefer, + lang::TokenV::kwTrue, + lang::TokenV::kwFalse, + lang::TokenV::kwNull, + lang::TokenV::kwThis, + lang::TokenV::kwImport, + lang::TokenV::kwExport, + lang::TokenV::kwModule, + lang::TokenV::kwUsing, + lang::TokenV::kwThis, + }; + + CommonKeywordsSuccess(lexemes, kinds); +} + +TEST_CASE("Keywords_PrecedenceOverIdentifiers", "[keywords][precedence]") { + // Ensure that keywords are recognized as keywords, while longer names remain identifiers. + constexpr std::array lexemes = { + "if", "iff", "return", "returnX", "_", "_id" + }; + constexpr std::array kinds = { + lang::TokenV::kwIf, // "if" is a keyword + lang::TokenV::tkIdentifier,// "iff" should be an identifier + lang::TokenV::kwReturn, // "return" is a keyword + lang::TokenV::tkIdentifier,// "returnX" is not a keyword + lang::TokenV::kwUnderscore,// "_" is a keyword in this language + lang::TokenV::tkIdentifier // "_id" is a regular identifier + }; + + CommonKeywordsSuccess(lexemes, kinds); +} diff --git a/tests/Tokenizer/src/Numbers.cpp b/tests/Tokenizer/src/Numbers.cpp new file mode 100644 index 0000000..1fac2f5 --- /dev/null +++ b/tests/Tokenizer/src/Numbers.cpp @@ -0,0 +1,171 @@ +#include + +#include + +#include + +#include + +namespace lang = arti::lang; + +template +static void CommonIntegersSuccess( + lang::TokenV type, + const std::array &expected +) { + const std::string source = SourceFromTokens(expected); + + std::size_t it = 0; + lang::Tokenizer tkz{ source }; + + for (auto token : tkz.range()) { + REQUIRE(token.has_value()); + REQUIRE(token->value == type); + REQUIRE(token->strValue == expected.at(it++)); + } + + REQUIRE(it == expected.size()); + REQUIRE(tkz.peek().has_value()); + REQUIRE(tkz.peek()->value == lang::TokenV::tkEOF); +} + +TEST_CASE("Numbers_Integers", "[integers][valid]") { + constexpr std::array expected = { + "0", "1", "42", "123456", "98712390", "12381723912465471" + }; + + CommonIntegersSuccess(lang::TokenV::tkInteger, expected); +} + +TEST_CASE("Numbers_HexIntegers", "[integers][valid]") { + constexpr std::array expected = { + "0x831", "0xAFEFE", "0xABEBE", + "0x7a147e8a3", "0x98712390", "0x1d238c18e7ff239a12465471" + }; + + CommonIntegersSuccess(lang::TokenV::tkInteger, expected); +} + +TEST_CASE("Numbers_OctIntegers", "[integers][valid]") { + constexpr std::array expected = { + "041", "064123", "0136237", "012345", "01", "071236571236512631723651" + }; + + CommonIntegersSuccess(lang::TokenV::tkInteger, expected); +} + +TEST_CASE("Numbers_BinIntegers", "[integers][valid]") { + constexpr std::array expected = { + "0b0101101", "0b1", "0b01", "0b0", "0b011010101110101101110101011", + "0b11110101011010101" + }; + + CommonIntegersSuccess(lang::TokenV::tkInteger, expected); +} + +TEST_CASE("Numbers_Decimal", "[decimals][valid]") { + constexpr std::array expected = { + "1.0", "0.5", "3.14159", "10.50", "9999.0001", "1375123476175981.813751235" + }; + + CommonIntegersSuccess(lang::TokenV::tkDecimal, expected); +} + +TEST_CASE("Numbers_UnaryMinusSeparate", "[numbers][unary-minus]") { + const std::string source = "-1 -2.5"; + lang::Tokenizer tkz{ source }; + + auto t1 = tkz.peek(); + REQUIRE(t1.has_value()); + REQUIRE(t1->value == lang::TokenV::opHyphen); + REQUIRE(tkz.consume().has_value()); + + auto t2 = tkz.peek(); + REQUIRE(t2.has_value()); + REQUIRE(t2->value == lang::TokenV::tkInteger); + REQUIRE(t2->strValue == "1"); + REQUIRE(tkz.consume().has_value()); + + auto t3 = tkz.peek(); + REQUIRE(t3.has_value()); + REQUIRE(t3->value == lang::TokenV::opHyphen); + REQUIRE(tkz.consume().has_value()); + + auto t4 = tkz.peek(); + REQUIRE(t4.has_value()); + REQUIRE(t4->value == lang::TokenV::tkDecimal); + REQUIRE(t4->strValue == "2.5"); + REQUIRE(tkz.consume().has_value()); + + auto eof = tkz.peek(); + REQUIRE(eof.has_value()); + REQUIRE(eof->value == lang::TokenV::tkEOF); +} + +TEST_CASE("Numbers_DotBoundaries_Disambiguation", "[numbers][dot][edge]") { + // Expect numbers must start with a digit: + // .5 -> '.' + '5' + // 10. -> ERROR (expects digit after '.'), then '.' token if continued + // 1..2 -> ERROR (expects digit after '.'), then '.' '.' '2' if continued + const std::string source = ".5 10. 1..2"; + lang::Tokenizer tkz{ source }; + + // .5 -> '.' then '5' + auto a1 = tkz.peek(); + REQUIRE(a1.has_value()); + REQUIRE(a1->value == lang::TokenV::opDot); + REQUIRE(tkz.consume().has_value()); + + auto a2 = tkz.peek(); + REQUIRE(a2.has_value()); + REQUIRE(a2->value == lang::TokenV::tkInteger); + REQUIRE(a2->strValue == "5"); + REQUIRE(tkz.consume().has_value()); + + // 10. -> first an error (expects a digit after '.'), then '.' is parsed if we continue + auto err1 = tkz.peek(); + REQUIRE_FALSE(err1.has_value()); + + auto after_err1 = tkz.peek(); + REQUIRE(after_err1.has_value()); + REQUIRE(after_err1->value == lang::TokenV::opDot); + REQUIRE(tkz.consume().has_value()); + + // 1..2 -> first an error (expects a digit after '.'), then '.' '.' '2' + auto err2 = tkz.peek(); + REQUIRE_FALSE(err2.has_value()); + + auto dot1 = tkz.peek(); + REQUIRE(dot1.has_value()); + REQUIRE(dot1->value == lang::TokenV::opDot); + REQUIRE(tkz.consume().has_value()); + + auto dot2 = tkz.peek(); + REQUIRE(dot2.has_value()); + REQUIRE(dot2->value == lang::TokenV::opDot); + REQUIRE(tkz.consume().has_value()); + + auto last = tkz.peek(); + REQUIRE(last.has_value()); + REQUIRE(last->value == lang::TokenV::tkInteger); + REQUIRE(last->strValue == "2"); + REQUIRE(tkz.consume().has_value()); + + auto eof = tkz.peek(); + REQUIRE(eof.has_value()); + REQUIRE(eof->value == lang::TokenV::tkEOF); +} + +TEST_CASE("Numbers_InvalidPrefixes", "[numbers][invalid]") { + const std::array invalids = { "0x", "0b", "0xG", "0b2", "08" }; + + for (auto src : invalids) { + lang::Tokenizer tkz{ std::string{src} }; + auto tok = tkz.peek(); + REQUIRE_FALSE(tok.has_value()); + const auto &err = tok.error(); + REQUIRE( + err.message.find("Invalid literal") != std::string::npos + ); + } +} diff --git a/tests/Tokenizer/src/Operators.cpp b/tests/Tokenizer/src/Operators.cpp new file mode 100644 index 0000000..9955bc0 --- /dev/null +++ b/tests/Tokenizer/src/Operators.cpp @@ -0,0 +1,178 @@ +#include + +#include +#include +#include + +#include +#include + +namespace lang = arti::lang; + +template +static void CommonOpsSuccess( + const std::array &lexemes, + const std::array &kinds +) { + static_assert(N > 0, "Must provide at least one operator"); + const std::string source = SourceFromTokens(lexemes); + + std::size_t it = 0; + lang::Tokenizer tkz{ source }; + + for (auto token : tkz.range()) { + REQUIRE(token.has_value()); + REQUIRE(token->value == kinds.at(it)); + REQUIRE(token->strValue == lexemes.at(it)); + ++it; + } + + REQUIRE(it == lexemes.size()); + REQUIRE(tkz.peek().has_value()); + REQUIRE(tkz.peek()->value == lang::TokenV::tkEOF); +} + +TEST_CASE("Operators_SingleChar", "[operators][single]") { + constexpr std::array lex = { + ".", "%", "+", "-", "/", "!", "*", ":", ",", "=", ";", "^", "~", + "&", "|", "<", ">", "(", ")", "[", "]", "{", "}", "?", "$" + }; + constexpr std::array kinds = { + lang::TokenV::opDot, lang::TokenV::opMod, lang::TokenV::opPlus, lang::TokenV::opHyphen, + lang::TokenV::opSlash, lang::TokenV::opBang, lang::TokenV::opStar, lang::TokenV::opColon, + lang::TokenV::opComma, lang::TokenV::opAssign, lang::TokenV::opSemicolon, lang::TokenV::opCaret, + lang::TokenV::opTilde, lang::TokenV::opAnd, lang::TokenV::opOr, lang::TokenV::opLt, + lang::TokenV::opGt, lang::TokenV::opLParen, lang::TokenV::opRParen, lang::TokenV::opLBracket, + lang::TokenV::opRBracket, lang::TokenV::opLSquirly, lang::TokenV::opRSquirly, lang::TokenV::opOpt, + lang::TokenV::opMut + }; + + CommonOpsSuccess(lex, kinds); +} + +TEST_CASE("Operators_MultiChar", "[operators][multi]") { + constexpr std::array lex = { + "==","!=", "<=", ">=", "<<", ">>", "&&", "||", + "+=", "-=", "*=", "/=", "%=", "&=", "|=", + "<<=", ">>=", "&&=", "||=", "->", "::", ":=" + }; + constexpr std::array kinds = { + lang::TokenV::opEq, lang::TokenV::opNeq, lang::TokenV::opLtEq, lang::TokenV::opGtEq, + lang::TokenV::opLShift, lang::TokenV::opRShift, lang::TokenV::opBoolAnd, lang::TokenV::opBoolOr, + lang::TokenV::opPlusAssign, lang::TokenV::opHyphenAssign, lang::TokenV::opStarAssign, lang::TokenV::opSlashAssign, + lang::TokenV::opModAssign, lang::TokenV::opAndAssign, lang::TokenV::opOrAssign, + lang::TokenV::opLShiftAssign, lang::TokenV::opRShiftAssign, lang::TokenV::opBoolAndAssign, lang::TokenV::opBoolORAssign, + lang::TokenV::opArrow, lang::TokenV::opAccess, lang::TokenV::opLabel + }; + + CommonOpsSuccess(lex, kinds); +} + +TEST_CASE("Operators_DotPrefixedSpecials", "[operators][dot][special]") { + constexpr std::array lex = { + ".#", ".[", ".*", ".@" + }; + constexpr std::array kinds = { + lang::TokenV::opSliceSize, lang::TokenV::opPtrSlice, lang::TokenV::opSlicePtr, lang::TokenV::opReflect + }; + + CommonOpsSuccess(lex, kinds); +} + +TEST_CASE("Operators_GreedyLongestMatch", "[operators][greedy]") { + // Ensure longest valid operator is selected. + constexpr std::array lex = { + ">>=", "<<=", "&&=", "||=", + ">=", "<=", "->", "::" + }; + constexpr std::array kinds = { + lang::TokenV::opRShiftAssign, lang::TokenV::opLShiftAssign, lang::TokenV::opBoolAndAssign, lang::TokenV::opBoolORAssign, + lang::TokenV::opGtEq, lang::TokenV::opLtEq, lang::TokenV::opArrow, lang::TokenV::opAccess + }; + + CommonOpsSuccess(lex, kinds); +} + +TEST_CASE("Operators_BoundariesWhitespace", "[operators][boundaries]") { + // '= =' should not be '==' + const std::string source = "=\n="; + lang::Tokenizer tkz{ source }; + + auto t1 = tkz.peek(); + REQUIRE(t1.has_value()); + REQUIRE(t1->value == lang::TokenV::opAssign); + REQUIRE(tkz.consume().has_value()); + + auto t2 = tkz.peek(); + REQUIRE(t2.has_value()); + REQUIRE(t2->value == lang::TokenV::opAssign); + REQUIRE(tkz.consume().has_value()); + + auto eof = tkz.peek(); + REQUIRE(eof.has_value()); + REQUIRE(eof->value == lang::TokenV::tkEOF); +} + +TEST_CASE("Operators_ContextAccessLabelArrow", "[operators][context]") { + const std::string source = "ns::name := src->field"; + lang::Tokenizer tkz{ source }; + + auto t1 = tkz.peek(); + REQUIRE(t1.has_value()); + REQUIRE(t1->value == lang::TokenV::tkIdentifier); + REQUIRE(t1->strValue == "ns"); + REQUIRE(tkz.consume().has_value()); + + auto t2 = tkz.peek(); + REQUIRE(t2.has_value()); + REQUIRE(t2->value == lang::TokenV::opAccess); + REQUIRE(t2->strValue == "::"); + REQUIRE(tkz.consume().has_value()); + + auto t3 = tkz.peek(); + REQUIRE(t3.has_value()); + REQUIRE(t3->value == lang::TokenV::tkIdentifier); + REQUIRE(t3->strValue == "name"); + REQUIRE(tkz.consume().has_value()); + + auto t4 = tkz.peek(); + REQUIRE(t4.has_value()); + REQUIRE(t4->value == lang::TokenV::opLabel); + REQUIRE(t4->strValue == ":="); + REQUIRE(tkz.consume().has_value()); + + auto t5 = tkz.peek(); + REQUIRE(t5.has_value()); + REQUIRE(t5->value == lang::TokenV::tkIdentifier); + REQUIRE(t5->strValue == "src"); + REQUIRE(tkz.consume().has_value()); + + auto t6 = tkz.peek(); + REQUIRE(t6.has_value()); + REQUIRE(t6->value == lang::TokenV::opArrow); + REQUIRE(t6->strValue == "->"); + REQUIRE(tkz.consume().has_value()); + + auto t7 = tkz.peek(); + REQUIRE(t7.has_value()); + REQUIRE(t7->value == lang::TokenV::tkIdentifier); + REQUIRE(t7->strValue == "field"); + REQUIRE(tkz.consume().has_value()); + + auto eof = tkz.peek(); + REQUIRE(eof.has_value()); + REQUIRE(eof->value == lang::TokenV::tkEOF); +} + +TEST_CASE("Operators_InvalidStandalone_Error", "[operators][error]") { + // '#' and '@' alone are not valid tokens (only .# and .@ are valid). + const std::vector invalids = { "#", "@", "`" }; + + for (const auto &src : invalids) { + lang::Tokenizer tkz{ src }; + auto tok = tkz.peek(); + REQUIRE_FALSE(tok.has_value()); + const auto &err = tok.error(); + REQUIRE(err.message.find("Invalid") != std::string::npos); + } +} diff --git a/tests/Tokenizer/src/Strings.cpp b/tests/Tokenizer/src/Strings.cpp new file mode 100644 index 0000000..a16a57c --- /dev/null +++ b/tests/Tokenizer/src/Strings.cpp @@ -0,0 +1,168 @@ +#include + +#include +#include + +#include +#include +#include + +namespace lang = arti::lang; + +template +static void CommonLiteralsSuccess( + lang::TokenV kind, + const std::array &lexemes +) { + const std::string source = SourceFromTokens(lexemes); + + std::size_t it = 0; + lang::Tokenizer tkz{ source }; + + for (auto token : tkz.range()) { + REQUIRE(token.has_value()); + REQUIRE(token->value == kind); + REQUIRE(token->strValue == lexemes.at(it++)); + } + + REQUIRE(it == lexemes.size()); + REQUIRE(tkz.peek().has_value()); + REQUIRE(tkz.peek()->value == lang::TokenV::tkEOF); +} + +TEST_CASE("Strings_Simple", "[strings][valid]") { + constexpr std::array lexemes = { + R"("a")", + R"("hello")", + R"("with spaces")", + R"("12345")", + R"Q("!@#$%^&*()")Q" + }; + + CommonLiteralsSuccess(lang::TokenV::tkString, lexemes); +} + +TEST_CASE("Strings_Escapes", "[strings][valid][escapes]") { + // Validate common escape sequences remain part of lexeme text. + constexpr std::array lexemes = { + R"("quote: \"")", + R"("backslash: \\")", + R"("newline: \n")", + R"("tab: \t")", + R"("mix: \"\\\n\t")" + }; + + CommonLiteralsSuccess(lang::TokenV::tkString, lexemes); +} + +TEST_CASE("Strings_OperatorsAdjacency", "[strings][operators]") { + // "foo"+"bar" -> tkString, opPlus, tkString + const std::string source = R"("foo"+"bar")"; + lang::Tokenizer tkz{ source }; + + auto t1 = tkz.peek(); + REQUIRE(t1.has_value()); + REQUIRE(t1->value == lang::TokenV::tkString); + REQUIRE(t1->strValue == R"("foo")"); + REQUIRE(tkz.consume().has_value()); + + auto t2 = tkz.peek(); + REQUIRE(t2.has_value()); + REQUIRE(t2->value == lang::TokenV::opPlus); + REQUIRE(tkz.consume().has_value()); + + auto t3 = tkz.peek(); + REQUIRE(t3.has_value()); + REQUIRE(t3->value == lang::TokenV::tkString); + REQUIRE(t3->strValue == R"("bar")"); + REQUIRE(tkz.consume().has_value()); + + auto eof = tkz.peek(); + REQUIRE(eof.has_value()); + REQUIRE(eof->value == lang::TokenV::tkEOF); +} + +TEST_CASE("Strings_Unterminated_Error", "[strings][error]") { + // Missing closing quote should yield an error. + const std::string source = "\"unterminated"; + lang::Tokenizer tkz{ source }; + + auto errTok = tkz.peek(); + REQUIRE_FALSE(errTok.has_value()); + const auto &err = errTok.error(); + + REQUIRE( + err.message.find("Invalid literal") != std::string::npos + ); +} + +TEST_CASE("Chars_Simple", "[chars][valid]") { + constexpr std::array lexemes = { + R"('a')", + R"('Z')", + R"('0')", + R"('_')", + R"('$')" + }; + + CommonLiteralsSuccess(lang::TokenV::tkCharacter, lexemes); +} + +TEST_CASE("Chars_Escapes", "[chars][valid][escapes]") { + constexpr std::array lexemes = { + R"('\n')", + R"('\t')", + R"('\\')", + R"('\'')" + }; + + CommonLiteralsSuccess(lang::TokenV::tkCharacter, lexemes); +} + +TEST_CASE("Chars_Invalid_Empty", "[chars][error]") { + const std::string source = "''"; + lang::Tokenizer tkz{ source }; + + auto errTok = tkz.peek(); + REQUIRE_FALSE(errTok.has_value()); + const auto &err = errTok.error(); + REQUIRE( + err.message.find("Invalid literal") != std::string::npos + ); +} + +TEST_CASE("Chars_Invalid_Multiple", "[chars][error]") { + const std::string source = "'ab'"; + lang::Tokenizer tkz{ source }; + + auto errTok = tkz.peek(); + REQUIRE_FALSE(errTok.has_value()); + const auto &err = errTok.error(); + REQUIRE( + err.message.find("Invalid literal") != std::string::npos + ); +} + +TEST_CASE("Chars_Unterminated", "[chars][error]") { + const std::string source = "'a"; + lang::Tokenizer tkz{ source }; + + auto errTok = tkz.peek(); + REQUIRE_FALSE(errTok.has_value()); + const auto &err = errTok.error(); + REQUIRE( + err.message.find("Invalid literal") != std::string::npos + ); +} + +TEST_CASE("Chars_InvalidEscape", "[chars][error][.escapes]") { + const std::string source = "'\\x'"; + lang::Tokenizer tkz{ source }; + + auto errTok = tkz.peek(); + REQUIRE_FALSE(errTok.has_value()); + const auto &err = errTok.error(); + REQUIRE( + err.message.find("Invalid literal") != std::string::npos + ); +} diff --git a/tests/cmake/dependencies.cmake b/tests/cmake/dependencies.cmake new file mode 100644 index 0000000..67d09cb --- /dev/null +++ b/tests/cmake/dependencies.cmake @@ -0,0 +1,18 @@ +include(FetchContent) + +# Get CPM +file( + DOWNLOAD + https://github.com/cpm-cmake/CPM.cmake/releases/download/v0.40.8/CPM.cmake + ${CMAKE_CURRENT_BINARY_DIR}/cmake/CPM.cmake + EXPECTED_HASH + SHA256=78ba32abdf798bc616bab7c73aac32a17bbd7b06ad9e26a6add69de8f3ae4791 +) +include(${CMAKE_CURRENT_BINARY_DIR}/cmake/CPM.cmake) + +# Get dependencies + +CPMAddPackage("gh:catchorg/Catch2@3.6.0") + +# Include Catch2 CMake scripts +list(APPEND CMAKE_MODULE_PATH ${Catch2_SOURCE_DIR}/extras) diff --git a/tests/include/Utils.hpp b/tests/include/Utils.hpp new file mode 100644 index 0000000..1f958ca --- /dev/null +++ b/tests/include/Utils.hpp @@ -0,0 +1,58 @@ +#pragma once + +#include + +#include + +template +requires( + std::is_convertible_v, std::string_view> and + std::is_same_v, std::ranges::range_value_t> +) +arti::lang::Generator> +InterleaveRanges(R1 &&r1, R2 &&r2) { + auto it1 = std::ranges::begin(r1); + auto end1 = std::ranges::end(r1); + + auto it2 = std::ranges::begin(r2); + auto end2 = std::ranges::end(r2); + + while (it1 != end1 && it2 != end2) { + yield *it1; + ++it1; + yield *it2; + ++it2; + } +} + +static arti::lang::Generator +WhitespaceGenerator(uint32_t maxLen = 5) { + constexpr std::array spaceChars{ ' ', '\t', '\n' }; + + std::string str; + std::random_device device; + std::mt19937 engine(device()); + std::uniform_int_distribution dist(1, maxLen); + std::uniform_int_distribution distChars(0, 2); + + str.reserve(maxLen); + + while (true) { + str.resize(0); + + auto sz = dist(engine); + + for (uint32_t i = 0; i < sz; ++i) { + str += spaceChars[distChars(engine)]; + } + + yield str; + } +} + +template +requires(std::is_same_v, std::string_view>) +static std::string SourceFromTokens(R &&tokens) { + return InterleaveRanges(tokens, WhitespaceGenerator(10)) | std::views::join | + std::ranges::to(); +}