From e1b9e054f31409e86740d2e6118dda866f657a66 Mon Sep 17 00:00:00 2001
From: erick-alcachofa <erick@artichoke.dev>
Date: Fri, 3 Oct 2025 12:54:41 -0600
Subject: [PATCH] feat(test, tokenizer): Add test suite, in Tokenizer fixed
 caught issues and range-based API Signed-off-by: erick-alcachofa
 <erick@artichoke.dev>

This commit introduces a comprehensive test suite for the tokenizer
using the Catch2 framework. To support this and improve the project
structure, the build system and the tokenizer's API have been
significantly updated.

- Removed `cmake/testing.cmake` as it's no longer needed.
- A new `TokenizerRange` class provides a C++20-style range interface,
  allowing for simple `for-each` loop iteration over tokens. This is
  used extensively in the new tests.

- The CMake build system has been refactored:
    - An `ENABLE_TESTING` option (OFF by default) now controls whether
      the test suite is built.
    - The core library is now compiled into an object library, which is
      then used to produce both a shared (`.so`/`.dll`) and a static
      (`.a`/`.lib`) library. This improves build efficiency and provides
      more flexible linkage options.
    - The frontend executable now links against the static version of
      the library.

- Implemented tests for the tokenizer using the Catch2 framework, covering
  various cases like identifiers, keywords, numbers, etc. These tests
  already caught some issues in the current implementation.

- Several parsing bugs and edge cases in the tokenizer were fixed,
  including the handling of unterminated strings and invalid numeric
  literals. The README has been updated with instructions for building
  and running tests.
---
 CMakeLists.txt                                |  23 ++-
 README.md                                     |   4 +
 cmake/testing.cmake                           |   0
 frontend/CMakeLists.txt                       |   2 +-
 lib/CMakeLists.txt                            |  76 +++++---
 lib/include/artichoke/Tokenizer/Tokenizer.hpp |   4 +-
 .../artichoke/Tokenizer/TokenizerRange.hpp    |  71 +++++++
 lib/src/Tokenizer/Tokenizer.cpp               |  64 ++++++-
 lib/src/Tokenizer/TokenizerRange.cpp          |  84 +++++++++
 tests/CMakeLists.txt                          |  20 ++
 tests/Tokenizer/CMakeLists.txt                |  16 ++
 tests/Tokenizer/src/Api.cpp                   |  99 ++++++++++
 tests/Tokenizer/src/Comments.cpp              |  87 +++++++++
 tests/Tokenizer/src/Identifiers.cpp           | 127 +++++++++++++
 tests/Tokenizer/src/Keywords.cpp              |  93 +++++++++
 tests/Tokenizer/src/Numbers.cpp               | 171 +++++++++++++++++
 tests/Tokenizer/src/Operators.cpp             | 178 ++++++++++++++++++
 tests/Tokenizer/src/Strings.cpp               | 168 +++++++++++++++++
 tests/cmake/dependencies.cmake                |  18 ++
 tests/include/Utils.hpp                       |  58 ++++++
 20 files changed, 1323 insertions(+), 40 deletions(-)
 delete mode 100644 cmake/testing.cmake
 create mode 100644 lib/include/artichoke/Tokenizer/TokenizerRange.hpp
 create mode 100644 lib/src/Tokenizer/TokenizerRange.cpp
 create mode 100644 tests/CMakeLists.txt
 create mode 100644 tests/Tokenizer/CMakeLists.txt
 create mode 100644 tests/Tokenizer/src/Api.cpp
 create mode 100644 tests/Tokenizer/src/Comments.cpp
 create mode 100644 tests/Tokenizer/src/Identifiers.cpp
 create mode 100644 tests/Tokenizer/src/Keywords.cpp
 create mode 100644 tests/Tokenizer/src/Numbers.cpp
 create mode 100644 tests/Tokenizer/src/Operators.cpp
 create mode 100644 tests/Tokenizer/src/Strings.cpp
 create mode 100644 tests/cmake/dependencies.cmake
 create mode 100644 tests/include/Utils.hpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1239d3a..e47e6f1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,19 +15,14 @@ set(PROJECT_URL "lang.artichoke.dev")
 set(PROJECT_AUTHOR "erick-alcachofa")
 set(PROJECT_AUTHOR_GITHUB "@erick-alcachofa")
 
-include(cmake/testing.cmake)
+option(ENABLE_TESTING "Enable build of tests for library" OFF)
 
 add_subdirectory(lib)
 add_subdirectory(frontend)
 
 install(
-  TARGETS frontend library
-  EXPORT  artichokeTargets
-  FILE_SET HEADERS
-  LIBRARY  DESTINATION lib
-  ARCHIVE  DESTINATION lib
+  TARGETS frontend
   RUNTIME  DESTINATION bin
-  INCLUDES DESTINATION include
 )
 
 get_target_property(
@@ -43,6 +38,16 @@ install(
   )"
 )
 
+install(
+  TARGETS library library_static
+  EXPORT  artichokeTargets
+  FILE_SET HEADERS
+  LIBRARY  DESTINATION lib
+  ARCHIVE  DESTINATION lib
+  RUNTIME  DESTINATION bin
+  INCLUDES DESTINATION include
+)
+
 install(
   EXPORT artichokeTargets
   FILE artichokeTargets.cmake
@@ -62,3 +67,7 @@ install(
         "${CMAKE_CURRENT_BINARY_DIR}/artichokeConfigVersion.cmake"
   DESTINATION lib/cmake/artichoke
 )
+
+if(ENABLE_TESTING)
+  add_subdirectory(tests)
+endif()
diff --git a/README.md b/README.md
index a4e5e44..a5d9dfd 100644
--- a/README.md
+++ b/README.md
@@ -46,6 +46,7 @@ grammar is stable, and the next step is the implementation of a compiler
 git clone https://git.artichoke.dev/me/artichoke-lang.git
 
 # Configure cmake
+# Optionally add -DENABLE_TESTING=ON for building tests
 cmake -DCMAKE_BUILD_TYPE=Release -S . -B build
 
 # Build the project
@@ -54,6 +55,9 @@ cmake --build build
 # Run the binary
 ./build/frontend/artichoke-c
 
+# Run the tests if enabled
+ctest --test-dir build/tests --output-on-failure
+
 # Install if wanted
 cmake --install build --prefix=/usr/local
 
diff --git a/cmake/testing.cmake b/cmake/testing.cmake
deleted file mode 100644
index e69de29..0000000
diff --git a/frontend/CMakeLists.txt b/frontend/CMakeLists.txt
index 2bacfdb..14091df 100644
--- a/frontend/CMakeLists.txt
+++ b/frontend/CMakeLists.txt
@@ -34,5 +34,5 @@ target_include_directories(
 
 target_link_libraries(
   frontend PUBLIC
-    library
+    artichoke::library_static
 )
diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
index a29bf00..938a2c4 100644
--- a/lib/CMakeLists.txt
+++ b/lib/CMakeLists.txt
@@ -5,17 +5,17 @@ file(GLOB_RECURSE ARTI_LIB_HEADERS "include/**.hpp")
 file(GLOB_RECURSE ARTI_LIB_GEN_HEADERS "${CMAKE_CURRENT_BINARY_DIR}/include/**.hpp")
 
 add_library(
-  library SHARED
+  objs OBJECT
     ${ARTI_LIB_SOURCES}
 )
 
 set_target_properties(
-  library PROPERTIES
-  OUTPUT_NAME "artichoke"
+  objs PROPERTIES
+  POSITION_INDEPENDENT_CODE 1
 )
 
 target_compile_options(
-  library PRIVATE
+  objs PRIVATE
     -pedantic
     -Wall
     -Wextra
@@ -30,24 +30,58 @@ target_compile_options(
     -Wno-unused
 )
 
-target_sources(
-  library PUBLIC
-  FILE_SET HEADERS
-  BASE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/include
-  FILES
-    ${ARTI_LIB_HEADERS}
-)
-
-target_sources(
-  library PUBLIC
-  FILE_SET HEADERS
-  BASE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/include
-  FILES
-    ${ARTI_LIB_GEN_HEADERS}
-)
-
 target_include_directories(
-  library PUBLIC
+  objs PUBLIC
     $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
     $<INSTALL_INTERFACE:>
 )
+
+add_library(
+  library SHARED
+    $<TARGET_OBJECTS:objs>
+)
+
+add_library(
+  artichoke::library ALIAS
+    library
+)
+
+set_target_properties(
+  library PROPERTIES
+  OUTPUT_NAME "artichoke"
+)
+
+add_library(
+  library_static STATIC
+    $<TARGET_OBJECTS:objs>
+)
+
+add_library(
+  artichoke::library_static ALIAS
+    library_static
+)
+
+set_target_properties(
+  library_static PROPERTIES
+  OUTPUT_NAME "artichoke"
+)
+
+set(LIB_TARGETS objs library library_static)
+
+foreach(TGET IN LISTS LIB_TARGETS)
+  target_sources(
+    ${TGET} INTERFACE
+    FILE_SET HEADERS
+    BASE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/include
+    FILES
+      ${ARTI_LIB_HEADERS}
+  )
+
+  target_sources(
+    ${TGET} INTERFACE
+    FILE_SET HEADERS
+    BASE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/include
+    FILES
+      ${ARTI_LIB_GEN_HEADERS}
+  )
+endforeach()
diff --git a/lib/include/artichoke/Tokenizer/Tokenizer.hpp b/lib/include/artichoke/Tokenizer/Tokenizer.hpp
index 3ea09a5..7269d57 100644
--- a/lib/include/artichoke/Tokenizer/Tokenizer.hpp
+++ b/lib/include/artichoke/Tokenizer/Tokenizer.hpp
@@ -1,12 +1,12 @@
 #pragma once
 
 #include <deque>
-#include <vector>
 
 #include <artichoke/Util/Expected.hpp>
 #include <artichoke/Coroutine/Generator.hpp>
 
 #include <artichoke/Tokenizer/Token.hpp>
+#include <artichoke/Tokenizer/TokenizerRange.hpp>
 
 namespace arti::lang {
 
@@ -30,6 +30,8 @@ namespace arti::lang {
 
     void swap(Tokenizer &other) noexcept;
 
+    TokenizerRange range() noexcept;
+
    private:
     Generator<Expected<Token>> tokenize();
 
diff --git a/lib/include/artichoke/Tokenizer/TokenizerRange.hpp b/lib/include/artichoke/Tokenizer/TokenizerRange.hpp
new file mode 100644
index 0000000..63891d5
--- /dev/null
+++ b/lib/include/artichoke/Tokenizer/TokenizerRange.hpp
@@ -0,0 +1,94 @@
+#pragma once
+
+#include <cstddef>
+#include <iterator>
+
+#include <artichoke/Util/Expected.hpp>
+#include <artichoke/Tokenizer/Token.hpp>
+
+namespace arti::lang {
+  struct Tokenizer;
+
+  /**
+   * Move-only range over the tokens of a Tokenizer, meant for range-based
+   * `for` loops. Its constructor is private; the friend Tokenizer creates it.
+   * Holds a raw pointer to that tokenizer and declares no destructor.
+   */
+  struct [[nodiscard]] TokenizerRange {
+    friend struct Tokenizer;
+    struct Iterator;
+    struct Sentinel;
+
+    using iterator_type = Iterator;
+    using sentinel_type = Sentinel;
+
+    TokenizerRange(TokenizerRange &&) noexcept;
+    TokenizerRange &operator=(TokenizerRange &&) noexcept;
+
+    TokenizerRange(const TokenizerRange &) noexcept = delete;
+    TokenizerRange &operator=(const TokenizerRange &) noexcept = delete;
+
+    /** Creates an iterator that starts by peeking the tokenizer. */
+    Iterator begin();
+    /** Returns the end marker that iterators are compared against. */
+    Sentinel end() const noexcept;
+
+    /**
+     * Move-only input iterator caching the tokenizer's last peeked result.
+     * Equal to Sentinel when the tokenizer is finished, the cached result
+     * is an error, or the cached token is tkEOF.
+     */
+    struct Iterator {
+      friend struct TokenizerRange;
+      using iterator_category = std::input_iterator_tag;
+      using difference_type = std::ptrdiff_t;
+
+      using ValueType = Expected<Token>;
+      using ReferenceType = ValueType &;
+      using PointerType = ValueType *;
+
+      using value_type = ValueType;
+      /* Standard spellings, as looked up by std::iterator_traits */
+      using reference = ReferenceType;
+      using pointer = PointerType;
+      /* Original alias, kept so existing users keep compiling */
+      using pointer_type = PointerType;
+
+      Iterator(Iterator &&) noexcept;
+      Iterator &operator=(Iterator &&) noexcept;
+
+      Iterator(const Iterator &) noexcept = delete;
+      Iterator &operator=(const Iterator &) noexcept = delete;
+
+      /** Consumes the cached token (if any) and caches the next peek. */
+      Iterator &operator++();
+
+      /** Post-increment form; same effect as pre-increment, returns void. */
+      void operator++(int);
+
+      ReferenceType operator*() const noexcept;
+      PointerType operator->() const noexcept;
+
+      friend bool operator==(const Iterator &, Sentinel);
+      friend bool operator==(Sentinel, const Iterator &);
+
+      friend bool operator!=(const Iterator &, Sentinel);
+      friend bool operator!=(Sentinel, const Iterator &);
+
+     private:
+      Iterator(Tokenizer *tokenizer) noexcept;
+
+      Tokenizer *tokenizer;
+      /* mutable: the const operator* and operator-> hand out non-const access */
+      mutable Expected<Token> cvalue;
+    };
+
+    /** Empty tag type used as the end of the range. */
+    struct Sentinel {};
+
+   private:
+    TokenizerRange(Tokenizer *tokenizer);
+
+    Tokenizer *tokenizer;
+  };
+}
diff --git a/lib/src/Tokenizer/Tokenizer.cpp b/lib/src/Tokenizer/Tokenizer.cpp
index 09f0fec..c74574c 100644
--- a/lib/src/Tokenizer/Tokenizer.cpp
+++ b/lib/src/Tokenizer/Tokenizer.cpp
@@ -1,8 +1,8 @@
 #include <artichoke/Tokenizer/Tokenizer.hpp>
 
-#include <print>
 #include <utility>
 
+#include <artichoke/Tokenizer/TokenizerRange.hpp>
 #include <artichoke/Util/Strings.hpp>
 #include <artichoke/Util/Demangle.hpp>
 #include <artichoke/Util/TrieMap.hpp>
@@ -36,8 +36,22 @@ namespace arti::lang {
     return *this;
   }
 
+  TokenizerRange Tokenizer::range() noexcept {
+    return TokenizerRange{ this };
+  }
+
   Expected<void> Tokenizer::consume(std::size_t n) noexcept {
-    while (not tokensBuffer.empty()) {
+    while (n > 0) {
+      if (tokensBuffer.empty()) {
+        if (auto ok = peek(); ! ok) {
+          return Unexpected<>{ ok.error() };
+        }
+
+        if (finished()) {
+          break;
+        }
+      }
+
       tokensBuffer.pop_front();
       n -= 1;
     }
@@ -94,7 +108,11 @@ namespace arti::lang {
 
   bool Tokenizer::finished() const noexcept {
     if (tokensGenerator.finished()) {
-      return tokensBuffer.empty();
+      if (!tokensBuffer.empty()) {
+        return tokensBuffer.front().value == TokenV::tkEOF;
+      }
+
+      return true;
     }
 
     return false;
@@ -340,6 +358,7 @@ namespace arti::lang {
     }
 
     if (*iter == '.') {
+      auto dotIter = iter;
       forward();
 
       while (iter != source.end()) {
@@ -350,6 +369,21 @@ namespace arti::lang {
         forward();
       }
 
+      if ((iter - dotIter) == 1) { /* nothing was consumed after the dot */
+        /* Step back onto the dot so the error is reported at its position */
+        --iter;
+        --column;
+
+        return langException<ExceptCode::ecInvalidLiteral>(
+          line,
+          column,
+          "digit",
+          (iter + 1) == source.end() /* iter is on the dot: look one past it */
+            ? "EOF"
+            : std::string{ *(iter + 1) }
+        );
+      }
+
       return Token{
         TokenV::tkDecimal,
         cLine,
@@ -390,7 +424,6 @@ namespace arti::lang {
       }
 
       if (*iter == '"') {
-        forward();
         break;
       }
 
@@ -412,12 +445,23 @@ namespace arti::lang {
       forward();
     }
 
-    return Token{
-      TokenV::tkString,
-      cLine,
-      cColumn,
-      { stIter, iter }
-    };
+    if (iter != source.end() && *iter == '"') { /* never dereference end() */
+      forward(); /* step past the closing quote */
+
+      return Token{
+        TokenV::tkString,
+        cLine,
+        cColumn,
+        { stIter, iter }
+      };
+    }
+
+    return langException<ExceptCode::ecInvalidLiteral>(
+      line,
+      column,
+      "end of string (\")",
+      "EOF"
+    );
   }
 
   Expected<Token> Tokenizer::readCharacter() {
diff --git a/lib/src/Tokenizer/TokenizerRange.cpp b/lib/src/Tokenizer/TokenizerRange.cpp
new file mode 100644
index 0000000..299c816
--- /dev/null
+++ b/lib/src/Tokenizer/TokenizerRange.cpp
@@ -0,0 +1,84 @@
+#include <artichoke/Tokenizer/TokenizerRange.hpp>
+
+#include <utility>
+
+#include <artichoke/Tokenizer/Tokenizer.hpp>
+
+namespace arti::lang {
+
+  using Iterator = TokenizerRange::Iterator;
+  using Sentinel = TokenizerRange::Sentinel;
+
+  TokenizerRange::TokenizerRange(Tokenizer *tokenizer)
+    : tokenizer(tokenizer) { }
+
+  TokenizerRange::TokenizerRange(TokenizerRange &&other) noexcept
+    : tokenizer(std::exchange(other.tokenizer, nullptr)) { }
+
+  TokenizerRange &TokenizerRange::operator=(TokenizerRange &&other) noexcept {
+    this->tokenizer = std::exchange(other.tokenizer, nullptr);
+    return *this;
+  }
+
+  Iterator TokenizerRange::begin() {
+    return Iterator{ this->tokenizer };
+  }
+
+  Sentinel TokenizerRange::end() const noexcept {
+    return Sentinel{};
+  }
+
+  Iterator::Iterator(Tokenizer *tokenizer) noexcept
+    : tokenizer(tokenizer)
+    , cvalue(tokenizer->peek()) { }
+
+  Iterator::Iterator(Iterator &&other) noexcept
+    : tokenizer(std::exchange(other.tokenizer, nullptr))
+    , cvalue(std::exchange(other.cvalue, {})) { }
+
+  Iterator &Iterator::operator=(Iterator &&other) noexcept {
+    this->tokenizer = std::exchange(other.tokenizer, nullptr);
+    this->cvalue = std::exchange(other.cvalue, {});
+    return *this;
+  }
+
+  Iterator &Iterator::operator++() {
+    if (this->cvalue) { /* only drop a token when the cached peek holds one */
+      static_cast<void>(tokenizer->consume()); /* result intentionally unused */
+    }
+    this->cvalue = tokenizer->peek(); /* cache the next token (or error) */
+    return *this;
+  }
+
+  void Iterator::operator++(int) {
+    ++*this; /* delegate to pre-increment; nothing is returned */
+  }
+
+  Iterator::ReferenceType Iterator::operator*() const noexcept {
+    return this->cvalue;
+  }
+
+  Iterator::PointerType Iterator::operator->() const noexcept {
+    return &this->cvalue;
+  }
+
+  bool operator==(const Iterator &it, Sentinel) {
+    if (it.tokenizer->finished()) {
+      return true;
+    }
+    return !it.cvalue.has_value() || it.cvalue->value == TokenV::tkEOF;
+  }
+
+  bool operator==(Sentinel, const Iterator &it) {
+    return it == Sentinel{};
+  }
+
+  bool operator!=(const Iterator &it, Sentinel) {
+    return !(it == Sentinel{});
+  }
+
+  bool operator!=(Sentinel, const Iterator &it) {
+    return !(it == Sentinel{});
+  }
+
+} // namespace arti::lang
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
new file mode 100644
index 0000000..14b6ab3
--- /dev/null
+++ b/tests/CMakeLists.txt
@@ -0,0 +1,20 @@
+include(cmake/dependencies.cmake)
+
+enable_testing()
+
+add_library(test INTERFACE)
+
+target_include_directories(
+  test INTERFACE
+    ${CMAKE_CURRENT_SOURCE_DIR}/include
+)
+
+target_link_libraries(
+  test INTERFACE
+    artichoke::library
+    Catch2::Catch2WithMain
+)
+
+include(Catch)
+
+add_subdirectory(Tokenizer)
diff --git a/tests/Tokenizer/CMakeLists.txt b/tests/Tokenizer/CMakeLists.txt
new file mode 100644
index 0000000..7b30d1f
--- /dev/null
+++ b/tests/Tokenizer/CMakeLists.txt
@@ -0,0 +1,16 @@
+file(GLOB_RECURSE TOKENIZER_TEST_SRC "src/**.cpp")
+
+add_executable(
+  test-tokenizer
+    ${TOKENIZER_TEST_SRC}
+)
+
+target_link_libraries(
+  test-tokenizer PRIVATE
+    test
+)
+
+catch_discover_tests(
+  test-tokenizer
+  TEST_PREFIX "Tokenizer."
+)
diff --git a/tests/Tokenizer/src/Api.cpp b/tests/Tokenizer/src/Api.cpp
new file mode 100644
index 0000000..68b6f50
--- /dev/null
+++ b/tests/Tokenizer/src/Api.cpp
@@ -0,0 +1,99 @@
+#include <catch2/catch_test_macros.hpp>
+
+#include <string>
+#include <vector>
+
+#include <artichoke/Tokenizer/Tokenizer.hpp>
+
+namespace lang = arti::lang;
+
+TEST_CASE("API_PeekOffset", "[api][peek]") {
+  const std::string source = "a b c";
+  lang::Tokenizer tkz{ source };
+
+  auto t0 = tkz.peek(0);
+  REQUIRE(t0.has_value());
+  REQUIRE(t0->value == lang::TokenV::tkIdentifier);
+  REQUIRE(t0->strValue == "a");
+
+  auto t1 = tkz.peek(1);
+  REQUIRE(t1.has_value());
+  REQUIRE(t1->value == lang::TokenV::tkIdentifier);
+  REQUIRE(t1->strValue == "b");
+
+  auto t2 = tkz.peek(2);
+  REQUIRE(t2.has_value());
+  REQUIRE(t2->value == lang::TokenV::tkIdentifier);
+  REQUIRE(t2->strValue == "c");
+
+  auto t3 = tkz.peek(3);
+  REQUIRE(t3.has_value());
+  REQUIRE(t3->value == lang::TokenV::tkEOF);
+
+  // Ensure nothing was consumed by peeks
+  REQUIRE_FALSE(tkz.finished());
+  auto t0_again = tkz.peek();
+  REQUIRE(t0_again.has_value());
+  REQUIRE(t0_again->value == lang::TokenV::tkIdentifier);
+  REQUIRE(t0_again->strValue == "a");
+}
+
+TEST_CASE("API_ConsumeAndFinishedSemantics", "[api][consume][finished]") {
+  const std::string source = "x y z";
+  lang::Tokenizer tkz{ source };
+
+  // consume 2 tokens: x, y
+  REQUIRE(tkz.consume(2).has_value());
+  REQUIRE_FALSE(tkz.finished());
+
+  auto t = tkz.peek();
+  REQUIRE(t.has_value());
+  REQUIRE(t->value == lang::TokenV::tkIdentifier);
+  REQUIRE(t->strValue == "z");
+
+  // consume last token
+  REQUIRE(tkz.consume().has_value());
+  // finished() returns true only after EOF token has been produced
+  REQUIRE_FALSE(tkz.finished());
+
+  auto eof = tkz.peek();
+  REQUIRE(eof.has_value());
+  REQUIRE(eof->value == lang::TokenV::tkEOF);
+  REQUIRE(tkz.finished());
+}
+
+TEST_CASE("API_RangeIterationStopsAtEOF", "[api][range]") {
+  const std::string source =
+    "let ident := 42 /* skip this */ + 1";
+  lang::Tokenizer tkz{ source };
+
+  std::vector<lang::TokenV> kinds;
+  std::vector<std::string> lex; // owning copies: `token` is gone after each pass
+
+  for (auto token : tkz.range()) {
+    REQUIRE(token.has_value());
+    kinds.push_back(token->value);
+    lex.emplace_back(token->strValue); // copy now, in case strValue owns its text
+  }
+
+  // Expected sequence: kwLet, tkIdentifier("ident"), opLabel, tkInteger("42"), opPlus, tkInteger("1")
+  REQUIRE(kinds.size() == 6);
+  REQUIRE(kinds[0] == lang::TokenV::kwLet);
+  REQUIRE(kinds[1] == lang::TokenV::tkIdentifier);
+  REQUIRE(kinds[2] == lang::TokenV::opLabel);
+  REQUIRE(kinds[3] == lang::TokenV::tkInteger);
+  REQUIRE(kinds[4] == lang::TokenV::opPlus);
+  REQUIRE(kinds[5] == lang::TokenV::tkInteger);
+
+  REQUIRE(lex[0] == "let");
+  REQUIRE(lex[1] == "ident");
+  REQUIRE(lex[2] == ":=");
+  REQUIRE(lex[3] == "42");
+  REQUIRE(lex[4] == "+");
+  REQUIRE(lex[5] == "1");
+
+  // After iterating the range, peek should yield EOF
+  auto eof = tkz.peek();
+  REQUIRE(eof.has_value());
+  REQUIRE(eof->value == lang::TokenV::tkEOF);
+}
diff --git a/tests/Tokenizer/src/Comments.cpp b/tests/Tokenizer/src/Comments.cpp
new file mode 100644
index 0000000..30cbdc6
--- /dev/null
+++ b/tests/Tokenizer/src/Comments.cpp
@@ -0,0 +1,87 @@
+#include <catch2/catch_test_macros.hpp>
+
+#include <string>
+
+#include <artichoke/Tokenizer/Tokenizer.hpp>
+#include <artichoke/Util/Expected.hpp>
+
+namespace lang = arti::lang;
+
+TEST_CASE("Comments_BlockSkipped", "[comments][block][skip]") {
+  // Ensure that block comments are ignored and do not emit tokens.
+  const std::string source = "foo /* a block comment with symbols 123 !@# */ bar";
+
+  lang::Tokenizer tkz{source};
+
+  auto t1 = tkz.peek();
+  REQUIRE(t1.has_value());
+  REQUIRE(t1->value == lang::TokenV::tkIdentifier);
+  REQUIRE(t1->strValue == "foo");
+
+  REQUIRE(tkz.consume().has_value());
+
+  auto t2 = tkz.peek();
+  REQUIRE(t2.has_value());
+  REQUIRE(t2->value == lang::TokenV::tkIdentifier);
+  REQUIRE(t2->strValue == "bar");
+
+  REQUIRE(tkz.consume().has_value());
+
+  auto eof = tkz.peek();
+  REQUIRE(eof.has_value());
+  REQUIRE(eof->value == lang::TokenV::tkEOF);
+}
+
+TEST_CASE("Comments_BlockMultiline", "[comments][block][multiline]") {
+  const std::string source =
+      "alpha /* line1\n"
+      "line2\n"
+      "line3 */ beta";
+
+  lang::Tokenizer tkz{source};
+
+  auto t1 = tkz.peek();
+  REQUIRE(t1.has_value());
+  REQUIRE(t1->value == lang::TokenV::tkIdentifier);
+  REQUIRE(t1->strValue == "alpha");
+  REQUIRE(tkz.consume().has_value());
+
+  auto t2 = tkz.peek();
+  REQUIRE(t2.has_value());
+  REQUIRE(t2->value == lang::TokenV::tkIdentifier);
+  REQUIRE(t2->strValue == "beta");
+  REQUIRE(tkz.consume().has_value());
+
+  auto eof = tkz.peek();
+  REQUIRE(eof.has_value());
+  REQUIRE(eof->value == lang::TokenV::tkEOF);
+}
+
+TEST_CASE("Comments_UnterminatedBlock_Error", "[comments][block][error]") {
+  // Unterminated block comments should surface an error from the tokenizer.
+  const std::string source = "foo /* this never ends...";
+
+  lang::Tokenizer tkz{source};
+
+  auto t1 = tkz.peek();
+  REQUIRE(t1.has_value());
+  REQUIRE(t1->value == lang::TokenV::tkIdentifier);
+  REQUIRE(t1->strValue == "foo");
+  REQUIRE(tkz.consume().has_value());
+
+  auto errTok = tkz.peek();
+  REQUIRE_FALSE(errTok.has_value());
+
+  // Check error message mentions invalid comment.
+  const auto &err = errTok.error();
+  REQUIRE(err.message.find("Invalid comment") != std::string::npos);
+}
+
+TEST_CASE("Comments_SingleLineUnsupported_Skip", "[comments][.line]") {
+  // The tokenizer currently does NOT support '//' comments.
+  // Keep this test as a placeholder and mark it skipped to avoid failures.
+  SKIP("Single-line '//' comments are not supported yet by the tokenizer");
+  const std::string source = "foo // comment\n bar";
+  lang::Tokenizer tkz{source};
+  (void)tkz; // silence unused
+}
diff --git a/tests/Tokenizer/src/Identifiers.cpp b/tests/Tokenizer/src/Identifiers.cpp
new file mode 100644
index 0000000..b34317d
--- /dev/null
+++ b/tests/Tokenizer/src/Identifiers.cpp
@@ -0,0 +1,127 @@
+#include <catch2/catch_test_macros.hpp>
+
+#include <array>
+#include <string>
+#include <vector>
+
+#include <artichoke/Tokenizer/Tokenizer.hpp>
+#include <Utils.hpp>
+
+namespace lang = arti::lang;
+
+template <std::size_t N>
+static void CommonIdentifiersSuccess(
+  const std::array<std::string_view, N> &ids
+) {
+  const std::string source = SourceFromTokens(ids);
+
+  std::size_t it = 0;
+  lang::Tokenizer tkz{ source };
+
+  for (auto token : tkz.range()) {
+    REQUIRE(token.has_value());
+    REQUIRE(token->value == lang::TokenV::tkIdentifier);
+    REQUIRE(token->strValue == ids.at(it++));
+  }
+
+  REQUIRE(it == ids.size());
+  REQUIRE(tkz.peek().has_value());
+  REQUIRE(tkz.peek()->value == lang::TokenV::tkEOF);
+}
+
+TEST_CASE("Identifiers_Basic", "[identifiers][valid]") {
+  constexpr std::array<std::string_view, 8> ids = {
+    "a", "abc", "a_b", "snake_case", "camelCase", "PascalCase", "_id", "with123"
+  };
+
+  CommonIdentifiersSuccess(ids);
+}
+
+TEST_CASE("Identifiers_DigitsAfterFirst", "[identifiers][valid]") {
+  constexpr std::array<std::string_view, 6> ids = {
+    "a1", "abc123", "_a1_b2", "v2", "x9y8z7", "i18n"
+  };
+
+  CommonIdentifiersSuccess(ids);
+}
+
+TEST_CASE("Identifiers_Long", "[identifiers][valid][long]") {
+  // Create a long identifier to ensure tokenizer handles large spans.
+  std::string longId(512, 'a');
+  std::vector<std::string_view> toks{ longId };
+
+  const std::string source = SourceFromTokens(toks);
+  lang::Tokenizer tkz{ source };
+
+  auto t = tkz.peek();
+  REQUIRE(t.has_value());
+  REQUIRE(t->value == lang::TokenV::tkIdentifier);
+  REQUIRE(t->strValue == longId);
+
+  REQUIRE(tkz.consume().has_value());
+  auto eof = tkz.peek();
+  REQUIRE(eof.has_value());
+  REQUIRE(eof->value == lang::TokenV::tkEOF);
+}
+
+TEST_CASE("Identifiers_WithOperators", "[identifiers][operators]") {
+  // '$' and '?' are operators, not identifier characters.
+  // '$foo' -> opMut, tkIdentifier("foo")
+  // '?bar' -> opOpt, tkIdentifier("bar")
+  const std::string source = "$foo ?bar";
+
+  lang::Tokenizer tkz{ source };
+
+  auto t1 = tkz.peek();
+  REQUIRE(t1.has_value());
+  REQUIRE(t1->value == lang::TokenV::opMut);
+  REQUIRE(tkz.consume().has_value());
+
+  auto t2 = tkz.peek();
+  REQUIRE(t2.has_value());
+  REQUIRE(t2->value == lang::TokenV::tkIdentifier);
+  REQUIRE(t2->strValue == "foo");
+  REQUIRE(tkz.consume().has_value());
+
+  auto t3 = tkz.peek();
+  REQUIRE(t3.has_value());
+  REQUIRE(t3->value == lang::TokenV::opOpt);
+  REQUIRE(tkz.consume().has_value());
+
+  auto t4 = tkz.peek();
+  REQUIRE(t4.has_value());
+  REQUIRE(t4->value == lang::TokenV::tkIdentifier);
+  REQUIRE(t4->strValue == "bar");
+  REQUIRE(tkz.consume().has_value());
+
+  auto eof = tkz.peek();
+  REQUIRE(eof.has_value());
+  REQUIRE(eof->value == lang::TokenV::tkEOF);
+}
+
+TEST_CASE("Identifiers_DotAccess", "[identifiers][dot]") {
+  const std::string source = "foo.bar";
+
+  lang::Tokenizer tkz{ source };
+
+  auto t1 = tkz.peek();
+  REQUIRE(t1.has_value());
+  REQUIRE(t1->value == lang::TokenV::tkIdentifier);
+  REQUIRE(t1->strValue == "foo");
+  REQUIRE(tkz.consume().has_value());
+
+  auto t2 = tkz.peek();
+  REQUIRE(t2.has_value());
+  REQUIRE(t2->value == lang::TokenV::opDot);
+  REQUIRE(tkz.consume().has_value());
+
+  auto t3 = tkz.peek();
+  REQUIRE(t3.has_value());
+  REQUIRE(t3->value == lang::TokenV::tkIdentifier);
+  REQUIRE(t3->strValue == "bar");
+  REQUIRE(tkz.consume().has_value());
+
+  auto eof = tkz.peek();
+  REQUIRE(eof.has_value());
+  REQUIRE(eof->value == lang::TokenV::tkEOF);
+}
diff --git a/tests/Tokenizer/src/Keywords.cpp b/tests/Tokenizer/src/Keywords.cpp
new file mode 100644
index 0000000..fd85cec
--- /dev/null
+++ b/tests/Tokenizer/src/Keywords.cpp
@@ -0,0 +1,93 @@
+#include <catch2/catch_test_macros.hpp>
+
+#include <array>
+
+#include <artichoke/Tokenizer/Tokenizer.hpp>
+
+#include <Utils.hpp>
+
+namespace lang = arti::lang;
+
+template <std::size_t N>
+static void CommonKeywordsSuccess(
+  const std::array<std::string_view, N> &lexemes,
+  const std::array<lang::TokenV, N> &kinds
+) {
+  static_assert(N > 0, "Must provide at least one keyword");
+  const std::string source = SourceFromTokens(lexemes);
+
+  std::size_t it = 0;
+  lang::Tokenizer tkz{ source };
+
+  for (auto token : tkz.range()) {
+    REQUIRE(token.has_value());
+    REQUIRE(token->value == kinds.at(it));
+    REQUIRE(token->strValue == lexemes.at(it));
+    ++it;
+  }
+
+  REQUIRE(it == lexemes.size());
+  REQUIRE(tkz.peek().has_value());
+  REQUIRE(tkz.peek()->value == lang::TokenV::tkEOF);
+}
+
+TEST_CASE("Keywords_AllRecognized", "[keywords][valid]") {
+  constexpr std::array<std::string_view, 31> lexemes = {
+    "_","or","not","and","if","else","fn","enum","struct","def","let","for",
+    "loop","break","continue","while","match","switch","return","unreachable",
+    "defer","errdefer","true","false","null","this","import","export","module","using","this"
+  };
+
+  constexpr std::array<lang::TokenV, 31> kinds = {
+    lang::TokenV::kwUnderscore,
+    lang::TokenV::kwOr,
+    lang::TokenV::kwNot,
+    lang::TokenV::kwAnd,
+    lang::TokenV::kwIf,
+    lang::TokenV::kwElse,
+    lang::TokenV::kwFn,
+    lang::TokenV::kwEnum,
+    lang::TokenV::kwStruct,
+    lang::TokenV::kwDef,
+    lang::TokenV::kwLet,
+    lang::TokenV::kwFor,
+    lang::TokenV::kwLoop,
+    lang::TokenV::kwBreak,
+    lang::TokenV::kwContinue,
+    lang::TokenV::kwWhile,
+    lang::TokenV::kwMatch,
+    lang::TokenV::kwSwitch,
+    lang::TokenV::kwReturn,
+    lang::TokenV::kwUnreachable,
+    lang::TokenV::kwDefer,
+    lang::TokenV::kwErrDefer,
+    lang::TokenV::kwTrue,
+    lang::TokenV::kwFalse,
+    lang::TokenV::kwNull,
+    lang::TokenV::kwThis,
+    lang::TokenV::kwImport,
+    lang::TokenV::kwExport,
+    lang::TokenV::kwModule,
+    lang::TokenV::kwUsing,
+    lang::TokenV::kwThis,
+  };
+
+  CommonKeywordsSuccess(lexemes, kinds);
+}
+
+TEST_CASE("Keywords_PrecedenceOverIdentifiers", "[keywords][precedence]") {
+  // Ensure that keywords are recognized as keywords, while longer names remain identifiers.
+  constexpr std::array<std::string_view, 6> lexemes = {
+    "if", "iff", "return", "returnX", "_", "_id"
+  };
+  constexpr std::array<lang::TokenV, 6> kinds = {
+    lang::TokenV::kwIf,        // "if" is a keyword
+    lang::TokenV::tkIdentifier,// "iff" should be an identifier
+    lang::TokenV::kwReturn,    // "return" is a keyword
+    lang::TokenV::tkIdentifier,// "returnX" is not a keyword
+    lang::TokenV::kwUnderscore,// "_" is a keyword in this language
+    lang::TokenV::tkIdentifier // "_id" is a regular identifier
+  };
+
+  CommonKeywordsSuccess(lexemes, kinds);
+}
diff --git a/tests/Tokenizer/src/Numbers.cpp b/tests/Tokenizer/src/Numbers.cpp
new file mode 100644
index 0000000..1fac2f5
--- /dev/null
+++ b/tests/Tokenizer/src/Numbers.cpp
@@ -0,0 +1,171 @@
+#include <catch2/catch_test_macros.hpp>
+
+#include <array>
+
+#include <artichoke/Tokenizer/Tokenizer.hpp>
+
+#include <Utils.hpp>
+
+namespace lang = arti::lang;
+
+template <std::size_t N>
+static void CommonIntegersSuccess(
+  lang::TokenV type,
+  const std::array<std::string_view, N> &expected
+) {
+  // Each literal must come back, in order, with kind `type` and its exact text.
+  const std::string text = SourceFromTokens(expected);
+  lang::Tokenizer lexer{ text };
+  std::size_t seen = 0;
+  for (auto tok : lexer.range()) {
+    REQUIRE(tok.has_value());
+    REQUIRE(tok->value == type);
+    REQUIRE(tok->strValue == expected.at(seen));
+    ++seen;
+  }
+  // Nothing may be skipped, and only EOF may remain once the range is drained.
+  REQUIRE(seen == expected.size());
+  REQUIRE(lexer.peek().has_value());
+  REQUIRE(lexer.peek()->value == lang::TokenV::tkEOF);
+}
+
+TEST_CASE("Numbers_Integers", "[integers][valid]") {
+  // Plain base-10 literals, from a single digit up to 17 digits.
+  constexpr std::array<std::string_view, 6> decimals{
+    "0", "1", "42", "123456", "98712390", "12381723912465471"
+  };
+  CommonIntegersSuccess(lang::TokenV::tkInteger, decimals);
+}
+
+TEST_CASE("Numbers_HexIntegers", "[integers][valid]") {
+  // 0x-prefixed literals; upper- and lower-case digits, one wider than 64 bits.
+  constexpr std::array<std::string_view, 6> hexLiterals{
+    "0x831", "0xAFEFE", "0xABEBE", "0x7a147e8a3", "0x98712390",
+    "0x1d238c18e7ff239a12465471"
+  };
+  CommonIntegersSuccess(lang::TokenV::tkInteger, hexLiterals);
+}
+
+TEST_CASE("Numbers_OctIntegers", "[integers][valid]") {
+  // A leading zero followed by digits 0-7 is still reported as tkInteger.
+  constexpr std::array<std::string_view, 6> octLiterals{
+    "041", "064123", "0136237", "012345", "01", "071236571236512631723651"
+  };
+  CommonIntegersSuccess(lang::TokenV::tkInteger, octLiterals);
+}
+
+TEST_CASE("Numbers_BinIntegers", "[integers][valid]") {
+  // 0b-prefixed literals, including the single-digit forms 0b0 and 0b1.
+  constexpr std::array<std::string_view, 6> binLiterals{
+    "0b0101101", "0b1", "0b01", "0b0",
+    "0b011010101110101101110101011", "0b11110101011010101"
+  };
+  CommonIntegersSuccess(lang::TokenV::tkInteger, binLiterals);
+}
+
+TEST_CASE("Numbers_Decimal", "[decimals][valid]") {
+  // digits '.' digits; the shared helper is reused with the tkDecimal kind.
+  constexpr std::array<std::string_view, 6> decimalLiterals{
+    "1.0", "0.5", "3.14159", "10.50", "9999.0001", "1375123476175981.813751235"
+  };
+  CommonIntegersSuccess(lang::TokenV::tkDecimal, decimalLiterals);
+}
+
+TEST_CASE("Numbers_UnaryMinusSeparate", "[numbers][unary-minus]") {
+  const std::string text = "-1 -2.5";
+  lang::Tokenizer lexer{ text };
+
+  // The sign is never folded into the literal: '-' arrives as its own operator.
+  auto firstMinus = lexer.peek();
+  REQUIRE(firstMinus.has_value());
+  REQUIRE(firstMinus->value == lang::TokenV::opHyphen);
+  REQUIRE(lexer.consume().has_value());
+  auto integer = lexer.peek();
+  REQUIRE(integer.has_value());
+  REQUIRE(integer->value == lang::TokenV::tkInteger);
+  REQUIRE(integer->strValue == "1");
+  REQUIRE(lexer.consume().has_value());
+
+  auto secondMinus = lexer.peek();
+  REQUIRE(secondMinus.has_value());
+  REQUIRE(secondMinus->value == lang::TokenV::opHyphen);
+  REQUIRE(lexer.consume().has_value());
+  auto decimal = lexer.peek();
+  REQUIRE(decimal.has_value());
+  REQUIRE(decimal->value == lang::TokenV::tkDecimal);
+  REQUIRE(decimal->strValue == "2.5");
+  REQUIRE(lexer.consume().has_value());
+
+  // Both pairs consumed: only the EOF marker may be left.
+  auto end = lexer.peek();
+  REQUIRE(end.has_value());
+  REQUIRE(end->value == lang::TokenV::tkEOF);
+}
+
+TEST_CASE("Numbers_DotBoundaries_Disambiguation", "[numbers][dot][edge]") {
+  // A number must start with a digit, and a '.' inside one must be followed by a digit:
+  // .5   -> '.' + '5'
+  // 10.  -> ERROR (no digit after '.'), then '.' on the next peek
+  // 1..2 -> ERROR (no digit after '.'), then '.' '.' '2' on the following peeks
+  const std::string source = ".5 10. 1..2";
+  lang::Tokenizer tkz{ source };
+
+  // ".5": a lone '.' operator followed by the integer 5
+  auto a1 = tkz.peek();
+  REQUIRE(a1.has_value());
+  REQUIRE(a1->value == lang::TokenV::opDot);
+  REQUIRE(tkz.consume().has_value());
+
+  auto a2 = tkz.peek();
+  REQUIRE(a2.has_value());
+  REQUIRE(a2->value == lang::TokenV::tkInteger);
+  REQUIRE(a2->strValue == "5");
+  REQUIRE(tkz.consume().has_value());
+
+  // "10.": the first peek reports an error; nothing is consumed before peeking again
+  auto err1 = tkz.peek();
+  REQUIRE_FALSE(err1.has_value());
+
+  auto after_err1 = tkz.peek();  // re-peek without consume(): expected to yield the '.'
+  REQUIRE(after_err1.has_value());
+  REQUIRE(after_err1->value == lang::TokenV::opDot);
+  REQUIRE(tkz.consume().has_value());
+
+  // "1..2": again an error first, then '.', '.', and the integer 2
+  auto err2 = tkz.peek();
+  REQUIRE_FALSE(err2.has_value());
+
+  auto dot1 = tkz.peek();  // re-peek without consume(), as above
+  REQUIRE(dot1.has_value());
+  REQUIRE(dot1->value == lang::TokenV::opDot);
+  REQUIRE(tkz.consume().has_value());
+
+  auto dot2 = tkz.peek();
+  REQUIRE(dot2.has_value());
+  REQUIRE(dot2->value == lang::TokenV::opDot);
+  REQUIRE(tkz.consume().has_value());
+
+  auto last = tkz.peek();
+  REQUIRE(last.has_value());
+  REQUIRE(last->value == lang::TokenV::tkInteger);
+  REQUIRE(last->strValue == "2");
+  REQUIRE(tkz.consume().has_value());
+
+  auto eof = tkz.peek();  // the whole input must be used up at this point
+  REQUIRE(eof.has_value());
+  REQUIRE(eof->value == lang::TokenV::tkEOF);
+}
+
+TEST_CASE("Numbers_InvalidPrefixes", "[numbers][invalid]") {
+  // Own each source in a std::string that outlives its Tokenizer (no temporaries).
+  const std::array<std::string, 5> invalids = { "0x", "0b", "0xG", "0b2", "08" };
+  for (const auto &src : invalids) {
+    lang::Tokenizer tkz{ src };
+    auto tok = tkz.peek();
+    REQUIRE_FALSE(tok.has_value());
+    const auto &err = tok.error();
+    REQUIRE(
+      err.message.find("Invalid literal") != std::string::npos
+    );
+  }
+}
diff --git a/tests/Tokenizer/src/Operators.cpp b/tests/Tokenizer/src/Operators.cpp
new file mode 100644
index 0000000..9955bc0
--- /dev/null
+++ b/tests/Tokenizer/src/Operators.cpp
@@ -0,0 +1,178 @@
+#include <catch2/catch_test_macros.hpp>
+
+#include <array>
+#include <string>
+#include <vector>
+
+#include <artichoke/Tokenizer/Tokenizer.hpp>
+#include <Utils.hpp>
+
+namespace lang = arti::lang;
+
+template <std::size_t N>
+static void CommonOpsSuccess(
+  const std::array<std::string_view, N> &lexemes,
+  const std::array<lang::TokenV, N> &kinds
+) {
+  static_assert(N > 0, "Must provide at least one operator");
+  // lexemes[i] must be reported with kind kinds[i] and its exact spelling.
+  const std::string text = SourceFromTokens(lexemes);
+  lang::Tokenizer lexer{ text };
+
+  std::size_t idx = 0;
+  for (auto tok : lexer.range()) {
+    REQUIRE(tok.has_value());
+    REQUIRE(tok->value == kinds.at(idx));
+    REQUIRE(tok->strValue == lexemes.at(idx));
+    ++idx;
+  }
+
+  REQUIRE(idx == lexemes.size());
+  REQUIRE(lexer.peek().has_value());
+  REQUIRE(lexer.peek()->value == lang::TokenV::tkEOF);
+}
+
+TEST_CASE("Operators_SingleChar", "[operators][single]") {
+  constexpr std::array<std::string_view, 25> symbols{
+    ".", "%", "+", "-", "/", "!", "*", ":", ",", "=", ";", "^", "~",
+    "&", "|", "<", ">", "(", ")", "[", "]", "{", "}", "?", "$"
+  };
+  constexpr std::array<lang::TokenV, 25> expectedKinds{
+    lang::TokenV::opDot, lang::TokenV::opMod, lang::TokenV::opPlus, lang::TokenV::opHyphen,
+    lang::TokenV::opSlash, lang::TokenV::opBang, lang::TokenV::opStar, lang::TokenV::opColon,
+    lang::TokenV::opComma, lang::TokenV::opAssign, lang::TokenV::opSemicolon, lang::TokenV::opCaret,
+    lang::TokenV::opTilde, lang::TokenV::opAnd, lang::TokenV::opOr, lang::TokenV::opLt,
+    lang::TokenV::opGt, lang::TokenV::opLParen, lang::TokenV::opRParen, lang::TokenV::opLBracket,
+    lang::TokenV::opRBracket, lang::TokenV::opLSquirly, lang::TokenV::opRSquirly, lang::TokenV::opOpt,
+    lang::TokenV::opMut
+  };
+  // symbols[i] pairs with expectedKinds[i]; both tables list 25 entries.
+  CommonOpsSuccess(symbols, expectedKinds);
+}
+
+TEST_CASE("Operators_MultiChar", "[operators][multi]") {
+  constexpr std::array<std::string_view, 22> symbols{
+    "==","!=", "<=", ">=", "<<", ">>", "&&", "||",
+    "+=", "-=", "*=", "/=", "%=", "&=", "|=",
+    "<<=", ">>=", "&&=", "||=", "->", "::", ":="
+  };
+  constexpr std::array<lang::TokenV, 22> expectedKinds{
+    lang::TokenV::opEq, lang::TokenV::opNeq, lang::TokenV::opLtEq, lang::TokenV::opGtEq,
+    lang::TokenV::opLShift, lang::TokenV::opRShift, lang::TokenV::opBoolAnd, lang::TokenV::opBoolOr,
+    lang::TokenV::opPlusAssign, lang::TokenV::opHyphenAssign, lang::TokenV::opStarAssign, lang::TokenV::opSlashAssign,
+    lang::TokenV::opModAssign, lang::TokenV::opAndAssign, lang::TokenV::opOrAssign,
+    lang::TokenV::opLShiftAssign, lang::TokenV::opRShiftAssign, lang::TokenV::opBoolAndAssign, lang::TokenV::opBoolORAssign,
+    lang::TokenV::opArrow, lang::TokenV::opAccess, lang::TokenV::opLabel
+  };
+  // Two- and three-character operators; symbols[i] pairs with expectedKinds[i].
+  CommonOpsSuccess(symbols, expectedKinds);
+}
+
+TEST_CASE("Operators_DotPrefixedSpecials", "[operators][dot][special]") {
+  constexpr std::array<std::string_view, 4> symbols{
+    ".#", ".[", ".*", ".@"
+  };
+  constexpr std::array<lang::TokenV, 4> expectedKinds{
+    lang::TokenV::opSliceSize, lang::TokenV::opPtrSlice, lang::TokenV::opSlicePtr, lang::TokenV::opReflect
+  };
+  // Each '.'-led pair must surface as one token, not as '.' plus another token.
+  CommonOpsSuccess(symbols, expectedKinds);
+}
+
+TEST_CASE("Operators_GreedyLongestMatch", "[operators][greedy]") {
+  // Every entry has a shorter operator as a prefix; the longest spelling must win.
+  constexpr std::array<std::string_view, 8> symbols{
+    ">>=", "<<=", "&&=", "||=",
+    ">=", "<=", "->", "::"
+  };
+  constexpr std::array<lang::TokenV, 8> expectedKinds{
+    lang::TokenV::opRShiftAssign, lang::TokenV::opLShiftAssign, lang::TokenV::opBoolAndAssign, lang::TokenV::opBoolORAssign,
+    lang::TokenV::opGtEq, lang::TokenV::opLtEq, lang::TokenV::opArrow, lang::TokenV::opAccess
+  };
+
+  CommonOpsSuccess(symbols, expectedKinds);
+}
+
+TEST_CASE("Operators_BoundariesWhitespace", "[operators][boundaries]") {
+  // Two '=' split by a newline are two assignments; they must not merge into '=='.
+  const std::string text = "=\n=";
+  lang::Tokenizer lexer{ text };
+
+  auto upper = lexer.peek();
+  REQUIRE(upper.has_value());
+  REQUIRE(upper->value == lang::TokenV::opAssign);
+  REQUIRE(lexer.consume().has_value());
+  // Second line of the input.
+  auto lower = lexer.peek();
+  REQUIRE(lower.has_value());
+  REQUIRE(lower->value == lang::TokenV::opAssign);
+  REQUIRE(lexer.consume().has_value());
+
+  auto end = lexer.peek();
+  REQUIRE(end.has_value());
+  REQUIRE(end->value == lang::TokenV::tkEOF);
+}
+
+TEST_CASE("Operators_ContextAccessLabelArrow", "[operators][context]") {
+  const std::string text = "ns::name := src->field";
+  lang::Tokenizer lexer{ text };
+
+  auto nsName = lexer.peek();
+  REQUIRE(nsName.has_value());
+  REQUIRE(nsName->value == lang::TokenV::tkIdentifier);
+  REQUIRE(nsName->strValue == "ns");
+  REQUIRE(lexer.consume().has_value());
+  // "::" glued to identifiers on both sides is one access operator.
+  auto accessOp = lexer.peek();
+  REQUIRE(accessOp.has_value());
+  REQUIRE(accessOp->value == lang::TokenV::opAccess);
+  REQUIRE(accessOp->strValue == "::");
+  REQUIRE(lexer.consume().has_value());
+
+  auto member = lexer.peek();
+  REQUIRE(member.has_value());
+  REQUIRE(member->value == lang::TokenV::tkIdentifier);
+  REQUIRE(member->strValue == "name");
+  REQUIRE(lexer.consume().has_value());
+  // ":=" surrounded by spaces is the label operator.
+  auto labelOp = lexer.peek();
+  REQUIRE(labelOp.has_value());
+  REQUIRE(labelOp->value == lang::TokenV::opLabel);
+  REQUIRE(labelOp->strValue == ":=");
+  REQUIRE(lexer.consume().has_value());
+
+  auto srcName = lexer.peek();
+  REQUIRE(srcName.has_value());
+  REQUIRE(srcName->value == lang::TokenV::tkIdentifier);
+  REQUIRE(srcName->strValue == "src");
+  REQUIRE(lexer.consume().has_value());
+  // "->" must be a single arrow, not '-' followed by '>'.
+  auto arrowOp = lexer.peek();
+  REQUIRE(arrowOp.has_value());
+  REQUIRE(arrowOp->value == lang::TokenV::opArrow);
+  REQUIRE(arrowOp->strValue == "->");
+  REQUIRE(lexer.consume().has_value());
+
+  auto fieldName = lexer.peek();
+  REQUIRE(fieldName.has_value());
+  REQUIRE(fieldName->value == lang::TokenV::tkIdentifier);
+  REQUIRE(fieldName->strValue == "field");
+  REQUIRE(lexer.consume().has_value());
+
+  auto end = lexer.peek();
+  REQUIRE(end.has_value());
+  REQUIRE(end->value == lang::TokenV::tkEOF);
+}
+
+TEST_CASE("Operators_InvalidStandalone_Error", "[operators][error]") {
+  // '#' and '@' are only valid as the second character of ".#" / ".@";
+  // a backtick on its own is expected to be rejected as well.
+  const std::vector<std::string> rejected = { "#", "@", "`" };
+  for (const auto &text : rejected) {
+    lang::Tokenizer lexer{ text };
+    auto result = lexer.peek();
+    REQUIRE_FALSE(result.has_value());
+    const auto &failure = result.error();
+    REQUIRE(failure.message.find("Invalid") != std::string::npos);
+  }
+}
diff --git a/tests/Tokenizer/src/Strings.cpp b/tests/Tokenizer/src/Strings.cpp
new file mode 100644
index 0000000..a16a57c
--- /dev/null
+++ b/tests/Tokenizer/src/Strings.cpp
@@ -0,0 +1,168 @@
+#include <catch2/catch_test_macros.hpp>
+
+#include <array>
+#include <string>
+
+#include <artichoke/Tokenizer/Tokenizer.hpp>
+#include <artichoke/Util/Expected.hpp>
+#include <Utils.hpp>
+
+namespace lang = arti::lang;
+
+template <std::size_t N>
+static void CommonLiteralsSuccess(
+  lang::TokenV kind,
+  const std::array<std::string_view, N> &lexemes
+) {
+  // Each literal must come back, in order, with the given kind and its full text.
+  const std::string text = SourceFromTokens(lexemes);
+  lang::Tokenizer lexer{ text };
+  std::size_t seen = 0;
+  for (auto tok : lexer.range()) {
+    REQUIRE(tok.has_value());
+    REQUIRE(tok->value == kind);
+    REQUIRE(tok->strValue == lexemes.at(seen));
+    ++seen;
+  }
+  // Nothing may be skipped, and only EOF may remain once the range is drained.
+  REQUIRE(seen == lexemes.size());
+  REQUIRE(lexer.peek().has_value());
+  REQUIRE(lexer.peek()->value == lang::TokenV::tkEOF);
+}
+
+TEST_CASE("Strings_Simple", "[strings][valid]") {
+  // The expected strValue includes the surrounding double quotes.
+  constexpr std::array<std::string_view, 5> quoted{
+    R"("a")",
+    R"("hello")",
+    R"("with spaces")",
+    R"("12345")",
+    R"Q("!@#$%^&*()")Q"
+  };
+  CommonLiteralsSuccess(lang::TokenV::tkString, quoted);
+}
+
+TEST_CASE("Strings_Escapes", "[strings][valid][escapes]") {
+  // Escapes stay as written (backslash + character) in the token text.
+  constexpr std::array<std::string_view, 5> escaped{
+    R"("quote: \"")",
+    R"("backslash: \\")",
+    R"("newline: \n")",
+    R"("tab: \t")",
+    R"("mix: \"\\\n\t")"
+  };
+
+  CommonLiteralsSuccess(lang::TokenV::tkString, escaped);
+}
+
+TEST_CASE("Strings_OperatorsAdjacency", "[strings][operators]") {
+  // No whitespace anywhere: the closing quote alone must end each string token.
+  const std::string text = R"("foo"+"bar")";
+  lang::Tokenizer lexer{ text };
+
+  auto left = lexer.peek();
+  REQUIRE(left.has_value());
+  REQUIRE(left->value == lang::TokenV::tkString);
+  REQUIRE(left->strValue == R"("foo")");
+  REQUIRE(lexer.consume().has_value());
+  // The '+' squeezed between the two literals.
+  auto plus = lexer.peek();
+  REQUIRE(plus.has_value());
+  REQUIRE(plus->value == lang::TokenV::opPlus);
+  REQUIRE(lexer.consume().has_value());
+
+  auto right = lexer.peek();
+  REQUIRE(right.has_value());
+  REQUIRE(right->value == lang::TokenV::tkString);
+  REQUIRE(right->strValue == R"("bar")");
+  REQUIRE(lexer.consume().has_value());
+
+  auto end = lexer.peek();
+  REQUIRE(end.has_value());
+  REQUIRE(end->value == lang::TokenV::tkEOF);
+}
+
+TEST_CASE("Strings_Unterminated_Error", "[strings][error]") {
+  // An opening quote that is never closed before the input ends.
+  const std::string text = "\"unterminated";
+  lang::Tokenizer lexer{ text };
+
+  // The very first peek must already fail.
+  auto result = lexer.peek();
+  REQUIRE_FALSE(result.has_value());
+
+  // Only a substring of the message is pinned, not its full wording.
+  const auto &failure = result.error();
+  REQUIRE(failure.message.find("Invalid literal") != std::string::npos);
+}
+
+TEST_CASE("Chars_Simple", "[chars][valid]") {
+  // One plain character between single quotes; the quotes are part of strValue.
+  constexpr std::array<std::string_view, 5> quoted{
+    R"('a')",
+    R"('Z')",
+    R"('0')",
+    R"('_')",
+    R"('$')"
+  };
+  CommonLiteralsSuccess(lang::TokenV::tkCharacter, quoted);
+}
+
+TEST_CASE("Chars_Escapes", "[chars][valid][escapes]") {
+  // A backslash escape counts as a single character inside the quotes.
+  constexpr std::array<std::string_view, 4> escaped{
+    R"('\n')",
+    R"('\t')",
+    R"('\\')",
+    R"('\'')"
+  };
+  CommonLiteralsSuccess(lang::TokenV::tkCharacter, escaped);
+}
+
+TEST_CASE("Chars_Invalid_Empty", "[chars][error]") {
+  // Two single quotes with nothing between them.
+  const std::string text = "''";
+  lang::Tokenizer lexer{ text };
+
+  auto result = lexer.peek();
+  REQUIRE_FALSE(result.has_value());
+
+  const auto &failure = result.error();
+  REQUIRE(failure.message.find("Invalid literal") != std::string::npos);
+}
+
+TEST_CASE("Chars_Invalid_Multiple", "[chars][error]") {
+  // More than one character between the single quotes.
+  const std::string text = "'ab'";
+  lang::Tokenizer lexer{ text };
+
+  auto result = lexer.peek();
+  REQUIRE_FALSE(result.has_value());
+
+  const auto &failure = result.error();
+  REQUIRE(failure.message.find("Invalid literal") != std::string::npos);
+}
+
+TEST_CASE("Chars_Unterminated", "[chars][error]") {
+  // The input ends before the closing single quote.
+  const std::string text = "'a";
+  lang::Tokenizer lexer{ text };
+
+  auto result = lexer.peek();
+  REQUIRE_FALSE(result.has_value());
+
+  const auto &failure = result.error();
+  REQUIRE(failure.message.find("Invalid literal") != std::string::npos);
+}
+
+TEST_CASE("Chars_InvalidEscape", "[chars][error][.escapes]") {
+  // NOTE(review): the leading '.' in "[.escapes]" hides this case from default runs.
+  const std::string text = "'\\x'";
+  lang::Tokenizer lexer{ text };
+
+  auto result = lexer.peek();
+  REQUIRE_FALSE(result.has_value());
+
+  const auto &failure = result.error();
+  REQUIRE(failure.message.find("Invalid literal") != std::string::npos);
+}
diff --git a/tests/cmake/dependencies.cmake b/tests/cmake/dependencies.cmake
new file mode 100644
index 0000000..67d09cb
--- /dev/null
+++ b/tests/cmake/dependencies.cmake
@@ -0,0 +1,18 @@
+include(FetchContent)
+
+# Bootstrap CPM: download the v0.40.8 CPM.cmake script into the build tree.
+file(
+  DOWNLOAD
+  https://github.com/cpm-cmake/CPM.cmake/releases/download/v0.40.8/CPM.cmake
+  ${CMAKE_CURRENT_BINARY_DIR}/cmake/CPM.cmake
+  EXPECTED_HASH
+    SHA256=78ba32abdf798bc616bab7c73aac32a17bbd7b06ad9e26a6add69de8f3ae4791
+)
+include(${CMAKE_CURRENT_BINARY_DIR}/cmake/CPM.cmake)
+
+# Test-only dependencies, fetched through CPM.
+# Catch2 is pinned to version 3.6.0 (catchorg/Catch2).
+CPMAddPackage("gh:catchorg/Catch2@3.6.0")
+
+# Add Catch2's extras/ directory to the CMake module search path.
+list(APPEND CMAKE_MODULE_PATH ${Catch2_SOURCE_DIR}/extras)
diff --git a/tests/include/Utils.hpp b/tests/include/Utils.hpp
new file mode 100644
index 0000000..1f958ca
--- /dev/null
+++ b/tests/include/Utils.hpp
@@ -0,0 +1,58 @@
+#pragma once
+
+#include <random>
+
+#include <artichoke/Coroutine/Generator.hpp>
+
+template <std::ranges::range R1, std::ranges::range R2>
+requires(
+  std::is_convertible_v<std::ranges::range_value_t<R1>, std::string_view> and
+  std::is_same_v<std::ranges::range_value_t<R1>, std::ranges::range_value_t<R2>>
+)
+arti::lang::Generator<std::ranges::range_value_t<R1>>
+InterleaveRanges(R1 &&r1, R2 &&r2) {
+  // Yields r1[0], r2[0], r1[1], r2[1], ... and stops once either range runs out.
+  // NOTE(review): r1/r2 are held by reference, so both must outlive the generator.
+  auto lhs = std::ranges::begin(r1);
+  auto lhsEnd = std::ranges::end(r1);
+  auto rhs = std::ranges::begin(r2);
+  auto rhsEnd = std::ranges::end(r2);
+  while (lhs != lhsEnd && rhs != rhsEnd) {
+    yield *lhs;
+    ++lhs;
+    yield *rhs;
+    ++rhs;
+  }
+}
+
+static arti::lang::Generator<std::string_view>
+WhitespaceGenerator(uint32_t maxLen = 5) {
+  // Endless stream of random whitespace runs, each 1..maxLen characters long.
+  constexpr std::array<char, 3> spaceChars{ ' ', '\t', '\n' };
+  // uniform_int_distribution requires a <= b, so a zero maxLen is treated as 1.
+  const uint32_t longest = maxLen > 0 ? maxLen : 1;
+
+  std::string str;
+  std::random_device device;
+  std::mt19937 engine(device());
+  std::uniform_int_distribution<uint32_t> dist(1, longest);
+  // Upper index derived from the table so the two can never drift apart.
+  std::uniform_int_distribution<uint32_t> distChars(0, spaceChars.size() - 1);
+
+  str.reserve(longest);
+  while (true) {
+    str.clear();
+    const auto sz = dist(engine);
+    for (uint32_t i = 0; i < sz; ++i) {
+      str += spaceChars[distChars(engine)];
+    }
+    yield str;  // a view into str: only valid until the generator is resumed
+  }
+}
+
+template <std::ranges::range R>
+requires(std::is_same_v<std::ranges::range_value_t<R>, std::string_view>)
+static std::string SourceFromTokens(R &&tokens) {  // lexemes + random whitespace, flattened
+  return std::ranges::to<std::string>(
+    std::views::join(InterleaveRanges(tokens, WhitespaceGenerator(10))));
+}