artichoke-lang/lib/src/Parser/Literals.cpp
erick-alcachofa 5762497f56
feat(parser): implement Pratt expression parsing and refactor operator types
Signed-off-by: erick-alcachofa <erick@artichoke.dev>

Overhaul the expression parsing mechanism to utilize a Pratt (top-down
operator precedence) parser. This change provides a more scalable and
maintainable way to handle operator precedence and associativity
compared to standard recursive descent.

As part of this transition, the nomenclature for operators has been
refined to reflect their position in the grammar (Prefix, Infix,
Postfix) rather than their arity.

* Renamed `UnaryOperator` and `UnaryExpression` to `PrefixOperator` and
  `PrefixExpression`.
* Renamed `BinaryOperator` and `BinaryExpression` to `InfixOperator` and
  `InfixExpression`.
* Renamed `ScopeAccessExpression` to `ModuleAccessExpression`.
* Introduced `PostfixOperator` enum and associated logic for function
  calls, slicing, and reflection attributes.
* Updated `toDot.cpp` and `toString.cpp` to support the new node types
  and renamed operators.

* Added `Pratt.hpp` and `Pratt.cpp` to define `BindingPower` and map
  operators to their respective precedence levels.
* Added `Operators.cpp` to handle token-to-operator mapping and
  classification (isPrefix, isInfix, isPostfix).
* Refactored `Parser::parseExpression` to implement the core Pratt loop
  using binding power comparisons.

* Moved literal parsing logic into a dedicated `Literals.cpp`.
* Implemented explicit parsing methods for `Integer`, `Float`, `Char`,
  `String`, `Boolean`, and `Null` literals.
* Added support for `this` and `_` (underscore) as identifier
  expressions.

* **Prefix**: `!`, `-`, `~`, `&` (MemPtr), `*` (DerefPtr).
* **Infix**: Arithmetic, Comparison, Bitwise, Logical, and all Compound
  Assignments.
* **Postfix**: `()` (Call), `[]` (Slice/Access), `.#` (Slice length),
  `.*` (Slice pointer), and `.@` (Reflection).

* **Missing Literals**: Struct literals and Array literals are not yet
  implemented in the new parsing flow.
* **Node Specialization**: `MemberAccess`, `PointerMemberAccess`, and
  `ModuleAccess` currently use generic infix logic and need to be
  migrated to their specific AST node types.
* **Error Handling**: Literal parsing (specifically `std::stold` and
  `std::stoul`) needs safety checks to prevent potential exceptions
  during conversion.
* **Diagnostics**: Refine the error message for unexpected tokens in
  postfix expressions to explicitly list supported operators.
* **Generic Ambiguity**: Generic type/function instantiation currently
  causes parsing conflicts with comparison operators (e.g., `Foo<T>`).
  This is a known issue that will be resolved by transitioning the
  grammar to a turbofish-style `::<...>` syntax.
2025-12-26 23:32:49 -06:00

179 lines
5.3 KiB
C++

//============================================================================//
// //
// artichoke programming language //
// //
// Copyright (C) 2025 Erick Saul Guzman Ramos, whoami.artichoke.dev //
// //
// //
// This program is free software: you can redistribute it and/or modify //
// it under the terms of the GNU Affero General Public License as published //
// by the Free Software Foundation, either version 3 of the License, or //
// (at your option) any later version. //
// //
// This program is distributed in the hope that it will be useful, //
// but WITHOUT ANY WARRANTY; without even the implied warranty of //
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
// GNU Affero General Public License for more details. //
// //
// You should have received a copy of the GNU Affero General Public License //
// along with this program. If not, see <https://www.gnu.org/licenses/>. //
// //
//============================================================================//
#include <artichoke/Parser/Parser.hpp>
namespace arti::lang {
Expected<ast::CharLtrlNode>
Parser::parseCharLiteral() {
auto node = ast::MakeNode<ast::CharLtrlNode>();
if (auto ltrl = consume(TokenV::tkCharacter, "character literal"); ! ltrl) {
return Unexpected<>{ std::move(ltrl).error() };
}
else {
node->location = {
.line = ltrl->line,
.column = ltrl->column
};
node->value = static_cast<uint8_t>(ltrl->strValue[1]);
}
return node;
}
Expected<ast::NullLtrlNode>
Parser::parseNullLiteral() {
auto node = ast::MakeNode<ast::NullLtrlNode>();
if (auto ltrl = consume(TokenV::kwNull, "null keyword"); ! ltrl) {
return Unexpected<>{ std::move(ltrl).error() };
}
else {
node->location = {
.line = ltrl->line,
.column = ltrl->column
};
}
return node;
}
Expected<ast::StringLtrlNode>
Parser::parseStringLiteral() {
auto node = ast::MakeNode<ast::StringLtrlNode>();
if (auto ltrl = consume(TokenV::tkString, "string literal"); ! ltrl) {
return Unexpected<>{ std::move(ltrl).error() };
}
else {
node->location = {
.line = ltrl->line,
.column = ltrl->column
};
ltrl->strValue.remove_suffix(1);
ltrl->strValue.remove_prefix(1);
node->value = ltrl->strValue;
}
return node;
}
Expected<ast::FloatLtrlNode>
Parser::parseFloatLiteral() {
auto node = ast::MakeNode<ast::FloatLtrlNode>();
if (auto ltrl = consume(TokenV::tkDecimal, "number literal"); ! ltrl) {
return Unexpected<>{ std::move(ltrl).error() };
}
else {
node->location = {
.line = ltrl->line,
.column = ltrl->column
};
/* TODO: This could throw? */
std::string value{ ltrl->strValue };
node->value = std::stold(value);
}
return node;
}
Expected<ast::IntegerLtrlNode>
Parser::parseIntegerLiteral() {
auto node = ast::MakeNode<ast::IntegerLtrlNode>();
if (auto ltrl = consume(TokenV::tkInteger, "integer literal"); ! ltrl) {
return Unexpected<>{ std::move(ltrl).error() };
}
else {
node->location = {
.line = ltrl->line,
.column = ltrl->column
};
/* TODO: This could throw? */
std::string value{ ltrl->strValue };
node->value = std::stoul(value);
}
return node;
}
Expected<ast::BooleanLtrlNode>
Parser::parseBooleanLiteral() {
auto node = ast::MakeNode<ast::BooleanLtrlNode>();
auto peekToken = tokenizer.peek();
if (! peekToken) {
return Unexpected<>{ std::move(peekToken).error() };
}
node->location = {
.line = peekToken->line,
.column = peekToken->column
};
if (peekToken->value == TokenV::kwTrue) {
node->value = true;
}
else if (peekToken->value == TokenV::kwFalse) {
node->value = false;
}
else {
return langException<ExceptCode::ecUnexpectedToken>(
peekToken->line,
peekToken->column,
toString(*peekToken),
"boolean literal, i.e. "
"any of ( true, false )"
);
}
std::ignore = tokenizer.consume();
return node;
}
Expected<ast::IdentifierExprNode>
Parser::parseIdentifierExpression() {
auto node = ast::MakeNode<ast::IdentifierExprNode>();
if (auto ltrl = consume(TokenV::tkIdentifier, "identifier"); ! ltrl) {
return Unexpected<>{ std::move(ltrl).error() };
}
else {
node->location = {
.line = ltrl->line,
.column = ltrl->column
};
node->identifierName = ltrl->strValue;
}
return node;
}
} // namespace arti::lang