Signed-off-by: erick-alcachofa <erick@artichoke.dev>

Overhaul the expression parsing mechanism to utilize a Pratt (top-down operator precedence) parser. This change provides a more scalable and maintainable way to handle operator precedence and associativity compared to standard recursive descent.

As part of this transition, the nomenclature for operators has been refined to reflect their position in the grammar (Prefix, Infix, Postfix) rather than their arity.

* Renamed `UnaryOperator` and `UnaryExpression` to `PrefixOperator` and `PrefixExpression`.
* Renamed `BinaryOperator` and `BinaryExpression` to `InfixOperator` and `InfixExpression`.
* Renamed `ScopeAccessExpression` to `ModuleAccessExpression`.
* Introduced `PostfixOperator` enum and associated logic for function calls, slicing, and reflection attributes.
* Updated `toDot.cpp` and `toString.cpp` to support the new node types and renamed operators.
* Added `Pratt.hpp` and `Pratt.cpp` to define `BindingPower` and map operators to their respective precedence levels.
* Added `Operators.cpp` to handle token-to-operator mapping and classification (isPrefix, isInfix, isPostfix).
* Refactored `Parser::parseExpression` to implement the core Pratt loop using binding power comparisons.
* Moved literal parsing logic into a dedicated `Literals.cpp`.
* Implemented explicit parsing methods for `Integer`, `Float`, `Char`, `String`, `Boolean`, and `Null` literals.
* Added support for `this` and `_` (underscore) as identifier expressions.
* **Prefix**: `!`, `-`, `~`, `&` (MemPtr), `*` (DerefPtr).
* **Infix**: Arithmetic, Comparison, Bitwise, Logical, and all Compound Assignments.
* **Postfix**: `()` (Call), `[]` (Slice/Access), `.#` (Slice length), `.*` (Slice pointer), and `.@` (Reflection).
* **Missing Literals**: Struct literals and Array literals are not yet implemented in the new parsing flow.
* **Node Specialization**: `MemberAccess`, `PointerMemberAccess`, and `ModuleAccess` currently use generic infix logic and need to be migrated to their specific AST node types.
* **Error Handling**: Literal parsing (specifically `std::stold` and `std::stoul`) needs safety checks to prevent potential exceptions during conversion.
* **Diagnostics**: Refine the error message for unexpected tokens in postfix expressions to explicitly list supported operators.
* **Generic Ambiguity**: Generic type/function instantiation currently causes parsing conflicts with comparison operators (e.g., `Foo<T>`). This is a known issue that will be resolved by transitioning the grammar to a turbofish-style `::<...>` syntax.
179 lines
5.3 KiB
C++
179 lines
5.3 KiB
C++
//============================================================================//
|
|
// //
|
|
// artichoke programming language //
|
|
// //
|
|
// Copyright (C) 2025 Erick Saul Guzman Ramos, whoami.artichoke.dev //
|
|
// //
|
|
// //
|
|
// This program is free software: you can redistribute it and/or modify //
|
|
// it under the terms of the GNU Affero General Public License as published //
|
|
// by the Free Software Foundation, either version 3 of the License, or //
|
|
// (at your option) any later version. //
|
|
// //
|
|
// This program is distributed in the hope that it will be useful, //
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of //
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
|
|
// GNU Affero General Public License for more details. //
|
|
// //
|
|
// You should have received a copy of the GNU Affero General Public License //
|
|
// along with this program. If not, see <https://www.gnu.org/licenses/>. //
|
|
// //
|
|
//============================================================================//
|
|
|
|
#include <artichoke/Parser/Parser.hpp>

#include <cerrno>
#include <charconv>
#include <cstdlib>
#include <string>
#include <system_error>
|
|
|
|
namespace arti::lang {
|
|
|
|
/**
 * Parses a character literal token into a CharLtrlNode.
 *
 * Consumes one `tkCharacter` token, copies the token's line/column onto the
 * node and stores the byte found at index 1 of the token text as the value.
 *
 * NOTE(review): index 1 is presumably the first character after the opening
 * quote. Escape sequences and token texts shorter than two characters are
 * not handled here -- confirm the tokenizer guarantees the token shape.
 *
 * @return The populated node, or the error carried by the consume() result.
 */
Expected<ast::CharLtrlNode>
Parser::parseCharLiteral() {
  auto charNode = ast::MakeNode<ast::CharLtrlNode>();

  auto token = consume(TokenV::tkCharacter, "character literal");

  /* Guard clause: forward the consume() error untouched. */
  if (! token) {
    return Unexpected<>{ std::move(token).error() };
  }

  const auto rawChar = token->strValue[1];

  charNode->location = {
    .line   = token->line,
    .column = token->column
  };
  charNode->value = static_cast<uint8_t>(rawChar);

  return charNode;
}
|
|
|
|
/**
 * Parses the `null` keyword into a NullLtrlNode.
 *
 * Consumes one `kwNull` token and records its line/column on the node; the
 * node carries no further payload.
 *
 * @return The populated node, or the error carried by the consume() result.
 */
Expected<ast::NullLtrlNode>
Parser::parseNullLiteral() {
  auto nullNode = ast::MakeNode<ast::NullLtrlNode>();

  auto token = consume(TokenV::kwNull, "null keyword");

  /* Guard clause: forward the consume() error untouched. */
  if (! token) {
    return Unexpected<>{ std::move(token).error() };
  }

  nullNode->location = {
    .line   = token->line,
    .column = token->column
  };

  return nullNode;
}
|
|
|
|
/**
 * Parses a string literal token into a StringLtrlNode.
 *
 * Consumes one `tkString` token, records its line/column on the node and
 * stores the token text with its first and last character dropped.
 *
 * NOTE(review): the dropped characters are presumably the surrounding quotes.
 * Escape sequences are not decoded here, and a token text shorter than two
 * characters is not guarded against -- confirm the tokenizer's guarantees.
 *
 * @return The populated node, or the error carried by the consume() result.
 */
Expected<ast::StringLtrlNode>
Parser::parseStringLiteral() {
  auto stringNode = ast::MakeNode<ast::StringLtrlNode>();

  auto token = consume(TokenV::tkString, "string literal");

  /* Guard clause: forward the consume() error untouched. */
  if (! token) {
    return Unexpected<>{ std::move(token).error() };
  }

  stringNode->location = {
    .line   = token->line,
    .column = token->column
  };

  /* Trim one character from each end on a local copy of the text. */
  auto contents = token->strValue;
  contents.remove_prefix(1);
  contents.remove_suffix(1);

  stringNode->value = contents;

  return stringNode;
}
|
|
|
|
/**
 * Parses a decimal (floating-point) literal token into a FloatLtrlNode.
 *
 * Consumes one `tkDecimal` token, records its line/column on the node and
 * converts the token text to `long double`.
 *
 * The conversion uses std::strtold instead of std::stold: std::stold throws
 * std::invalid_argument / std::out_of_range, which would escape the parser's
 * Expected-based error channel. The accepted inputs are the same as before
 * (a leading numeric prefix is converted, trailing characters are ignored);
 * only the failure path changes from an exception to a returned error.
 *
 * @return The populated node; the error carried by the consume() result; or
 *         an `ecUnexpectedToken` error when the text holds no convertible
 *         number or the value is out of range for `long double`.
 */
Expected<ast::FloatLtrlNode>
Parser::parseFloatLiteral() {
  auto node = ast::MakeNode<ast::FloatLtrlNode>();

  auto ltrl = consume(TokenV::tkDecimal, "number literal");

  if (! ltrl) {
    return Unexpected<>{ std::move(ltrl).error() };
  }

  node->location = {
    .line   = ltrl->line,
    .column = ltrl->column
  };

  /* strtold needs a NUL-terminated buffer, hence the owning copy. */
  const std::string text{ ltrl->strValue };

  char *parseEnd = nullptr;

  /* strtold only ever sets errno, so clear it to detect ERANGE reliably. */
  errno = 0;
  const long double parsed = std::strtold(text.c_str(), &parseEnd);

  /* parseEnd == begin: nothing was converted; ERANGE: over/underflow. */
  if (parseEnd == text.c_str() || errno == ERANGE) {
    return langException<ExceptCode::ecUnexpectedToken>(
      ltrl->line,
      ltrl->column,
      toString(*ltrl),
      "floating-point literal within the representable range"
    );
  }

  node->value = parsed;

  return node;
}
|
|
|
|
/**
 * Parses an integer literal token into an IntegerLtrlNode.
 *
 * Consumes one `tkInteger` token, records its line/column on the node and
 * converts the token text (base 10) to an unsigned integer.
 *
 * The conversion uses std::from_chars instead of std::stoul:
 *  - std::stoul throws std::invalid_argument / std::out_of_range, which would
 *    escape the parser's Expected-based error channel;
 *  - std::stoul yields `unsigned long`, which is only 32 bits wide on LLP64
 *    targets; `unsigned long long` is at least 64 bits everywhere.
 * As before, the leading run of decimal digits is converted and any trailing
 * characters are ignored.
 *
 * NOTE(review): unlike std::stoul, std::from_chars does not skip leading
 * whitespace or accept a sign; presumably a `tkInteger` text never starts
 * with either -- confirm against the tokenizer.
 *
 * @return The populated node; the error carried by the consume() result; or
 *         an `ecUnexpectedToken` error when the text holds no digits or the
 *         value does not fit in `unsigned long long`.
 */
Expected<ast::IntegerLtrlNode>
Parser::parseIntegerLiteral() {
  auto node = ast::MakeNode<ast::IntegerLtrlNode>();

  auto ltrl = consume(TokenV::tkInteger, "integer literal");

  if (! ltrl) {
    return Unexpected<>{ std::move(ltrl).error() };
  }

  node->location = {
    .line   = ltrl->line,
    .column = ltrl->column
  };

  const auto &text = ltrl->strValue;

  const char *const first = text.data();
  const char *const last  = first + text.size();

  unsigned long long parsed = 0;
  const auto outcome = std::from_chars(first, last, parsed);

  /* invalid_argument: no digits; result_out_of_range: overflow. */
  if (outcome.ec != std::errc{}) {
    return langException<ExceptCode::ecUnexpectedToken>(
      ltrl->line,
      ltrl->column,
      toString(*ltrl),
      "integer literal within the representable range"
    );
  }

  node->value = parsed;

  return node;
}
|
|
|
|
/**
 * Parses a `true` / `false` keyword into a BooleanLtrlNode.
 *
 * Peeks at the next token without consuming it. If it is `kwTrue` or
 * `kwFalse`, the node receives the token's line/column and the matching
 * boolean value, and the token is then consumed. Any other token is left in
 * the tokenizer and reported as an `ecUnexpectedToken` error.
 *
 * @return The populated node, the error carried by the peek() result, or an
 *         `ecUnexpectedToken` error for a non-boolean token.
 */
Expected<ast::BooleanLtrlNode>
Parser::parseBooleanLiteral() {
  auto boolNode = ast::MakeNode<ast::BooleanLtrlNode>();

  auto upcoming = tokenizer.peek();

  if (! upcoming) {
    return Unexpected<>{ std::move(upcoming).error() };
  }

  const bool isTrue = (upcoming->value == TokenV::kwTrue);

  /* Neither keyword matched: reject before touching the token stream. */
  if (! isTrue && ! (upcoming->value == TokenV::kwFalse)) {
    return langException<ExceptCode::ecUnexpectedToken>(
      upcoming->line,
      upcoming->column,
      toString(*upcoming),
      "boolean literal, i.e. "
      "any of ( true, false )"
    );
  }

  boolNode->location = {
    .line   = upcoming->line,
    .column = upcoming->column
  };
  boolNode->value = isTrue;

  /* The token was only peeked so far; drop it now that it is accepted. */
  std::ignore = tokenizer.consume();

  return boolNode;
}
|
|
|
|
/**
 * Parses an identifier token into an IdentifierExprNode.
 *
 * Consumes one `tkIdentifier` token, records its line/column on the node and
 * stores the token text as the identifier name.
 *
 * @return The populated node, or the error carried by the consume() result.
 */
Expected<ast::IdentifierExprNode>
Parser::parseIdentifierExpression() {
  auto identNode = ast::MakeNode<ast::IdentifierExprNode>();

  auto token = consume(TokenV::tkIdentifier, "identifier");

  /* Guard clause: forward the consume() error untouched. */
  if (! token) {
    return Unexpected<>{ std::move(token).error() };
  }

  identNode->location = {
    .line   = token->line,
    .column = token->column
  };
  identNode->identifierName = token->strValue;

  return identNode;
}
|
|
|
|
} // namespace arti::lang
|