diff --git a/lib/include/artichoke/Parser/AST/Common.hpp b/lib/include/artichoke/Parser/AST/Common.hpp index 2d90531..1749405 100644 --- a/lib/include/artichoke/Parser/AST/Common.hpp +++ b/lib/include/artichoke/Parser/AST/Common.hpp @@ -50,35 +50,60 @@ namespace arti::lang::ast { Optional, }; - enum class UnaryOperator { + enum class PrefixOperator { Uninitialized, Not, Minus, BitNot, - Ampersand, - Star, + MemPtr, + DerefPtr, }; - enum class BinaryOperator { + enum class InfixOperator { Uninitialized, + Modulo, + Addition, + Substraction, + Division, + Multiplication, Equal, NotEqual, - GreaterThan, LessThan, - GreaterEqual, + GreaterThan, LessEqual, - BitAnd, - BitXor, - BitOr, + GreaterEqual, LeftShift, RightShift, - Adition, - Substraction, - Multiplication, - Division, - Modulo, BoolAnd, BoolOr, + BitAnd, + BitOr, + BitXor, + Assignment, + ModuleAccess, + MemberAccess, + PointerMemberAccess, + AdditionAssignment, + SubstractionAssignment, + MultiplicationAssignment, + DivisionAssignment, + ModuloAssignment, + BitAndAssignment, + BitOrAssignment, + LShiftAssignment, + RShiftAssignment, + BoolAndAssignment, + BoolOrAssignment, + }; + + enum class PostfixOperator { + Uninitialized, + FunctionCall, + SliceAccess, + SliceSize, + PtrToSlice, + SliceToPtr, + Reflect, }; enum class CompoundAssignOperator { diff --git a/lib/include/artichoke/Parser/AST/Expressions.hpp b/lib/include/artichoke/Parser/AST/Expressions.hpp index e210ea1..695b541 100644 --- a/lib/include/artichoke/Parser/AST/Expressions.hpp +++ b/lib/include/artichoke/Parser/AST/Expressions.hpp @@ -32,8 +32,8 @@ namespace arti::lang::ast { /* Main declaration node types */ struct IdentifierExpression; - struct UnaryExpression; - struct BinaryExpression; + struct PrefixExpression; + struct InfixExpression; struct AssignExpression; struct CompoundAssignExpression; struct FunctionCallExpression; @@ -41,7 +41,7 @@ namespace arti::lang::ast { struct SliceRangeExpression; struct MemberAccessExpression; struct 
PointerAccessExpression; - struct ScopeAccessExpression; + struct ModuleAccessExpression; struct ReflectionExpression; struct SliceCreationExpression; struct SliceLengthExpression; @@ -51,8 +51,8 @@ namespace arti::lang::ast { /* Public Aliases */ using IdentifierExprNode = Ptr; - using UnaryExprNode = Ptr; - using BinaryExprNode = Ptr; + using PrefixExprNode = Ptr; + using InfixExprNode = Ptr; using AssignExprNode = Ptr; using CompoundAssignExprNode = Ptr; using FunctionCallExprNode = Ptr; @@ -60,7 +60,7 @@ namespace arti::lang::ast { using SliceRangeExprNode = Ptr; using MemberAccessExprNode = Ptr; using PointerAccessExprNode = Ptr; - using ScopeAccessExprNode = Ptr; + using ModuleAccessExprNode = Ptr; using ReflectionExprNode = Ptr; using SliceCreationExprNode = Ptr; using SliceLengthExprNode = Ptr; @@ -77,8 +77,8 @@ namespace arti::lang::ast { StructLtrlNode, SliceLtrlNode, IdentifierExprNode, - UnaryExprNode, - BinaryExprNode, + PrefixExprNode, + InfixExprNode, AssignExprNode, CompoundAssignExprNode, FunctionCallExprNode, @@ -86,7 +86,7 @@ namespace arti::lang::ast { SliceRangeExprNode, MemberAccessExprNode, PointerAccessExprNode, - ScopeAccessExprNode, + ModuleAccessExprNode, SliceCreationExprNode, SliceLengthExprNode, SlicePtrExprNode, @@ -99,16 +99,16 @@ namespace arti::lang::ast { String identifierName; }; - struct nodes::UnaryExpression { + struct nodes::PrefixExpression { SourceLocation location; - UnaryOperator op; + PrefixOperator op; ExpressionNode right; }; - struct nodes::BinaryExpression { + struct nodes::InfixExpression { SourceLocation location; - BinaryOperator op; + InfixOperator op; ExpressionNode left; ExpressionNode right; }; @@ -164,7 +164,7 @@ namespace arti::lang::ast { ExpressionNode object; }; - struct nodes::ScopeAccessExpression { + struct nodes::ModuleAccessExpression { SourceLocation location; String memberName; diff --git a/lib/include/artichoke/Parser/Parser.hpp b/lib/include/artichoke/Parser/Parser.hpp index 11ab366..4a52bed 
100644 --- a/lib/include/artichoke/Parser/Parser.hpp +++ b/lib/include/artichoke/Parser/Parser.hpp @@ -167,7 +167,40 @@ namespace arti::lang { parseExpressionStatement(); Expected - parseExpression(); + parseExpression(std::uint16_t p = 0); + + Expected> + parsePrimaryExpression(); + + Expected + parsePrefixExpression(); + + Expected + parseInfixExpression(ast::ExpressionNode lhs); + + Expected + parsePostfixExpression(ast::ExpressionNode lhs); + + Expected + parseIdentifierExpression(); + + Expected + parseCharLiteral(); + + Expected + parseNullLiteral(); + + Expected + parseStringLiteral(); + + Expected + parseFloatLiteral(); + + Expected + parseIntegerLiteral(); + + Expected + parseBooleanLiteral(); private: std::string unitName; diff --git a/lib/include/artichoke/Parser/Pratt.hpp b/lib/include/artichoke/Parser/Pratt.hpp new file mode 100644 index 0000000..d24d29f --- /dev/null +++ b/lib/include/artichoke/Parser/Pratt.hpp @@ -0,0 +1,48 @@ +//============================================================================// +// // +// artichoke programming language // +// // +// Copyright (C) 2025 Erick Saul Guzman Ramos, whoami.artichoke.dev // +// // +// // +// This program is free software: you can redistribute it and/or modify // +// it under the terms of the GNU Affero General Public License as published // +// by the Free Software Foundation, either version 3 of the License, or // +// (at your option) any later version. // +// // +// This program is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // +// GNU Affero General Public License for more details. // +// // +// You should have received a copy of the GNU Affero General Public License // +// along with this program. If not, see <https://www.gnu.org/licenses/>. 
// +// // +//============================================================================// + +#include + +namespace arti::lang::pratt { + + struct BindingPower { + std::uint16_t left; + std::uint16_t right; + }; + + ast::PostfixOperator getPostfixOperator(TokenV tokenType); + ast::PrefixOperator getPrefixOperator(TokenV tokenType); + ast::InfixOperator getInfixOperator(TokenV tokenType); + + bool isPostfixOperator(TokenV tokenType); + bool isPrefixOperator(TokenV tokenType); + bool isInfixOperator(TokenV tokenType); + + std::uint16_t postfixBindingPower(ast::PostfixOperator op); + std::uint16_t prefixBindingPower(ast::PrefixOperator op); + BindingPower infixBindingPower(ast::InfixOperator op); + + bool isCompoundAssignOperator(ast::InfixOperator op); + + ast::CompoundAssignOperator getCompoundOperatorType(ast::InfixOperator op); + +} diff --git a/lib/src/Parser/AST/toDot.cpp b/lib/src/Parser/AST/toDot.cpp index 9e5eb19..50042f9 100644 --- a/lib/src/Parser/AST/toDot.cpp +++ b/lib/src/Parser/AST/toDot.cpp @@ -81,41 +81,57 @@ namespace arti::lang::ast { }; // Operator label helpers (matching AST.cpp) - std::string toString(UnaryOperator op) { - using enum UnaryOperator; + std::string toString(PrefixOperator op) { + using enum PrefixOperator; switch (op) { case Not: return "Not (!)"; case Minus: return "Minus (-)"; case BitNot: return "BitNot (~)"; - case Ampersand: return "Ampersand (&)"; - case Star: return "Star (*)"; + case MemPtr: return "MemPtr (&)"; + case DerefPtr: return "DerefPtr (*)"; default: std::unreachable(); break; } std::unreachable(); } - std::string toString(BinaryOperator op) { - using enum BinaryOperator; + std::string toString(InfixOperator op) { + using enum InfixOperator; switch (op) { - case Equal: return "Equal (==)"; - case NotEqual: return "NotEqual (!=)"; - case GreaterThan: return "GreaterThan (>)"; - case LessThan: return "LessThan (<)"; - case GreaterEqual: return "GreaterEqual (>=)"; - case LessEqual: return "LessEqual (<=)"; - case 
BitAnd: return "BitAnd (&)"; - case BitXor: return "BitXor (^)"; - case BitOr: return "BitOr (|)"; - case LeftShift: return "LeftShift (<<)"; - case RightShift: return "RightShift (>>)"; - case Adition: return "Adition (+)"; - case Substraction: return "Substraction (-)"; - case Multiplication: return "Multiplication (*)"; - case Division: return "Division (/)"; - case Modulo: return "Modulo (%)"; - case BoolAnd: return "BoolAnd (&&)"; - case BoolOr: return "BoolOr (||)"; - default: std::unreachable(); break; + case Equal: return "Equal (==)"; + case NotEqual: return "NotEqual (!=)"; + case GreaterThan: return "GreaterThan (>)"; + case LessThan: return "LessThan (<)"; + case GreaterEqual: return "GreaterEqual (>=)"; + case LessEqual: return "LessEqual (<=)"; + case BitAnd: return "BitAnd (&)"; + case BitXor: return "BitXor (^)"; + case BitOr: return "BitOr (|)"; + case LeftShift: return "LeftShift (<<)"; + case RightShift: return "RightShift (>>)"; + case Addition: return "Addition (+)"; + case Substraction: return "Substraction (-)"; + case Multiplication: return "Multiplication (*)"; + case Division: return "Division (/)"; + case Modulo: return "Modulo (%)"; + case BoolAnd: return "BoolAnd (&&)"; + case BoolOr: return "BoolOr (||)"; + case Assignment: return "Assignment (=)"; + case ModuleAccess: return "ModuleAccess (::)"; + case MemberAccess: return "MemberAccess (.)"; + case PointerMemberAccess: return "PointerMemberAccess (->)"; + case AdditionAssignment: return "AdditionAssignment (+=)"; + case SubstractionAssignment: return "SubstractionAssignment (-=)"; + case MultiplicationAssignment: return "MultiplicationAssignment (*=)"; + case DivisionAssignment: return "DivisionAssignment (/=)"; + case ModuloAssignment: return "ModuloAssignment (%=)"; + case BitAndAssignment: return "BitAndAssignment (&=)"; + case BitOrAssignment: return "BitOrAssignment (|=)"; + case BoolAndAssignment: return "BoolAndAssignment (&&=)"; + case BoolOrAssignment: return 
"BoolOrAssignment (||=)"; + case LShiftAssignment: return "LShiftAssignment (<<=)"; + case RShiftAssignment: return "RShiftAssignment (>>=)"; + + default: std::unreachable(); break; } std::unreachable(); } @@ -172,8 +188,8 @@ namespace arti::lang::ast { std::string emit(const StructLtrlPositionalInitializerNode&, GraphBuilder&); std::string emit(const StructLtrlInitializerNode &, GraphBuilder &); std::string emit(const IdentifierExprNode &, GraphBuilder &); - std::string emit(const UnaryExprNode &, GraphBuilder &); - std::string emit(const BinaryExprNode &, GraphBuilder &); + std::string emit(const PrefixExprNode &, GraphBuilder &); + std::string emit(const InfixExprNode &, GraphBuilder &); std::string emit(const AssignExprNode &, GraphBuilder &); std::string emit(const CompoundAssignExprNode &, GraphBuilder &); std::string emit(const FunctionCallExprNode &, GraphBuilder &); @@ -181,7 +197,7 @@ namespace arti::lang::ast { std::string emit(const SliceRangeExprNode &, GraphBuilder &); std::string emit(const MemberAccessExprNode &, GraphBuilder &); std::string emit(const PointerAccessExprNode &, GraphBuilder &); - std::string emit(const ScopeAccessExprNode &, GraphBuilder &); + std::string emit(const ModuleAccessExprNode &, GraphBuilder &); std::string emit(const ReflectionExprNode &, GraphBuilder &); std::string emit(const SliceCreationExprNode &, GraphBuilder &); std::string emit(const SliceLengthExprNode &, GraphBuilder &); @@ -629,8 +645,8 @@ namespace arti::lang::ast { return g.makeNode(std::format("Identifier `{}`", node->identifierName)); } - std::string emit(const UnaryExprNode &node, GraphBuilder &g) { - auto id = g.makeNode("UnaryExpression"); + std::string emit(const PrefixExprNode &node, GraphBuilder &g) { + auto id = g.makeNode("PrefixExpression"); auto opLeaf = makeLeaf(g, toString(node->op)); g.addEdge(id, opLeaf, "Operator"); auto rhs = emit(node->right, g); @@ -638,8 +654,8 @@ namespace arti::lang::ast { return id; } - std::string emit(const 
BinaryExprNode &node, GraphBuilder &g) { - auto id = g.makeNode("BinaryExpression"); + std::string emit(const InfixExprNode &node, GraphBuilder &g) { + auto id = g.makeNode("InfixExpression"); auto opLeaf = makeLeaf(g, toString(node->op)); g.addEdge(id, opLeaf, "Operator"); auto lhs = emit(node->left, g); @@ -709,7 +725,7 @@ namespace arti::lang::ast { return id; } - std::string emit(const ScopeAccessExprNode &node, GraphBuilder &g) { + std::string emit(const ModuleAccessExprNode &node, GraphBuilder &g) { auto id = g.makeNode("ScopeAccessExpression"); g.addEdge(id, emit(node->scope, g), "Object"); if (! node->genericParams.empty()) { @@ -764,8 +780,8 @@ namespace arti::lang::ast { [&g](const StructLtrlNode &n) { return emit(n, g); }, [&g](const SliceLtrlNode &n) { return emit(n, g); }, [&g](const IdentifierExprNode &n) { return emit(n, g); }, - [&g](const UnaryExprNode &n) { return emit(n, g); }, - [&g](const BinaryExprNode &n) { return emit(n, g); }, + [&g](const PrefixExprNode &n) { return emit(n, g); }, + [&g](const InfixExprNode &n) { return emit(n, g); }, [&g](const AssignExprNode &n) { return emit(n, g); }, [&g](const CompoundAssignExprNode &n) { return emit(n, g); }, [&g](const FunctionCallExprNode &n) { return emit(n, g); }, @@ -773,7 +789,7 @@ namespace arti::lang::ast { [&g](const SliceRangeExprNode &n) { return emit(n, g); }, [&g](const MemberAccessExprNode &n) { return emit(n, g); }, [&g](const PointerAccessExprNode &n) { return emit(n, g); }, - [&g](const ScopeAccessExprNode &n) { return emit(n, g); }, + [&g](const ModuleAccessExprNode &n) { return emit(n, g); }, [&g](const SliceCreationExprNode &n) { return emit(n, g); }, [&g](const SliceLengthExprNode &n) { return emit(n, g); }, [&g](const SlicePtrExprNode &n) { return emit(n, g); }, diff --git a/lib/src/Parser/AST/toString.cpp b/lib/src/Parser/AST/toString.cpp index 9617e30..0b27951 100644 --- a/lib/src/Parser/AST/toString.cpp +++ b/lib/src/Parser/AST/toString.cpp @@ -62,8 +62,8 @@ namespace 
arti::lang::ast { std::string toString(const StructLtrlPositionalInitializerNode&, std::string); std::string toString(const StructLtrlInitializerNode &, std::string); std::string toString(const IdentifierExprNode &, std::string); - std::string toString(const UnaryExprNode &, std::string); - std::string toString(const BinaryExprNode &, std::string); + std::string toString(const PrefixExprNode &, std::string); + std::string toString(const InfixExprNode &, std::string); std::string toString(const AssignExprNode &, std::string); std::string toString(const CompoundAssignExprNode &, std::string); std::string toString(const FunctionCallExprNode &, std::string); @@ -71,7 +71,7 @@ namespace arti::lang::ast { std::string toString(const SliceRangeExprNode &, std::string); std::string toString(const MemberAccessExprNode &, std::string); std::string toString(const PointerAccessExprNode &, std::string); - std::string toString(const ScopeAccessExprNode &, std::string); + std::string toString(const ModuleAccessExprNode &, std::string); std::string toString(const ReflectionExprNode &, std::string); std::string toString(const SliceCreationExprNode &, std::string); std::string toString(const SliceLengthExprNode &, std::string); @@ -100,8 +100,8 @@ namespace arti::lang::ast { std::string toString(const ElseBranchNode &, std::string); std::string toString(const DeferableNode &, std::string); std::string toString(const PreLoopStmtNode &, std::string); - std::string toString(UnaryOperator op); - std::string toString(BinaryOperator op); + std::string toString(PrefixOperator op); + std::string toString(InfixOperator op); std::string toString(CompoundAssignOperator op); const auto StrTreeNoNode = "│ "; @@ -725,9 +725,9 @@ namespace arti::lang::ast { return std::format("Identifier `{}`", node->identifierName); } - std::string toString(const UnaryExprNode &node, std::string prefix) { + std::string toString(const PrefixExprNode &node, std::string prefix) { std::stringstream ss; - ss << 
"UnaryExpression"; + ss << "PrefixExpression"; int total = 2; int emitted = 0; appendGroupLeaf( @@ -741,9 +741,9 @@ namespace arti::lang::ast { return ss.str(); } - std::string toString(const BinaryExprNode &node, std::string prefix) { + std::string toString(const InfixExprNode &node, std::string prefix) { std::stringstream ss; - ss << "BinaryExpression"; + ss << "InfixExpression"; int total = 3; int emitted = 0; appendGroupLeaf( @@ -849,7 +849,7 @@ namespace arti::lang::ast { return ss.str(); } - std::string toString(const ScopeAccessExprNode &node, std::string prefix) { + std::string toString(const ModuleAccessExprNode &node, std::string prefix) { std::stringstream ss; ss << "ScopeAccessExpression"; int total = 2; @@ -943,10 +943,10 @@ namespace arti::lang::ast { [padding](const IdentifierExprNode &node) -> std::string { return toString(node, padding); }, - [padding](const UnaryExprNode &node) -> std::string { + [padding](const PrefixExprNode &node) -> std::string { return toString(node, padding); }, - [padding](const BinaryExprNode &node) -> std::string { + [padding](const InfixExprNode &node) -> std::string { return toString(node, padding); }, [padding](const AssignExprNode &node) -> std::string { @@ -970,7 +970,7 @@ namespace arti::lang::ast { [padding](const PointerAccessExprNode &node) -> std::string { return toString(node, padding); }, - [padding](const ScopeAccessExprNode &node) -> std::string { + [padding](const ModuleAccessExprNode &node) -> std::string { return toString(node, padding); }, [padding](const SliceCreationExprNode &node) -> std::string { @@ -1466,46 +1466,60 @@ namespace arti::lang::ast { return std::visit(visitor, node); } - std::string toString(UnaryOperator op) { - using enum UnaryOperator; + std::string toString(PrefixOperator op) { + using enum PrefixOperator; switch (op) { case Not: return "Not (!)"; case Minus: return "Minus (-)"; case BitNot: return "BitNot (~)"; - case Ampersand: return "Ampersand (&)"; - case Star: return "Star 
(*)"; + case MemPtr: return "MemPtr (&)"; + case DerefPtr: return "DerefPtr (*)"; default: std::unreachable(); break; } std::unreachable(); } - std::string toString(BinaryOperator op) { - using enum BinaryOperator; - + std::string toString(InfixOperator op) { + using enum InfixOperator; switch (op) { - case Equal: return "Equal (==)"; - case NotEqual: return "NotEqual (!=)"; - case GreaterThan: return "GreaterThan (>)"; - case LessThan: return "LessThan (<)"; - case GreaterEqual: return "GreaterEqual (>=)"; - case LessEqual: return "LessEqual (<=)"; - case BitAnd: return "BitAnd (&)"; - case BitXor: return "BitXor (^)"; - case BitOr: return "BitOr (|)"; - case LeftShift: return "LeftShift (<<)"; - case RightShift: return "RightShift (>>)"; - case Adition: return "Adition (+)"; - case Substraction: return "Substraction (-)"; - case Multiplication: return "Multiplication (*)"; - case Division: return "Division (/)"; - case Modulo: return "Modulo (%)"; - case BoolAnd: return "BoolAnd (&&)"; - case BoolOr: return "BoolOr (||)"; - default: std::unreachable(); break; - } + case Equal: return "Equal (==)"; + case NotEqual: return "NotEqual (!=)"; + case GreaterThan: return "GreaterThan (>)"; + case LessThan: return "LessThan (<)"; + case GreaterEqual: return "GreaterEqual (>=)"; + case LessEqual: return "LessEqual (<=)"; + case BitAnd: return "BitAnd (&)"; + case BitXor: return "BitXor (^)"; + case BitOr: return "BitOr (|)"; + case LeftShift: return "LeftShift (<<)"; + case RightShift: return "RightShift (>>)"; + case Addition: return "Addition (+)"; + case Substraction: return "Substraction (-)"; + case Multiplication: return "Multiplication (*)"; + case Division: return "Division (/)"; + case Modulo: return "Modulo (%)"; + case BoolAnd: return "BoolAnd (&&)"; + case BoolOr: return "BoolOr (||)"; + case Assignment: return "Assignment (=)"; + case ModuleAccess: return "ModuleAccess (::)"; + case MemberAccess: return "MemberAccess (.)"; + case PointerMemberAccess: return 
"PointerMemberAccess (->)"; + case AdditionAssignment: return "AdditionAssignment (+=)"; + case SubstractionAssignment: return "SubstractionAssignment (-=)"; + case MultiplicationAssignment: return "MultiplicationAssignment (*=)"; + case DivisionAssignment: return "DivisionAssignment (/=)"; + case ModuloAssignment: return "ModuloAssignment (%=)"; + case BitAndAssignment: return "BitAndAssignment (&=)"; + case BitOrAssignment: return "BitOrAssignment (|=)"; + case BoolAndAssignment: return "BoolAndAssignment (&&=)"; + case BoolOrAssignment: return "BoolOrAssignment (||=)"; + case LShiftAssignment: return "LShiftAssignment (<<=)"; + case RShiftAssignment: return "RShiftAssignment (>>=)"; + default: std::unreachable(); break; + } std::unreachable(); } diff --git a/lib/src/Parser/Expressions.cpp b/lib/src/Parser/Expressions.cpp index 94039e0..c7ffb89 100644 --- a/lib/src/Parser/Expressions.cpp +++ b/lib/src/Parser/Expressions.cpp @@ -21,12 +21,530 @@ //============================================================================// #include +#include namespace arti::lang { Expected - Parser::parseExpression() { - return {}; + Parser::parseExpression(std::uint16_t minBindingPower) { + auto peekToken = tokenizer.peek(); + if (! peekToken) { + return Unexpected<>{ std::move(peekToken).error() }; + } + + bool keepParsing = true; + ast::Optional lhs = std::nullopt; + + if (peekToken->value == TokenV::opLParen) { + std::ignore = tokenizer.consume(); + + if (auto lhsExpr = parseExpression(); ! lhsExpr) { + return Unexpected<>{ std::move(lhsExpr).error() }; + } + else { + if (auto rParen = consume(TokenV::opRParen, "')'"); ! rParen) { + return Unexpected<>{ std::move(rParen).error() }; + } + + lhs = std::move(lhsExpr).value(); + } + } + else if (pratt::isPrefixOperator(peekToken->value)) { + if (auto newLhs = parsePrefixExpression(); ! 
newLhs) { + return Unexpected<>{ std::move(newLhs).error() }; + } + else { + lhs = std::move(newLhs).value(); + } + } + else { + if (auto expr = parsePrimaryExpression(); ! expr) { + return Unexpected<>{ std::move(expr).error() }; + } + else if (not expr.value().has_value()) { + return langException( + peekToken->line, + peekToken->column, + toString(*peekToken), + "primary expression, i.e. " + "any of ( null, boolean, number, character, string, identifier )" + ); + } + else { + lhs = std::move(expr).value().value(); + } + } + + while (keepParsing) { + peekToken = tokenizer.peek(); + if (! peekToken) { + return Unexpected<>{ std::move(peekToken).error() }; + } + + if (pratt::isPostfixOperator(peekToken->value)) { + auto op = pratt::getPostfixOperator(peekToken->value); + auto bindingPower = pratt::postfixBindingPower(op); + + if (bindingPower < minBindingPower) { + keepParsing = false; + } + else { + if (auto newLhs = parsePostfixExpression(std::move(lhs).value()); + ! newLhs) { + return Unexpected<>{ std::move(newLhs).error() }; + } + else { + lhs = std::move(newLhs).value(); + } + } + } + else if (pratt::isInfixOperator(peekToken->value)) { + auto op = pratt::getInfixOperator(peekToken->value); + auto [lbp, rbp] = pratt::infixBindingPower(op); + + if (lbp < minBindingPower) { + keepParsing = false; + } + else { + if (auto newLhs = parseInfixExpression(std::move(lhs).value()); + ! newLhs) { + return Unexpected<>{ std::move(newLhs).error() }; + } + else { + lhs = std::move(newLhs).value(); + } + } + } + else { + keepParsing = false; + } + } + + if (not lhs.has_value()) { + return langException( + peekToken->line, + peekToken->column, + toString(*peekToken), + "primary expression, i.e. " + "any of ( null, boolean, number, character, string, identifier )" + ); + } + else { + return std::move(lhs).value(); + } + } + + Expected> + Parser::parsePrimaryExpression() { + auto peekToken = tokenizer.peek(); + + if (! 
peekToken) { + return Unexpected<>{ std::move(peekToken).error() }; + } + + if (peekToken->value == TokenV::tkInteger) { + if (auto expr = parseIntegerLiteral(); ! expr) { + return Unexpected<>{ std::move(expr).error() }; + } + else { + return std::move(expr).value(); + } + } + else if (peekToken->value == TokenV::tkDecimal) { + if (auto expr = parseFloatLiteral(); ! expr) { + return Unexpected<>{ std::move(expr).error() }; + } + else { + return std::move(expr).value(); + } + } + else if (peekToken->value == TokenV::tkCharacter) { + if (auto expr = parseCharLiteral(); ! expr) { + return Unexpected<>{ std::move(expr).error() }; + } + else { + return std::move(expr).value(); + } + } + else if (peekToken->value == TokenV::tkString) { + if (auto expr = parseStringLiteral(); ! expr) { + return Unexpected<>{ std::move(expr).error() }; + } + else { + return std::move(expr).value(); + } + } + else if (peekToken->value == TokenV::kwTrue || + peekToken->value == TokenV::kwFalse) { + if (auto expr = parseBooleanLiteral(); ! expr) { + return Unexpected<>{ std::move(expr).error() }; + } + else { + return std::move(expr).value(); + } + } + else if (peekToken->value == TokenV::kwNull) { + if (auto expr = parseNullLiteral(); ! expr) { + return Unexpected<>{ std::move(expr).error() }; + } + else { + return std::move(expr).value(); + } + } + else if (peekToken->value == TokenV::tkIdentifier) { + if (auto expr = parseIdentifierExpression(); ! expr) { + return Unexpected<>{ std::move(expr).error() }; + } + else { + return std::move(expr).value(); + } + } + else if (peekToken->value == TokenV::kwThis) { + auto node = ast::MakeNode(); + + if (auto ltrl = consume(TokenV::kwThis, "'this' keyword"); ! 
ltrl) { + return Unexpected<>{ std::move(ltrl).error() }; + } + else { + node->location = { + .line = ltrl->line, + .column = ltrl->column + }; + node->identifierName = ltrl->strValue; + } + + return node; + } + else if (peekToken->value == TokenV::kwUnderscore) { + auto node = ast::MakeNode(); + + if (auto ltrl = consume(TokenV::kwUnderscore, "'_' keyword"); ! ltrl) { + return Unexpected<>{ std::move(ltrl).error() }; + } + else { + node->location = { + .line = ltrl->line, + .column = ltrl->column + }; + node->identifierName = ltrl->strValue; + } + + return node; + } + + return std::nullopt; } + Expected + Parser::parsePrefixExpression() { + auto peekToken = tokenizer.peek(); + if (! peekToken) { + return Unexpected<>{ std::move(peekToken).error() }; + } + + auto op = pratt::getPrefixOperator(peekToken->value); + auto bindingPower = pratt::prefixBindingPower(op); + + std::ignore = tokenizer.consume(); + + auto rhs = parseExpression(bindingPower); + + auto node = ast::MakeNode(); + + node->location = { + .line = peekToken->line, + .column = peekToken->column + }; + + node->op = op; + node->right = std::move(rhs).value(); + + return node; + } + + Expected + Parser::parseInfixExpression(ast::ExpressionNode lhs) { + auto peekToken = tokenizer.peek(); + if (! peekToken) { + return Unexpected<>{ std::move(peekToken).error() }; + } + + std::ignore = tokenizer.consume(); + + auto op = pratt::getInfixOperator(peekToken->value); + auto [lbp, rbp] = pratt::infixBindingPower(op); + + auto rhs = parseExpression(rbp); + + if (! 
rhs) { + return Unexpected<>{ std::move(rhs).error() }; + } + + /* TODO: MemberAccess and PointerMemberAccess do not use their respective + * nodes types yet */ + /* TODO: ModuleAccess do not use its respective node type yet */ + if (op == ast::InfixOperator::Assignment) { + auto node = ast::MakeNode(); + + node->location = { + .line = peekToken->line, + .column = peekToken->column + }; + + node->left = std::move(lhs); + node->right = std::move(rhs).value(); + + return node; + } + else if (pratt::isCompoundAssignOperator(op)) { + auto node = ast::MakeNode(); + + node->location = { + .line = peekToken->line, + .column = peekToken->column + }; + + node->op = pratt::getCompoundOperatorType(op); + node->left = std::move(lhs); + node->right = std::move(rhs).value(); + + return node; + } + else { + auto node = ast::MakeNode(); + + node->location = { + .line = peekToken->line, + .column = peekToken->column + }; + + node->op = op; + node->left = std::move(lhs); + node->right = std::move(rhs).value(); + + return node; + } + } + + Expected + Parser::parsePostfixExpression(ast::ExpressionNode lhs) { + auto peekToken = tokenizer.peek(); + if (! peekToken) { + return Unexpected<>{ std::move(peekToken).error() }; + } + + std::ignore = tokenizer.consume(); + + auto op = pratt::getPostfixOperator(peekToken->value); + auto bindingPower = pratt::postfixBindingPower(op); + + std::optional node = std::nullopt; + + if (op == ast::PostfixOperator::FunctionCall) { + auto newNode = ast::MakeNode(); + + newNode->location = { + .line = peekToken->line, + .column = peekToken->column + }; + + bool stillParams = true; + + if (auto close = match(TokenV::opRParen); ! close) { + return Unexpected<>{ std::move(close).error() }; + } + else if (close.value()) { + stillParams = false; + } + + while (stillParams) { + auto arg = parseExpression(); + + if (! 
arg) { + return Unexpected<>{ std::move(arg).error() }; + } + + newNode->arguments.emplace_back(std::move(arg).value()); + + if (auto comma = matchAndConsume(TokenV::opComma); ! comma) { + return Unexpected{ std::move(comma).error() }; + } + else if (! comma.value()) { + if (auto ntok = tokenizer.peek(); ! ntok) { + return Unexpected{ std::move(ntok).error() }; + } + else { + if (ntok->value != TokenV::opRParen) { + return langException( + ntok->line, + ntok->column, + toString(*ntok), + "',' or ')'" + ); + } + else { + stillParams = false; + } + } + } + + if (auto close = match(TokenV::opRParen); ! close) { + return Unexpected<>{ std::move(close).error() }; + } + else if (close.value()) { + stillParams = false; + } + } + + if (auto close = consume(TokenV::opRParen, "')'"); ! close) { + return Unexpected<>{ std::move(close).error() }; + } + + newNode->callee = std::move(lhs); + + node = std::move(newNode); + } + else if (op == ast::PostfixOperator::SliceAccess) { + auto idx = parseExpression(); + + if (! idx) { + return Unexpected<>{ std::move(idx).error() }; + } + + if (auto range = matchAndConsume(TokenV::opColon); ! range) { + return Unexpected<>{ std::move(range).error() }; + } + else if (range.value()) { + auto newNode = ast::MakeNode(); + + newNode->location = { + .line = peekToken->line, + .column = peekToken->column + }; + + newNode->start = std::move(idx).value(); + + auto endIdx = parseExpression(); + + if (! endIdx) { + return Unexpected<>{ std::move(endIdx).error() }; + } + + newNode->end = std::move(endIdx).value(); + + if (auto close = consume(TokenV::opRBracket, "']'"); ! close) { + return Unexpected<>{ std::move(close).error() }; + } + + newNode->slice = std::move(lhs); + + node = std::move(newNode); + } + else { + auto newNode = ast::MakeNode(); + + newNode->location = { + .line = peekToken->line, + .column = peekToken->column + }; + + newNode->index = std::move(idx).value(); + + if (auto close = consume(TokenV::opRBracket, "']'"); ! 
close) { + return Unexpected<>{ std::move(close).error() }; + } + + newNode->slice = std::move(lhs); + + node = std::move(newNode); + } + } + else if (op == ast::PostfixOperator::SliceSize) { + auto newNode = ast::MakeNode(); + + newNode->location = { + .line = peekToken->line, + .column = peekToken->column + }; + + newNode->object = std::move(lhs); + + node = std::move(newNode); + } + else if (op == ast::PostfixOperator::PtrToSlice) { + auto newNode = ast::MakeNode(); + + newNode->location = { + .line = peekToken->line, + .column = peekToken->column + }; + + auto len = parseExpression(); + + if (! len) { + return Unexpected<>{ std::move(len).error() }; + } + + newNode->length = std::move(len).value(); + + if (auto close = consume(TokenV::opRBracket, "']'"); ! close) { + return Unexpected<>{ std::move(close).error() }; + } + + newNode->object = std::move(lhs); + + node = std::move(newNode); + } + else if (op == ast::PostfixOperator::SliceToPtr) { + auto newNode = ast::MakeNode(); + + newNode->location = { + .line = peekToken->line, + .column = peekToken->column + }; + + newNode->object = std::move(lhs); + + node = std::move(newNode); + } + else if (op == ast::PostfixOperator::Reflect) { + auto newNode = ast::MakeNode(); + + newNode->location = { + .line = peekToken->line, + .column = peekToken->column + }; + + if (auto ident = match(TokenV::tkIdentifier); ! ident) { + return Unexpected<>{ std::move(ident).error() }; + } + else if (ident.value()) { + if (auto ident = consume(TokenV::tkIdentifier, "identifier"); + ! ident) { + return Unexpected<>{ std::move(ident).error() }; + } + else { + newNode->attribute = ident->strValue; + } + } + + newNode->object = std::move(lhs); + + node = std::move(newNode); + } + + if (not node.has_value()) { + return langException( + peekToken->line, + peekToken->column, + toString(*peekToken), + "postfix operator, i.e. 
" + "any of ( /*TODO*/ )" + ); + } + + return std::move(node).value(); + } + + } // namespace arti::lang diff --git a/lib/src/Parser/Literals.cpp b/lib/src/Parser/Literals.cpp index c03cfc7..ebb668a 100644 --- a/lib/src/Parser/Literals.cpp +++ b/lib/src/Parser/Literals.cpp @@ -20,3 +20,159 @@ // // //============================================================================// +#include + +namespace arti::lang { + + /* Consumes one tkCharacter token and builds a character-literal node: the token's line and column become the node location and strValue[1] becomes the value. NOTE(review): index 1 presumably skips an opening quote, and an escape sequence would yield only its first character - confirm against the tokenizer. A failed consume() is returned as Unexpected. */ Expected + Parser::parseCharLiteral() { + auto node = ast::MakeNode(); + + if (auto ltrl = consume(TokenV::tkCharacter, "character literal"); ! ltrl) { + return Unexpected<>{ std::move(ltrl).error() }; + } + else { + node->location = { + .line = ltrl->line, + .column = ltrl->column + }; + node->value = static_cast(ltrl->strValue[1]); + } + + return node; + } + + /* Consumes the kwNull token and returns a null-literal node that carries only the token's source location. A failed consume() is returned as Unexpected. */ Expected + Parser::parseNullLiteral() { + auto node = ast::MakeNode(); + + if (auto ltrl = consume(TokenV::kwNull, "null keyword"); ! ltrl) { + return Unexpected<>{ std::move(ltrl).error() }; + } + else { + node->location = { + .line = ltrl->line, + .column = ltrl->column + }; + } + + return node; + } + + /* Consumes one tkString token, drops the first and the last character of strValue (presumably the surrounding quotes - confirm) and stores the remainder as the node value. No escape processing is done here. A failed consume() is returned as Unexpected. */ Expected + Parser::parseStringLiteral() { + auto node = ast::MakeNode(); + + if (auto ltrl = consume(TokenV::tkString, "string literal"); ! ltrl) { + return Unexpected<>{ std::move(ltrl).error() }; + } + else { + node->location = { + .line = ltrl->line, + .column = ltrl->column + }; + + ltrl->strValue.remove_suffix(1); + ltrl->strValue.remove_prefix(1); + + node->value = ltrl->strValue; + } + + return node; + } + + /* Consumes one tkDecimal token, copies its text into a std::string and converts it with std::stold. NOTE(review): std::stold reports failure by throwing std::invalid_argument or std::out_of_range and nothing here catches it, so a literal that does not fit would leave this function as an exception rather than as an Unexpected. */ Expected + Parser::parseFloatLiteral() { + auto node = ast::MakeNode(); + + if (auto ltrl = consume(TokenV::tkDecimal, "number literal"); ! ltrl) { + return Unexpected<>{ std::move(ltrl).error() }; + } + else { + node->location = { + .line = ltrl->line, + .column = ltrl->column + }; + /* TODO: This could throw? 
*/ + std::string value{ ltrl->strValue }; + node->value = std::stold(value); + } + + return node; + } + + /* Consumes one tkInteger token, copies its text into a std::string and converts it with std::stoul. NOTE(review): std::stoul reports failure by throwing std::invalid_argument or std::out_of_range and nothing here catches it, so an oversized literal would leave this function as an exception rather than as an Unexpected. */ Expected + Parser::parseIntegerLiteral() { + auto node = ast::MakeNode(); + + if (auto ltrl = consume(TokenV::tkInteger, "integer literal"); ! ltrl) { + return Unexpected<>{ std::move(ltrl).error() }; + } + else { + node->location = { + .line = ltrl->line, + .column = ltrl->column + }; + /* TODO: This could throw? */ + std::string value{ ltrl->strValue }; + node->value = std::stoul(value); + } + + return node; + } + + /* Peeks the next token without consuming it: kwTrue and kwFalse set the node value, any other token produces a langException that names the expected literals. The token is consumed only after a successful match, and the result of tokenizer.consume() is discarded through std::ignore. */ Expected + Parser::parseBooleanLiteral() { + auto node = ast::MakeNode(); + + auto peekToken = tokenizer.peek(); + + if (! peekToken) { + return Unexpected<>{ std::move(peekToken).error() }; + } + + node->location = { + .line = peekToken->line, + .column = peekToken->column + }; + + if (peekToken->value == TokenV::kwTrue) { + node->value = true; + } + else if (peekToken->value == TokenV::kwFalse) { + node->value = false; + } + else { + return langException( + peekToken->line, + peekToken->column, + toString(*peekToken), + "boolean literal, i.e. " + "any of ( true, false )" + ); + } + + std::ignore = tokenizer.consume(); + + return node; + } + + /* Consumes one tkIdentifier token and builds an identifier-expression node holding the token's location and its strValue as identifierName. A failed consume() is returned as Unexpected. */ Expected + Parser::parseIdentifierExpression() { + auto node = ast::MakeNode(); + + if (auto ltrl = consume(TokenV::tkIdentifier, "identifier"); ! 
ltrl) { + return Unexpected<>{ std::move(ltrl).error() }; + } + else { + node->location = { + .line = ltrl->line, + .column = ltrl->column + }; + node->identifierName = ltrl->strValue; + } + + return node; + } + +} // namespace arti::lang diff --git a/lib/src/Parser/Operators.cpp b/lib/src/Parser/Operators.cpp new file mode 100644 index 0000000..ce9b9a9 --- /dev/null +++ b/lib/src/Parser/Operators.cpp @@ -0,0 +1,288 @@ +//============================================================================// +// // +// artichoke programming language // +// // +// Copyright (C) 2025 Erick Saul Guzman Ramos, whoami.artichoke.dev // +// // +// // +// This program is free software: you can redistribute it and/or modify // +// it under the terms of the GNU Affero General Public License as published // +// by the Free Software Foundation, either version 3 of the License, or // +// (at your option) any later version. // +// // +// This program is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // +// GNU Affero General Public License for more details. // +// // +// You should have received a copy of the GNU Affero General Public License // +// along with this program. If not, see . 
// +// // +//============================================================================// + +#include + +namespace arti::lang::pratt { + + bool isPrefixOperator(TokenV tokenType) { + switch(tokenType) { + using enum TokenV; + + case opHyphen: + case opBang: + case opStar: + case opTilde: + case opAnd: + case opLParen: + case kwNot: + return true; + + default: + return false; + } + } + + bool isInfixOperator(TokenV tokenType) { + switch(tokenType) { + using enum TokenV; + + case opDot: + case opMod: + case opPlus: + case opHyphen: + case opSlash: + case opStar: + case opAssign: + case opAccess: + case opEq: + case opNeq: + case opLt: + case opGt: + case opLtEq: + case opGtEq: + case opLShift: + case opRShift: + case opBoolAnd: + case opBoolOr: + case kwAnd: + case kwOr: + case opAnd: + case opOr: + case opCaret: + case opArrow: + case opPlusAssign: + case opHyphenAssign: + case opStarAssign: + case opSlashAssign: + case opModAssign: + case opAndAssign: + case opOrAssign: + case opLShiftAssign: + case opRShiftAssign: + case opBoolAndAssign: + case opBoolOrAssign: + return true; + default: + return false; + } + } + + bool isPostfixOperator(TokenV tokenType) { + switch(tokenType) { + using enum TokenV; + + case opLParen: + case opLBracket: + case opSliceSize: + case opPtrSlice: + case opSlicePtr: + case opReflect: + return true; + default: + return false; + } + } + + + ast::PrefixOperator getPrefixOperator(TokenV tokenType) { + using enum ast::PrefixOperator; + + switch(tokenType) { + using enum TokenV; + + case opHyphen: + return Minus; + case kwNot: + case opBang: + return Not; + case opStar: + return DerefPtr; + case opTilde: + return BitNot; + case opAnd: + return MemPtr; + default: + return Uninitialized; + } + } + + ast::InfixOperator getInfixOperator(TokenV tokenType) { + using enum ast::InfixOperator; + + switch(tokenType) { + using enum TokenV; + + case opMod: + return Modulo; + case opPlus: + return Addition; + case opHyphen: + return Substraction; + case 
opSlash: + return Division; + case opStar: + return Multiplication; + case opEq: + return Equal; + case opNeq: + return NotEqual; + case opLt: + return LessThan; + case opGt: + return GreaterThan; + case opLtEq: + return LessEqual; + case opGtEq: + return GreaterEqual; + case opLShift: + return LeftShift; + case opRShift: + return RightShift; + case kwAnd: + case opBoolAnd: + return BoolAnd; + case kwOr: + case opBoolOr: + return BoolOr; + case opAnd: + return BitAnd; + case opOr: + return BitOr; + case opCaret: + return BitXor; + case opAssign: + return Assignment; + case opAccess: + return ModuleAccess; + case opDot: + return MemberAccess; + case opArrow: + return PointerMemberAccess; + case opPlusAssign: + return AdditionAssignment; + case opHyphenAssign: + return SubstractionAssignment; + case opStarAssign: + return MultiplicationAssignment; + case opSlashAssign: + return DivisionAssignment; + case opModAssign: + return ModuloAssignment; + case opAndAssign: + return BitAndAssignment; + case opOrAssign: + return BitOrAssignment; + case opLShiftAssign: + return LShiftAssignment; + case opRShiftAssign: + return RShiftAssignment; + case opBoolAndAssign: + return BoolAndAssignment; + case opBoolOrAssign: + return BoolOrAssignment; + default: + return Uninitialized; + } + } + + ast::PostfixOperator getPostfixOperator(TokenV tokenType) { + using enum ast::PostfixOperator; + + switch(tokenType) { + using enum TokenV; + + case opLParen: + return FunctionCall; + case opLBracket: + return SliceAccess; + case opSliceSize: + return SliceSize; + case opPtrSlice: + return PtrToSlice; + case opSlicePtr: + return SliceToPtr; + case opReflect: + return Reflect; + default: + return Uninitialized; + } + } + + bool isCompoundAssignOperator(ast::InfixOperator op) { + + switch(op) { + using enum ast::InfixOperator; + + case AdditionAssignment: + case SubstractionAssignment: + case MultiplicationAssignment: + case DivisionAssignment: + case ModuloAssignment: + case BitAndAssignment: + 
case BitOrAssignment: + case LShiftAssignment: + case RShiftAssignment: + case BoolAndAssignment: + case BoolOrAssignment: + return true; + + default: + return false; + } + + } + + ast::CompoundAssignOperator getCompoundOperatorType(ast::InfixOperator op) { + switch(op) { + using enum ast::InfixOperator; + + case AdditionAssignment: + return ast::CompoundAssignOperator::Addition; + case SubstractionAssignment: + return ast::CompoundAssignOperator::Substraction; + case MultiplicationAssignment: + return ast::CompoundAssignOperator::Multiplication; + case DivisionAssignment: + return ast::CompoundAssignOperator::Division; + case ModuloAssignment: + return ast::CompoundAssignOperator::Modulo; + case BitAndAssignment: + return ast::CompoundAssignOperator::BitAnd; + case BitOrAssignment: + return ast::CompoundAssignOperator::BitOr; + case LShiftAssignment: + return ast::CompoundAssignOperator::LeftShift; + case RShiftAssignment: + return ast::CompoundAssignOperator::RightShift; + case BoolAndAssignment: + return ast::CompoundAssignOperator::BoolAnd; + case BoolOrAssignment: + return ast::CompoundAssignOperator::BoolOr; + + default: + return ast::CompoundAssignOperator::Uninitialized; + } + } + +} + diff --git a/lib/src/Parser/Pratt.cpp b/lib/src/Parser/Pratt.cpp new file mode 100644 index 0000000..48516fb --- /dev/null +++ b/lib/src/Parser/Pratt.cpp @@ -0,0 +1,111 @@ +//============================================================================// +// // +// artichoke programming language // +// // +// Copyright (C) 2025 Erick Saul Guzman Ramos, whoami.artichoke.dev // +// // +// // +// This program is free software: you can redistribute it and/or modify // +// it under the terms of the GNU Affero General Public License as published // +// by the Free Software Foundation, either version 3 of the License, or // +// (at your option) any later version. 
// +// // +// This program is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // +// GNU Affero General Public License for more details. // +// // +// You should have received a copy of the GNU Affero General Public License // +// along with this program. If not, see . // +// // +//============================================================================// + +#include
+
+namespace arti::lang::pratt {
+
+    // Binding-power layout, loosest to tightest. Every precedence level owns
+    // its own values and no value is shared between two levels, so the
+    // grouping does not depend on whether the caller's loop stops on
+    // `left < min` or on `left <= min`.
+    //
+    //    level                                   { left, right }
+    //    --------------------------------------------------------
+    //    Assignment, *Assignment                 {  2,  1 }  right-assoc
+    //    BoolOr                                  {  3,  4 }
+    //    BoolAnd                                 {  5,  6 }
+    //    BitOr                                   {  7,  8 }
+    //    BitXor                                  {  9, 10 }
+    //    BitAnd                                  { 11, 12 }
+    //    Equal, NotEqual                         { 13, 14 }
+    //    LessThan .. GreaterEqual                { 15, 16 }
+    //    LeftShift, RightShift                   { 17, 18 }
+    //    Addition, Substraction                  { 19, 20 }
+    //    Multiplication, Division, Modulo        { 21, 22 }
+    //    prefix operators                          23
+    //    postfix operators                         25
+    //    Module/Member/PointerMember access      { 27, 28 }
+    //
+    // 0 is reserved for "not an operator".
+
+    /**
+     * @brief Binding power of the operand that follows a prefix operator.
+     *
+     * @param op Prefix operator.
+     * @return 23 for every known prefix operator (tighter than all
+     *         arithmetic/logical infix operators, looser than postfix
+     *         operators and member access); 0 for Uninitialized or any
+     *         unlisted value.
+     */
+    std::uint16_t prefixBindingPower(ast::PrefixOperator op) {
+        switch (op) {
+            case ast::PrefixOperator::Not:
+            case ast::PrefixOperator::Minus:
+            case ast::PrefixOperator::BitNot:
+            case ast::PrefixOperator::MemPtr:
+            case ast::PrefixOperator::DerefPtr:
+                return 23;
+
+            default:
+                return 0;
+        }
+    }
+
+    /**
+     * @brief Left and right binding powers of an infix operator.
+     *
+     * Left-associative operators have left < right; the right-associative
+     * assignment family has left > right.
+     *
+     * @param op Infix operator.
+     * @return The { left, right } pair from the layout table at the top of
+     *         this namespace; { 0, 0 } for Uninitialized or any unlisted
+     *         value.
+     */
+    BindingPower infixBindingPower(ast::InfixOperator op) {
+        switch (op) {
+            // Assignment (right-associative: left > right). Its left power
+            // must stay below the right power of every other operator so
+            // that `a || b = c` is not grouped as `a || (b = c)`.
+            case ast::InfixOperator::Assignment:
+            case ast::InfixOperator::AdditionAssignment:
+            case ast::InfixOperator::SubstractionAssignment:
+            case ast::InfixOperator::MultiplicationAssignment:
+            case ast::InfixOperator::DivisionAssignment:
+            case ast::InfixOperator::ModuloAssignment:
+            case ast::InfixOperator::BitAndAssignment:
+            case ast::InfixOperator::BitOrAssignment:
+            case ast::InfixOperator::BoolAndAssignment:
+            case ast::InfixOperator::BoolOrAssignment:
+            case ast::InfixOperator::LShiftAssignment:
+            case ast::InfixOperator::RShiftAssignment:
+                return { 2, 1 };
+
+            // Logical
+            case ast::InfixOperator::BoolOr:
+                return { 3, 4 };
+            case ast::InfixOperator::BoolAnd:
+                return { 5, 6 };
+
+            // Bitwise
+            case ast::InfixOperator::BitOr:
+                return { 7, 8 };
+            case ast::InfixOperator::BitXor:
+                return { 9, 10 };
+            case ast::InfixOperator::BitAnd:
+                return { 11, 12 };
+
+            // Equality
+            case ast::InfixOperator::Equal:
+            case ast::InfixOperator::NotEqual:
+                return { 13, 14 };
+
+            // Relational
+            case ast::InfixOperator::LessThan:
+            case ast::InfixOperator::GreaterThan:
+            case ast::InfixOperator::LessEqual:
+            case ast::InfixOperator::GreaterEqual:
+                return { 15, 16 };
+
+            // Shift
+            case ast::InfixOperator::LeftShift:
+            case ast::InfixOperator::RightShift:
+                return { 17, 18 };
+
+            // Additive
+            case ast::InfixOperator::Addition:
+            case ast::InfixOperator::Substraction:
+                return { 19, 20 };
+
+            // Multiplicative
+            case ast::InfixOperator::Multiplication:
+            case ast::InfixOperator::Division:
+            case ast::InfixOperator::Modulo:
+                return { 21, 22 };
+
+            // Member access (tightest)
+            case ast::InfixOperator::ModuleAccess:
+            case ast::InfixOperator::MemberAccess:
+            case ast::InfixOperator::PointerMemberAccess:
+                return { 27, 28 };
+
+            default:
+                return { 0, 0 };
+        }
+    }
+
+    /**
+     * @brief Binding power of a postfix operator towards its left operand.
+     *
+     * @param op Postfix operator.
+     * @return 25 for every known postfix operator (tighter than prefix
+     *         operators, looser than member access); 0 for Uninitialized or
+     *         any unlisted value.
+     */
+    std::uint16_t postfixBindingPower(ast::PostfixOperator op) {
+        switch (op) {
+            case ast::PostfixOperator::FunctionCall:
+            case ast::PostfixOperator::SliceAccess:
+            case ast::PostfixOperator::SliceSize:
+            case ast::PostfixOperator::PtrToSlice:
+            case ast::PostfixOperator::SliceToPtr:
+            case ast::PostfixOperator::Reflect:
+                return 25;
+
+            default:
+                return 0;
+        }
+    }
+
+} // namespace arti::lang::pratt