fix(parser): support empty blocks, support nested scoping, and refine loop lookahead
Signed-off-by: erick-alcachofa <erick@artichoke.dev>
This commit addresses several critical issues in the recursive descent
parser, specifically regarding the handling of empty constructs,
statement termination, and AST representation of nested scopes. These
changes bring the implementation in line with the Artichoke EBNF
specification.
* **CodeBlock as Statement:** Added `CodeBlockStmtNode` to the
`StatementNode` variant. This allows a bare `{}` to be treated as a
valid statement, enabling manual scoping within functions.
* **Visitor Support:** Updated `toDot.cpp` (Graphviz) and `toString.cpp`
(Pretty-print) to support the new `CodeBlockStmtNode` during AST
traversal.
* **Empty Member Lists:** Implemented a pre-loop check for the closing
brace `}` in `parseStruct` and `parseEnum`. This prevents the parser
from attempting to parse members in empty declarations (e.g., `struct
Empty {}`).
* **Diagnostic Accuracy:** Enhanced the member-parsing loop to provide
better error context. If a member is not followed by a comma or a
closing brace, the parser now explicitly suggests `',' or '}'` as the
expected tokens.
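* **Nested Scopes:** The parser now correctly identifies a `{` at the
start of a statement and dispatches to `parseCodeBlock`. A label placed
before a bare block is rejected with an unexpected-token error, because
labels are only accepted in front of a loop keyword (`for`, `while`,
`do`, `loop`).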
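* **Empty Code Blocks:** Added a check for `}` both immediately after
the opening `{` (before the block-parsing loop starts) and after each
parsed statement, allowing functions or nested scopes to be empty and
ending the loop as soon as the closing brace is next.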
* **C-Style For-Loops:** Replaced `match` with `matchAndConsume` for the
initialization semicolon, so the `;` is consumed rather than only
matched. This allows the parser to correctly handle loops where the
initialization is omitted (e.g., `for (; 1; 1)`).
* **Correctness:** Resolves parser hangs or errors when encountering
empty blocks.
* **Compliance:** Fully supports the EBNF definition of zero-or-more
members/statements.
* **Visuals:** AST diagrams now accurately reflect nested block
structures.
This commit is contained in:
parent
a3d5c0ac68
commit
30d64d9b65
@ -93,7 +93,8 @@ namespace arti::lang::ast {
|
||||
WhileStmtNode,
|
||||
DoWhileStmtNode,
|
||||
InfLoopStmtNode,
|
||||
ExpressionStmtNode
|
||||
ExpressionStmtNode,
|
||||
CodeBlockStmtNode
|
||||
>;
|
||||
|
||||
using ElseBranchNode = Variant<
|
||||
|
||||
@ -1002,6 +1002,7 @@ namespace arti::lang::ast {
|
||||
[&g](const DoWhileStmtNode &n) { return emit(n, g); },
|
||||
[&g](const InfLoopStmtNode &n) { return emit(n, g); },
|
||||
[&g](const ExpressionStmtNode &n) { return emit(n, g); },
|
||||
[&g](const CodeBlockStmtNode &n) { return emit(n, g); },
|
||||
};
|
||||
return std::visit(visitor, node);
|
||||
}
|
||||
|
||||
@ -1419,6 +1419,9 @@ namespace arti::lang::ast {
|
||||
[padding](const ExpressionStmtNode &node) -> std::string {
|
||||
return toString(node, padding);
|
||||
},
|
||||
[padding](const CodeBlockStmtNode &node) -> std::string {
|
||||
return toString(node, padding);
|
||||
}
|
||||
};
|
||||
|
||||
return std::visit(visitor, node);
|
||||
|
||||
@ -468,6 +468,13 @@ namespace arti::lang {
|
||||
|
||||
bool keepParsing = true;
|
||||
|
||||
if (auto close = match(TokenV::opRSquirly); ! close) {
|
||||
return Unexpected{ std::move(close).error() };
|
||||
}
|
||||
else if (close.value()) {
|
||||
keepParsing = false;
|
||||
}
|
||||
|
||||
while (keepParsing) {
|
||||
if (auto member = parseStructMember(); ! member) {
|
||||
return Unexpected{ std::move(member).error() };
|
||||
@ -479,13 +486,22 @@ namespace arti::lang {
|
||||
if (auto comma = matchAndConsume(TokenV::opComma); ! comma) {
|
||||
return Unexpected<>{ std::move(comma).error() };
|
||||
}
|
||||
|
||||
if (auto peekToken = tokenizer.peek(); ! peekToken) {
|
||||
return Unexpected{ std::move(peekToken).error() };
|
||||
}
|
||||
else {
|
||||
if (peekToken->value == TokenV::opRSquirly) {
|
||||
keepParsing = false;
|
||||
else if (! comma.value()) {
|
||||
if (auto peekToken = tokenizer.peek(); ! peekToken) {
|
||||
return Unexpected{ std::move(peekToken).error() };
|
||||
}
|
||||
else {
|
||||
if (peekToken->value != TokenV::opRSquirly) {
|
||||
return langException<ExceptCode::ecUnexpectedToken>(
|
||||
peekToken->line,
|
||||
peekToken->column,
|
||||
toString(*peekToken),
|
||||
"',' or '}'"
|
||||
);
|
||||
}
|
||||
else {
|
||||
keepParsing = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -533,6 +549,13 @@ namespace arti::lang {
|
||||
|
||||
bool keepParsing = true;
|
||||
|
||||
if (auto close = match(TokenV::opRSquirly); ! close) {
|
||||
return Unexpected{ std::move(close).error() };
|
||||
}
|
||||
else if (close.value()) {
|
||||
keepParsing = false;
|
||||
}
|
||||
|
||||
while (keepParsing) {
|
||||
if (auto member = parseEnumMember(); ! member) {
|
||||
return Unexpected{ std::move(member).error() };
|
||||
@ -544,13 +567,22 @@ namespace arti::lang {
|
||||
if (auto comma = matchAndConsume(TokenV::opComma); ! comma) {
|
||||
return Unexpected<>{ std::move(comma).error() };
|
||||
}
|
||||
|
||||
if (auto peekToken = tokenizer.peek(); ! peekToken) {
|
||||
return Unexpected{ std::move(peekToken).error() };
|
||||
}
|
||||
else {
|
||||
if (peekToken->value == TokenV::opRSquirly) {
|
||||
keepParsing = false;
|
||||
else if (! comma.value()) {
|
||||
if (auto peekToken = tokenizer.peek(); ! peekToken) {
|
||||
return Unexpected{ std::move(peekToken).error() };
|
||||
}
|
||||
else {
|
||||
if (peekToken->value != TokenV::opRSquirly) {
|
||||
return langException<ExceptCode::ecUnexpectedToken>(
|
||||
peekToken->line,
|
||||
peekToken->column,
|
||||
toString(*peekToken),
|
||||
"',' or '}'"
|
||||
);
|
||||
}
|
||||
else {
|
||||
keepParsing = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -36,6 +36,13 @@ namespace arti::lang {
|
||||
return Unexpected<>{ std::move(lsquirly).error() };
|
||||
}
|
||||
|
||||
if (auto close = match(TokenV::opRSquirly); ! close) {
|
||||
return Unexpected<>{ std::move(close).error() };
|
||||
}
|
||||
else if (close.value()) {
|
||||
keepParsing = false;
|
||||
}
|
||||
|
||||
while (keepParsing) {
|
||||
if (auto ok = parseStatement(); ! ok) {
|
||||
return Unexpected<>{ std::move(ok).error() };
|
||||
@ -50,6 +57,13 @@ namespace arti::lang {
|
||||
node->statements.push_back(std::move(stmt).value());
|
||||
}
|
||||
}
|
||||
|
||||
if (auto close = match(TokenV::opRSquirly); ! close) {
|
||||
return Unexpected<>{ std::move(close).error() };
|
||||
}
|
||||
else if (close.value()) {
|
||||
keepParsing = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (auto rsquirly = consume(TokenV::opRSquirly, "'}'"); ! rsquirly) {
|
||||
@ -300,6 +314,23 @@ namespace arti::lang {
|
||||
return ast::StatementNode{ std::move(stmt).value() };
|
||||
}
|
||||
}
|
||||
else if (peekToken->value == TokenV::opLSquirly) {
|
||||
if (label.has_value()) {
|
||||
return langException<ExceptCode::ecUnexpectedToken>(
|
||||
peekToken->line,
|
||||
peekToken->column,
|
||||
toString(*peekToken),
|
||||
"loop keyword, i.e. any of ( for, while, do, loop )"
|
||||
);
|
||||
}
|
||||
|
||||
if (auto stmt = parseCodeBlock(); ! stmt) {
|
||||
return Unexpected<>{ std::move(stmt).error() };
|
||||
}
|
||||
else {
|
||||
return ast::StatementNode{ std::move(stmt).value() };
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (label.has_value()) {
|
||||
return langException<ExceptCode::ecUnexpectedToken>(
|
||||
@ -981,7 +1012,7 @@ namespace arti::lang {
|
||||
return Unexpected<>{ std::move(lParen).error() };
|
||||
}
|
||||
|
||||
if (auto skipPre = match(TokenV::opSemicolon); ! skipPre) {
|
||||
if (auto skipPre = matchAndConsume(TokenV::opSemicolon); ! skipPre) {
|
||||
return Unexpected<>{ std::move(skipPre).error() };
|
||||
}
|
||||
else if (not skipPre.value()) {
|
||||
@ -1234,6 +1265,10 @@ namespace arti::lang {
|
||||
return Unexpected<>{ std::move(rParen).error() };
|
||||
}
|
||||
|
||||
if (auto rParen = consume(TokenV::opSemicolon, "';'"); ! rParen) {
|
||||
return Unexpected<>{ std::move(rParen).error() };
|
||||
}
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user