fix(parser): support empty blocks, support nested scoping, and refine loop lookahead

Signed-off-by: erick-alcachofa <erick@artichoke.dev>

This commit addresses several critical issues in the recursive descent
parser, specifically regarding the handling of empty constructs,
statement termination, and AST representation of nested scopes. These
changes bring the implementation in line with the Artichoke EBNF
specification.

* **CodeBlock as Statement:** Added `CodeBlockStmtNode` to the
  `StatementNode` variant. This allows a bare `{}` to be treated as a
  valid statement, enabling manual scoping within functions.
* **Visitor Support:** Updated `toDot.cpp` (Graphviz) and `toString.cpp`
  (Pretty-print) to support the new `CodeBlockStmtNode` during AST
  traversal.

* **Empty Member Lists:** Implemented a pre-loop check for the closing
  brace `}` in `parseStruct` and `parseEnum`. This prevents the parser
  from attempting to parse members in empty declarations (e.g., `struct
  Empty {}`).
* **Diagnostic Accuracy:** Enhanced the member-parsing loop to provide
  better error context. If a member is not followed by a comma or a
  closing brace, the parser now explicitly suggests `',' or '}'` as the
  expected tokens.

* **Nested Scopes:** The parser now correctly identifies a `{` at the
  start of a statement and dispatches to `parseCodeBlock`.
* **Empty Code Blocks:** Added a guard in the block-parsing loop to
  check for `}` immediately after `{`, allowing functions or nested
  scopes to be empty.

* **C-Style For-Loops:** Replaced `match` with `matchAndConsume` for the
  initialization semicolon, so the `;` is consumed when it is found
  rather than only tested. This allows the parser to correctly handle
  loops where the initialization is omitted (e.g., `for (; 1; 1)`).

* **Correctness:** Resolves parser hangs or errors when encountering
  empty blocks and empty `struct`/`enum` declarations.
* **Compliance:** Fully supports the EBNF definition of zero-or-more
  members/statements.
* **Visuals:** AST diagrams now accurately reflect nested block
  structures.
This commit is contained in:
erick-alcachofa 2025-12-26 00:28:18 -06:00
parent a3d5c0ac68
commit 30d64d9b65
Signed by: me
GPG Key ID: 6FA5F8643444BAFA
5 changed files with 88 additions and 16 deletions

View File

@ -93,7 +93,8 @@ namespace arti::lang::ast {
WhileStmtNode,
DoWhileStmtNode,
InfLoopStmtNode,
ExpressionStmtNode
ExpressionStmtNode,
CodeBlockStmtNode
>;
using ElseBranchNode = Variant<

View File

@ -1002,6 +1002,7 @@ namespace arti::lang::ast {
[&g](const DoWhileStmtNode &n) { return emit(n, g); },
[&g](const InfLoopStmtNode &n) { return emit(n, g); },
[&g](const ExpressionStmtNode &n) { return emit(n, g); },
[&g](const CodeBlockStmtNode &n) { return emit(n, g); },
};
return std::visit(visitor, node);
}

View File

@ -1419,6 +1419,9 @@ namespace arti::lang::ast {
[padding](const ExpressionStmtNode &node) -> std::string {
return toString(node, padding);
},
[padding](const CodeBlockStmtNode &node) -> std::string {
return toString(node, padding);
}
};
return std::visit(visitor, node);

View File

@ -468,6 +468,13 @@ namespace arti::lang {
bool keepParsing = true;
if (auto close = match(TokenV::opRSquirly); ! close) {
return Unexpected{ std::move(close).error() };
}
else if (close.value()) {
keepParsing = false;
}
while (keepParsing) {
if (auto member = parseStructMember(); ! member) {
return Unexpected{ std::move(member).error() };
@ -479,13 +486,22 @@ namespace arti::lang {
if (auto comma = matchAndConsume(TokenV::opComma); ! comma) {
return Unexpected<>{ std::move(comma).error() };
}
if (auto peekToken = tokenizer.peek(); ! peekToken) {
return Unexpected{ std::move(peekToken).error() };
}
else {
if (peekToken->value == TokenV::opRSquirly) {
keepParsing = false;
else if (! comma.value()) {
if (auto peekToken = tokenizer.peek(); ! peekToken) {
return Unexpected{ std::move(peekToken).error() };
}
else {
if (peekToken->value != TokenV::opRSquirly) {
return langException<ExceptCode::ecUnexpectedToken>(
peekToken->line,
peekToken->column,
toString(*peekToken),
"',' or '}'"
);
}
else {
keepParsing = false;
}
}
}
}
@ -533,6 +549,13 @@ namespace arti::lang {
bool keepParsing = true;
if (auto close = match(TokenV::opRSquirly); ! close) {
return Unexpected{ std::move(close).error() };
}
else if (close.value()) {
keepParsing = false;
}
while (keepParsing) {
if (auto member = parseEnumMember(); ! member) {
return Unexpected{ std::move(member).error() };
@ -544,13 +567,22 @@ namespace arti::lang {
if (auto comma = matchAndConsume(TokenV::opComma); ! comma) {
return Unexpected<>{ std::move(comma).error() };
}
if (auto peekToken = tokenizer.peek(); ! peekToken) {
return Unexpected{ std::move(peekToken).error() };
}
else {
if (peekToken->value == TokenV::opRSquirly) {
keepParsing = false;
else if (! comma.value()) {
if (auto peekToken = tokenizer.peek(); ! peekToken) {
return Unexpected{ std::move(peekToken).error() };
}
else {
if (peekToken->value != TokenV::opRSquirly) {
return langException<ExceptCode::ecUnexpectedToken>(
peekToken->line,
peekToken->column,
toString(*peekToken),
"',' or '}'"
);
}
else {
keepParsing = false;
}
}
}
}

View File

@ -36,6 +36,13 @@ namespace arti::lang {
return Unexpected<>{ std::move(lsquirly).error() };
}
if (auto close = match(TokenV::opRSquirly); ! close) {
return Unexpected<>{ std::move(close).error() };
}
else if (close.value()) {
keepParsing = false;
}
while (keepParsing) {
if (auto ok = parseStatement(); ! ok) {
return Unexpected<>{ std::move(ok).error() };
@ -50,6 +57,13 @@ namespace arti::lang {
node->statements.push_back(std::move(stmt).value());
}
}
if (auto close = match(TokenV::opRSquirly); ! close) {
return Unexpected<>{ std::move(close).error() };
}
else if (close.value()) {
keepParsing = false;
}
}
if (auto rsquirly = consume(TokenV::opRSquirly, "'}'"); ! rsquirly) {
@ -300,6 +314,23 @@ namespace arti::lang {
return ast::StatementNode{ std::move(stmt).value() };
}
}
else if (peekToken->value == TokenV::opLSquirly) {
if (label.has_value()) {
return langException<ExceptCode::ecUnexpectedToken>(
peekToken->line,
peekToken->column,
toString(*peekToken),
"loop keyword, i.e. any of ( for, while, do, loop )"
);
}
if (auto stmt = parseCodeBlock(); ! stmt) {
return Unexpected<>{ std::move(stmt).error() };
}
else {
return ast::StatementNode{ std::move(stmt).value() };
}
}
else {
if (label.has_value()) {
return langException<ExceptCode::ecUnexpectedToken>(
@ -981,7 +1012,7 @@ namespace arti::lang {
return Unexpected<>{ std::move(lParen).error() };
}
if (auto skipPre = match(TokenV::opSemicolon); ! skipPre) {
if (auto skipPre = matchAndConsume(TokenV::opSemicolon); ! skipPre) {
return Unexpected<>{ std::move(skipPre).error() };
}
else if (not skipPre.value()) {
@ -1234,6 +1265,10 @@ namespace arti::lang {
return Unexpected<>{ std::move(rParen).error() };
}
if (auto rParen = consume(TokenV::opSemicolon, "';'"); ! rParen) {
return Unexpected<>{ std::move(rParen).error() };
}
return node;
}