Signed-off-by: erick-alcachofa <erick@artichoke.dev>
Complete the transition from a declarations-only parser to a functional
imperative parser. This commit introduces the implementation for all
major statement types, loop constructs, and core control flow logic.
- **Match Case Update**: Updated `grammar.ebnf` to use pipe delimiters
`|id|` for unwrapped variables in match cases, replacing the previous
parenthetical syntax.
- **Labels**: Implemented loop labeling using the `ident := loop`
syntax. Labels are validated to ensure they only prefix valid loop
constructs.
- **Labels and Ranges**: Standardized the use of the `:=` operator for
both loop labels (`label := loop`) and range-for declarations
(`let i := range`).
- **Conditional Branches**:
  - Fully implemented `if` and `else` statements.
  - Added support for optional variable unwrapping (e.g.,
    `if (expr) |val|`).
  - Supported `else if` chaining by recursively parsing if-statements
    within else-branches.
- **Loops**:
  - **C-Style For**: Implemented `for (init; cond; post)` with
    optional initializers and post-loop expressions.
  - **Range For**: Implemented `for (let i := range)` with mutability
    controls.
  - **While & Do-While**: Implemented standard condition-based loops.
  - **Infinite Loop**: Added the explicit `loop` keyword for infinite
    iteration.
  - **Loop Dispatch**: Added a lookahead mechanism in
    `parseForLoopStatement` to differentiate between C-style and
    Range-style loops based on token positioning.
- **Variables**: Implemented `let`/`def` parsing within local scopes,
including type annotations and initializers.
- **Defer Logic**: Implemented `defer` and `errdefer` for scope-guarded
execution.
- **Jumps**: Implemented `break`, `continue` (with optional label
targets), and `return` (with optional expressions).
- **Match & Switch**: Fully implemented branch parsing, with possible
default cases via the `_` (underscore) keyword.
- **Expression Integration**: Stubbed `parseExpression` in a new
`Expressions.cpp` to serve as the integration point for value parsing.
- **OverloadSet**: Integrated `OverloadSet` utility in `Statements.cpp`
to cleanly handle AST node variant visitation for label injection.
- **Error Handling**: Standardized error reporting across all new paths
using `langException`, providing specific "expected" messages for
delimiters and keywords.
359 lines
8.4 KiB
EBNF
359 lines
8.4 KiB
EBNF
/*
|
|
================================================================================
|
|
| |
|
|
| The Artichoke Programming Language |
|
|
| Official EBNF Grammar |
|
|
| |
|
|
================================================================================
|
|
*/
|
|
|
|
/* --- Program Structure --- */
|
|
/* A program is a sequence of top-level declarations and statements. */
|
|
|
|
<program> =
|
|
<declaration>*
|
|
<eof>
|
|
|
|
<declaration> =
|
|
"export" <exportable_declaration>
|
|
| <non_exportable_declaration>
|
|
|
|
<exportable_declaration> =
|
|
<module_statement>
|
|
| <struct_declaration>
|
|
| <enum_declaration>
|
|
| <function_declaration>
|
|
|
|
<non_exportable_declaration> =
|
|
<import_statement>
|
|
| <alias_statement>
|
|
| <module_statement>
|
|
| <struct_declaration>
|
|
| <enum_declaration>
|
|
| <function_declaration>
|
|
|
|
<module_statement> =
|
|
"module" <namespaced_identifier> "{"
|
|
( <module_statement>
|
|
| <alias_statement>
|
|
| <struct_declaration>
|
|
| <enum_declaration>
|
|
| <function_declaration> )*
|
|
"}"
|
|
|
|
<import_statement> =
|
|
"import" <import_target> ";"
|
|
|
|
<import_target> =
|
|
<namespaced_identifier> ( "::" "*" )?
|
|
|
|
<alias_statement> =
|
|
"using" <identifier> "=" <type> ";"
|
|
|
|
|
|
/* --- Declarations --- */
|
|
/* Rules for defining functions, structs, enums, and their components. */
|
|
|
|
<function_declaration> =
|
|
"fn" <identifier> <generic_params>? "(" <fn_params_list>? ")" ( "->" <type> )? <code_block>
|
|
|
|
<fn_params_list> =
|
|
"this" <type> ("," <fn_param> ( "," <fn_param> )* )?
|
|
| <fn_param> ( "," <fn_param> )*
|
|
|
|
<fn_param> =
|
|
<identifier> ":" <type>
|
|
|
|
<struct_declaration> =
|
|
"struct" <identifier> <generic_params>? "{" <struct_members> "}"
|
|
|
|
<struct_members> =
|
|
<struct_member> ( "," <struct_member> )*
|
|
|
|
<struct_member> =
|
|
<identifier> ":" <type>
|
|
|
|
<enum_declaration> =
|
|
"enum" <identifier> <generic_params>? "{" <enum_members> "}"
|
|
|
|
<enum_members> =
|
|
<enum_member> ( "," <enum_member> )*
|
|
|
|
<enum_member> =
|
|
<identifier> ( "(" <type> ")" )?
|
|
|
|
<generic_params> =
|
|
"<" <generic_params_list> ">"
|
|
|
|
<generic_params_list> =
|
|
<generic_param> ( "," <generic_param> )*
|
|
|
|
<generic_param> =
|
|
"typename" <identifier>
|
|
|
|
|
|
/* --- Statements & Control Flow --- */
|
|
/* Rules for code blocks, variable declarations, and control structures. */
|
|
|
|
<code_block> =
|
|
"{" <statement>* "}"
|
|
|
|
<statement> =
|
|
<variable_declaration> ";"
|
|
| <if_statement>
|
|
| <defer_statement> ";"
|
|
| <errdefer_statement> ";"
|
|
| <return_statement> ";"
|
|
| <break_statement> ";"
|
|
| <continue_statement> ";"
|
|
| <match_statement>
|
|
| <switch_statement>
|
|
| <loop_statement>
|
|
| <expression> ";"
|
|
|
|
<variable_declaration> =
|
|
<variable_declarator> <identifier> <variable_declaration_tail>
|
|
|
|
<variable_declaration_tail> =
|
|
":" <type> ( "=" <expression> )?
|
|
| "=" <expression>
|
|
|
|
<variable_declarator> =
|
|
"let"
|
|
| "def"
|
|
|
|
<if_statement> =
|
|
"if" "(" <expression> ")" <variable_unwrapper>? <code_block>
|
|
<else_statement>?
|
|
|
|
<else_statement> =
|
|
"else" <else_statement_tail>
|
|
|
|
<else_statement_tail> =
|
|
<if_statement>
|
|
| <variable_unwrapper>? <code_block>
|
|
|
|
<variable_unwrapper> =
|
|
"|" <identifier> "|"
|
|
|
|
<loop_statement> =
|
|
(<identifier> ":=")? (
|
|
<c_for_statement>
|
|
| <range_for_statement>
|
|
| <while_statement>
|
|
| <do_while_statement>
|
|
| <inf_loop_statement>
|
|
)
|
|
|
|
<c_for_statement> =
|
|
"for" "(" ( <variable_declaration> | <expression> )? ";" <expression> ";" <expression>? ")"
|
|
<code_block>
|
|
|
|
<range_for_statement> =
|
|
"for" "(" <variable_declarator> <identifier> ":=" <expression> ")"
|
|
<code_block>
|
|
|
|
<while_statement> =
|
|
"while" "(" <expression> ")" <variable_unwrapper>? <code_block>
|
|
<else_statement>?
|
|
|
|
<do_while_statement> =
|
|
"do" <code_block> "while" "(" <expression> ")"
|
|
|
|
<inf_loop_statement> =
|
|
"loop" <code_block>
|
|
|
|
<match_statement> =
|
|
"match" "(" <expression> ")" "{" <match_case>* <default_case>? "}"
|
|
|
|
<switch_statement> =
|
|
"switch" "(" <expression> ")" "{" <switch_case>* <default_case>? "}"
|
|
|
|
<match_case> =
|
|
<type_name> ( "|" <identifier> "|" )? "->" <code_block>
|
|
|
|
<switch_case> =
|
|
<expression> "->" <code_block>
|
|
|
|
<default_case> =
|
|
"_" "->" <code_block>
|
|
|
|
<break_statement> =
|
|
"break" <identifier>?
|
|
|
|
<continue_statement> =
|
|
"continue" <identifier>?
|
|
|
|
<defer_statement> =
|
|
"defer" ( <expression> | <code_block> )
|
|
|
|
<errdefer_statement> =
|
|
"errdefer" ( <expression> | <code_block> )
|
|
|
|
<return_statement> =
|
|
"return" <expression>?
|
|
|
|
|
|
/* --- Expressions & Operator Precedence --- */
|
|
/* The full expression hierarchy, from lowest to highest precedence. */
|
|
|
|
<expression> =
|
|
<bool_or_expression> ( ( <assign_op> | <compound_assign_op> ) <expression> )?
|
|
|
|
<bool_or_expression> =
|
|
<bool_and_expression> ( ( "||" | "or" ) <bool_and_expression> )*
|
|
|
|
<bool_and_expression> =
|
|
<compare_expression> ( ( "&&" | "and" ) <compare_expression> )*
|
|
|
|
<compare_expression> =
|
|
<bitwise_expression> ( <compare_op> <bitwise_expression> )?
|
|
|
|
<bitwise_expression> =
|
|
<bitwise_shift_expression> ( <bitwise_op> <bitwise_shift_expression> )*
|
|
|
|
<bitwise_shift_expression> =
|
|
<addition_expression> ( <bitshift_op> <addition_expression> )*
|
|
|
|
<addition_expression> =
|
|
<multiply_expression> ( <addition_op> <multiply_expression> )*
|
|
|
|
<multiply_expression> =
|
|
<prefix_expression> ( <multiply_op> <prefix_expression> )*
|
|
|
|
<prefix_expression> =
|
|
<prefix_op>* <postfix_expression>
|
|
|
|
<postfix_expression> =
|
|
<primary_expression> ( <suffix_op> | <fn_call_arguments> )*
|
|
|
|
|
|
/* --- Primary Expressions & Literals --- */
|
|
/* The highest-precedence expressions, including literals and grouped expressions. */
|
|
|
|
<primary_expression> =
|
|
<grouped_expression>
|
|
| <literal>
|
|
| <access_expression> ( "{" <struct_literal_body> "}" )?
|
|
|
|
<access_expression> =
|
|
<identifier> ( "<" <types_list> ">" )?
|
|
|
|
<literal> =
|
|
<char_literal>
|
|
| <null_literal>
|
|
| <string_literal>
|
|
| <number_literal>
|
|
| <boolean_literal>
|
|
|
|
<grouped_expression> =
|
|
"(" <expression> ")"
|
|
|
|
<fn_call_arguments> =
|
|
"(" <expression_list> ")"
|
|
|
|
<expression_list> =
|
|
(<expression> ",")* <expression>?
|
|
|
|
<struct_literal_body> =
|
|
( <named_field_list> | <positional_field_list> )? ","?
|
|
|
|
<named_field_list> =
|
|
<named_field_init> ( "," <named_field_init> )*
|
|
|
|
<named_field_init> =
|
|
<identifier> ":" <expression>
|
|
|
|
<positional_field_list> =
|
|
<expression> ( "," <expression> )*
|
|
|
|
<null_literal> =
|
|
"null"
|
|
|
|
<boolean_literal> =
|
|
"true"
|
|
| "false"
|
|
|
|
<number_literal> = /* Assumed to be defined by the tokenizer */
|
|
<string_literal> = /* Assumed to be defined by the tokenizer */
|
|
<char_literal> = /* Assumed to be defined by the tokenizer */
|
|
|
|
|
|
/* --- Operators --- */
|
|
/* Definitions for all operator token sets. */
|
|
|
|
<assign_op> = "="
|
|
<compound_assign_op> = "+=" | "-=" | "*=" | "/=" | "%=" | "&=" | "|=" | "<<=" | ">>=" | "||=" | "&&="
|
|
<compare_op> = "==" | "!=" | ">" | "<" | ">=" | "<="
|
|
<bitwise_op> = "&" | "^" | "|"
|
|
<bitshift_op> = "<<" | ">>"
|
|
<addition_op> = "+" | "-"
|
|
<multiply_op> = "*" | "/" | "%"
|
|
<prefix_op> = "!" | "-" | "~" | "&" | "*"
|
|
|
|
<suffix_op> =
|
|
"[" <array_access_tail>
|
|
| "." <identifier>
|
|
| "::" <identifier> ( "<" <types_list> ">" )?
|
|
| "->" <identifier>
|
|
| ".@" <identifier>?
|
|
| ".[" <expression> "]"
|
|
| ".#"
|
|
| ".*"
|
|
|
|
<array_access_tail> =
|
|
<expression> <slice_or_index_tail>
|
|
| ":" <expression>? "]"
|
|
|
|
<slice_or_index_tail> =
|
|
"]"
|
|
| ":" <expression>? "]"
|
|
|
|
/* --- Type System --- */
|
|
/* Rules for defining types, type names, and type qualifiers. */
|
|
|
|
<type> =
|
|
<type_qualifier_chain>? <type_name>
|
|
|
|
<type_qualifier_chain> =
|
|
( "*" | "[]" ) <type_qualifier_chain>?
|
|
| "$" <type_qualifier_chain_after_mutable>?
|
|
| "?" <type_qualifier_chain_after_optional>?
|
|
|
|
<type_qualifier_chain_after_optional> =
|
|
( "*" | "[]" ) <type_qualifier_chain>?
|
|
| "$" <type_qualifier_chain_after_mutable>?
|
|
|
|
<type_qualifier_chain_after_mutable> =
|
|
( "*" | "[]" ) <type_qualifier_chain>?
|
|
| "?" <type_qualifier_chain_after_optional>?
|
|
|
|
<type_name> =
|
|
<access_expression> ( "::" <identifier> ( "<" <types_list> ">" )? )*
|
|
|
|
<namespaced_identifier> =
|
|
<identifier> ( "::" <identifier> )*
|
|
|
|
<types_list> =
|
|
<type> ( "," <type> )*
|
|
|
|
|
|
/* --- Lexical Tokens & Base Definitions --- */
|
|
/* The lowest-level building blocks of the language. */
|
|
|
|
<identifier> =
|
|
<nondigit> <identifier_tail>
|
|
|
|
<identifier_tail> =
|
|
<empty>
|
|
| <nondigit> <identifier_tail>
|
|
| <digit> <identifier_tail>
|
|
|
|
<nondigit> = "_" | [a-z] | [A-Z]
|
|
<digit> = <zero> | <nonzero_digit>
|
|
<zero> = "0"
|
|
<nonzero_digit> = [1-9]
|
|
|
|
<empty> = E /* Represents an empty terminal string */
|
|
<eof> = /* End Of File */
|