1
Reference Grammar
erick-alcachofa edited this page 2025-12-30 03:08:14 +00:00

Grammar Reference

Formal grammar aligned with the current parser implementation.

/*
================================================================================
|                                                                              |
|                      The Artichoke Programming Language                      |
|                           Official EBNF Grammar                              |
|                                                                              |
================================================================================
*/

/* --- Program Structure --- */
/* A program is a sequence of top-level declarations and statements. */

<program> =
  <declaration>*
  <eof>

<declaration> =
    "export" <exportable_declaration>
  | <non_exportable_declaration>

<exportable_declaration> =
    <module_statement>
  | <struct_declaration>
  | <enum_declaration>
  | <function_declaration>

<non_exportable_declaration> =
    <import_statement>
  | <alias_statement>
  | <module_statement>
  | <struct_declaration>
  | <enum_declaration>
  | <function_declaration>

<module_statement> =
  "module" <namespaced_identifier> "{"
    ( <module_statement>
    | <alias_statement>
    | <struct_declaration>
    | <enum_declaration>
    | <function_declaration> )*
  "}"

<import_statement> =
  "import" <import_target> ";"

<import_target> =
    <namespaced_identifier> ( "::" "*" )?

<alias_statement> =
  "using" <identifier> "=" <type> ";"


/* --- Declarations --- */
/* Rules for defining functions, structs, enums, and their components. */

<function_declaration> =
  "fn" <identifier> <generic_params>? "(" <fn_params_list>? ")" ( "->" <type> )? <code_block>

<fn_params_list> =
    "this" <type> ( "," <fn_param> )*
  | <fn_param> ( "," <fn_param> )*

<fn_param> =
  <identifier> ":" <type>

<struct_declaration> =
  "struct" <identifier> <generic_params>? "{" <struct_members> "}"

<struct_members> =
  <struct_member> ( "," <struct_member> )*

<struct_member> =
  <identifier> ":" <type>

<enum_declaration> =
  "enum" <identifier> <generic_params>? "{" <enum_members> "}"

<enum_members> =
  <enum_member> ( "," <enum_member> )*

<enum_member> =
  <identifier> ( "(" <type> ")" )?

<generic_params> =
  "<" <generic_params_list> ">"

<generic_params_list> =
  <generic_param> ( "," <generic_param> )*

<generic_param> =
  "typename" <identifier>


/* --- Statements & Control Flow --- */
/* Rules for code blocks, variable declarations, and control structures. */

<code_block> =
  "{" <statement>* "}"

<statement> =
    <variable_declaration> ";"
  | <if_statement>
  | <defer_statement> ";"
  | <errdefer_statement> ";"
  | <return_statement> ";"
  | <break_statement> ";"
  | <continue_statement> ";"
  | <match_statement>
  | <switch_statement>
  | <loop_statement>
  | <expression> ";"

<variable_declaration> =
    <variable_declarator> <identifier> <variable_declaration_tail>

<variable_declaration_tail> =
    ":" <type> ( "=" <expression> )?
  | "=" <expression>

<variable_declarator> =
    "let"
  | "def"

<if_statement> =
  "if" "(" <expression> ")" <variable_unwrapper>? <code_block>
  <else_statement>?

<else_statement> =
    "else" <else_statement_tail>

<else_statement_tail> =
    <if_statement>
  | <variable_unwrapper>? <code_block>

<variable_unwrapper> =
  "|" <identifier> "|"

<loop_statement> =
  (<identifier> ":=")? (
      <c_for_statement>
    | <range_for_statement>
    | <while_statement>
    | <do_while_statement>
    | <inf_loop_statement>
  )

<c_for_statement> =
  "for" "(" ( <variable_declaration> | <expression> )? ";" <expression> ";" <expression>? ")"
  <code_block>

<range_for_statement> =
  "for" "(" <variable_declarator> <identifier> ":=" <expression> ")"
  <code_block>

<while_statement> =
  "while" "(" <expression> ")" <variable_unwrapper>? <code_block>
  <else_statement>?

<do_while_statement> =
  "do" <code_block> "while" "(" <expression> ")"

<inf_loop_statement> =
  "loop" <code_block>

<match_statement> =
  "match" "(" <expression> ")" "{" <match_case>* <default_case>? "}"

<switch_statement> =
  "switch" "(" <expression> ")" "{" <switch_case>* <default_case>? "}"

<match_case> =
  <type_name> <variable_unwrapper>? "->" <code_block>

<switch_case> =
  <expression> "->" <code_block>

<default_case> =
  "_" "->" <code_block>

<break_statement> =
  "break" <identifier>?

<continue_statement> =
  "continue" <identifier>?

<defer_statement> =
  "defer" ( <expression> | <code_block> )

<errdefer_statement> =
  "errdefer" ( <expression> | <code_block> )

<return_statement> =
  "return" <expression>?


/* --- Expressions & Operator Precedence --- */
/* The full expression hierarchy, from lowest to highest precedence. */

<expression> =
  <bool_or_expression> ( ( <assign_op> | <compound_assign_op> ) <expression> )?

<bool_or_expression> =
  <bool_and_expression> ( ( "||" | "or" ) <bool_and_expression> )*

<bool_and_expression> =
  <compare_expression> ( ( "&&" | "and" ) <compare_expression> )*

<compare_expression> =
  <bitwise_expression> ( <compare_op> <bitwise_expression> )?

<bitwise_expression> =
  <bitwise_shift_expression> ( <bitwise_op> <bitwise_shift_expression> )*

<bitwise_shift_expression> =
  <addition_expression> ( <bitshift_op> <addition_expression> )*

<addition_expression> =
  <multiply_expression> ( <addition_op> <multiply_expression> )*

<multiply_expression> =
  <prefix_expression> ( <multiply_op> <prefix_expression> )*

<prefix_expression> =
  <prefix_op>* <postfix_expression>

<postfix_expression> =
  <primary_expression> ( <suffix_op> | <fn_call_arguments> )*


/* --- Primary Expressions & Literals --- */
/* The highest-precedence expressions, including literals and grouped expressions. */

<primary_expression> =
    <grouped_expression>
  | <literal>
  | <type_initialized_literal>
  | <access_expression> ( "{" <struct_literal_body> "}" )?

<access_expression> =
  <identifier> ( "::" "<" <types_list> ">" )?

<type_initialized_literal> =
  <type> "{" <struct_literal_body> "}"

<literal> =
    <char_literal>
  | <null_literal>
  | <string_literal>
  | <number_literal>
  | <boolean_literal>

<grouped_expression> =
  "(" <expression> ")"

<fn_call_arguments> =
  "(" <expression_list> ")"

<expression_list> =
  (<expression> ",")* <expression>?

<struct_literal_body> =
  ( <named_field_list> | <positional_field_list> )? ","?

<named_field_list> =
  <named_field_init> ( "," <named_field_init> )*

<named_field_init> =
  "." <identifier> "=" <expression>

<positional_field_list> =
  <expression> ( "," <expression> )*

<null_literal> =
  "null"

<boolean_literal> =
    "true"
  | "false"

<number_literal> = /* Assumed to be defined by the tokenizer */
<string_literal> = /* Assumed to be defined by the tokenizer */
<char_literal> = /* Assumed to be defined by the tokenizer */


/* --- Operators --- */
/* Definitions for all operator token sets. */

<assign_op> = "="
<compound_assign_op> = "+=" | "-=" | "*=" | "/=" | "%=" | "&=" | "|=" | "<<=" | ">>=" | "||=" | "&&="
<compare_op> = "==" | "!=" | ">" | "<" | ">=" | "<="
<bitwise_op> = "&" | "^" | "|"
<bitshift_op> = "<<" | ">>"
<addition_op> = "+" | "-"
<multiply_op> = "*" | "/" | "%"
<prefix_op> = "!" | "-" | "~" | "&" | "*"

<suffix_op> =
    "[" <array_access_tail>
  | "." <identifier>
  | "::" <identifier> ( "::" "<" <types_list> ">" )?
  | "->" <identifier>
  | ".@" <identifier>?
  | ".[" <expression> "]"
  | ".#"
  | ".*"

<array_access_tail> =
    <expression>? <slice_or_index_tail>
  | ":" <expression>? "]"

<slice_or_index_tail> =
    "]"
  | ":" <expression>? "]"

/* --- Type System --- */
/* Rules for defining types, type names, and type qualifiers. */

<type> =
  <type_qualifier_chain>? <type_name>

<type_qualifier_chain> =
    ( "*" | "[]" ) <type_qualifier_chain>?
  | "$" <type_qualifier_chain_after_mutable>?
  | "?" <type_qualifier_chain_after_optional>?

<type_qualifier_chain_after_optional> =
    ( "*" | "[]" ) <type_qualifier_chain>?
  | "$" <type_qualifier_chain_after_mutable>?

<type_qualifier_chain_after_mutable> =
    ( "*" | "[]" ) <type_qualifier_chain>?
  | "?" <type_qualifier_chain_after_optional>?

<type_name> =
  <access_expression> ( "::" <identifier> ( "::" "<" <types_list> ">" )? )*

<namespaced_identifier> =
  <identifier> ( "::" <identifier> )*

<types_list> =
  <type> ( "," <type> )*


/* --- Lexical Tokens & Base Definitions --- */
/* The lowest-level building blocks of the language. */

<identifier> =
  <nondigit> <identifier_tail>

<identifier_tail> =
    <empty>
  | <nondigit> <identifier_tail>
  | <digit> <identifier_tail>

<nondigit> = "_" | [a-z] | [A-Z]
<digit> = <zero> | <nonzero_digit>
<zero> = "0"
<nonzero_digit> = [1-9]

<empty> = ε /* Epsilon: the empty string (matches without consuming any input) */
<eof> = /* End Of File */