Compare commits

...

4 Commits

Author SHA1 Message Date
146a48aef9
fix(tokenizer): harden move semantics and operator parsing
Signed-off-by: erick-alcachofa <erick@artichoke.dev>

This commit addresses subtle state-management issues uncovered while
exercising tokenizer move semantics and operator parsing. Moved-from
instances were retaining stale iterators and metadata that could
manifest as incorrect token offsets when the objects were reused.
Additionally, token location bookkeeping during comment skipping and
operator backtracking drifted from the iterator, producing inaccurate
column reports.

- Refines the move constructor by exchanging `line`, `column`, `iter`,
  `tokensGenerator`, `tokensBuffer`, and `source`, ensuring moved-from
  tokenizers reset to neutral defaults without dangling references.
- Aligns the move assignment operator with the constructor by using
  `std::exchange` across all transferred members, preventing stale state
  when a tokenizer is reassigned.
- Adjusts column advancement for single-line comments so the cursor
  increments by one when skipping the comment sequence, matching the
  iterator progression.
- Promotes the operator trie to a function-local static cache, avoiding
  repeated construction each time an operator token is read.
- Restores accurate token metadata by assigning the original line and
  column (`cLine`, `cColumn`) when emitting operator tokens and rolling
  back the column alongside the iterator during longest-valid
  backtracking.
2026-01-01 07:55:09 +00:00
d92f39538b
feat(semantic): implement tree-based string representation for SymbolTable
Signed-off-by: erick-alcachofa <erick@artichoke.dev>

- Add comprehensive toString() implementation for all symbol types.
- Support recursive tree rendering for debugging symbol hierarchies.
- Implement OverloadSet-based visitation for Symbol variants.
2025-12-30 23:19:05 -06:00
f235a72671
feat(semantic): introduce SymbolTable and core semantic data structures
Signed-off-by: erick-alcachofa <erick@artichoke.dev>

- Define core symbol types: Function, Struct, Enum, and Variable.
- Implement recursive TypeSymbol structure with qualifier support.
- Add TemplatedSymbol and TemplateParam for upcoming generic support.
- Implement scoped SymbolTable with parent/inner hierarchy.
2025-12-30 23:16:11 -06:00
5ce803bb07
refactor: move common types and aliases to central Util namespace
Signed-off-by: erick-alcachofa <erick@artichoke.dev>

- Create lib/include/artichoke/Util/Common.hpp for shared types.
- Migrate SourceLocation, Mutability, and TypeQualifier from AST.
- Update Parser and AST files to use unified aliases (Ptr, Vector,
  etc.).
- Clean up includes across the Parser module.
2025-12-30 23:13:21 -06:00
12 changed files with 1131 additions and 99 deletions

View File

@ -22,34 +22,10 @@
#pragma once
#include <cstddef>
#include <memory>
#include <vector>
#include <string>
#include <variant>
#include <optional>
#include <artichoke/Util/Common.hpp>
namespace arti::lang::ast {
struct SourceLocation {
std::size_t line;
std::size_t column;
};
enum class Mutability {
Uninitialized,
Mutable,
Constant,
};
enum class TypeQualifier {
Uninitialized,
Pointer,
Slice,
Mutable,
Optional,
};
enum class PrefixOperator {
Uninitialized,
Not,
@ -122,27 +98,10 @@ namespace arti::lang::ast {
BoolOr,
};
/* Alising of types for consistency */
using Boolean = bool;
using String = std::string;
template <typename T>
using Ptr = std::unique_ptr<T>;
template <typename T>
using Optional = std::optional<T>;
template <typename... T>
using Variant = std::variant<T...>;
template <typename T>
using Vector = std::vector<T>;
template <typename Node>
concept ASTNodePtr = requires {
typename Node::element_type;
requires std::is_same_v<std::unique_ptr<typename Node::element_type>, Node>;
requires std::is_same_v<Ptr<typename Node::element_type>, Node>;
};
} // namespace arti::lang::ast

View File

@ -49,7 +49,7 @@ namespace arti::lang {
Expected<bool>
match(TokenV type, std::size_t offset = 0);
Expected<ast::Optional<ast::TopLevelDeclNode>>
Expected<Optional<ast::TopLevelDeclNode>>
parseTopLevelDeclaration();
Expected<ast::ImportDeclNode>
@ -67,19 +67,19 @@ namespace arti::lang {
Expected<ast::EnumDeclNode>
parseEnumDeclaration();
Expected<ast::Vector<ast::GenericParamNode>>
Expected<Vector<ast::GenericParamNode>>
parseGenericParamsList();
Expected<ast::GenericParamNode>
parseGenericParam();
Expected<ast::Vector<ast::StructMemberNode>>
Expected<Vector<ast::StructMemberNode>>
parseStructMembersList();
Expected<ast::StructMemberNode>
parseStructMember();
Expected<ast::Vector<ast::EnumMemberNode>>
Expected<Vector<ast::EnumMemberNode>>
parseEnumMembersList();
Expected<ast::EnumMemberNode>
@ -94,13 +94,13 @@ namespace arti::lang {
Expected<ast::TypeNode>
parseType();
Expected<ast::Vector<ast::TypeQualifier>>
Expected<Vector<TypeQualifier>>
parseTypeQualifiers();
Expected<ast::Vector<ast::TypeNode>>
Expected<Vector<ast::TypeNode>>
parseGenericArgumentsList();
Expected<ast::Vector<ast::FunctionParamNode>>
Expected<Vector<ast::FunctionParamNode>>
parseFunctionParamsList();
Expected<ast::FunctionParamNode>
@ -112,7 +112,7 @@ namespace arti::lang {
Expected<ast::CodeBlockStmtNode>
parseCodeBlock();
Expected<ast::Optional<ast::StatementNode>>
Expected<Optional<ast::StatementNode>>
parseStatement();
Expected<ast::VariableStmtNode>
@ -169,7 +169,7 @@ namespace arti::lang {
Expected<ast::ExpressionNode>
parseExpression(std::uint16_t p = 0);
Expected<ast::Optional<ast::ExpressionNode>>
Expected<Optional<ast::ExpressionNode>>
parsePrimaryExpression();
Expected<ast::ExpressionNode>

View File

@ -20,6 +20,8 @@
// //
//============================================================================//
#pragma once
#include <artichoke/Parser/Parser.hpp>
namespace arti::lang::pratt {

View File

@ -0,0 +1,190 @@
//============================================================================//
// //
// artichoke programming language //
// //
// Copyright (C) 2025 Erick Saul Guzman Ramos, whoami.artichoke.dev //
// //
// //
// This program is free software: you can redistribute it and/or modify //
// it under the terms of the GNU Affero General Public License as published //
// by the Free Software Foundation, either version 3 of the License, or //
// (at your option) any later version. //
// //
// This program is distributed in the hope that it will be useful, //
// but WITHOUT ANY WARRANTY; without even the implied warranty of //
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
// GNU Affero General Public License for more details. //
// //
// You should have received a copy of the GNU Affero General Public License //
// along with this program. If not, see <https://www.gnu.org/licenses/>. //
// //
//============================================================================//
#pragma once
#include <variant>
#include <artichoke/Util/Common.hpp>
namespace arti::lang::sym {
/* Forward declarations: the symbol kinds below hold pointers to one another
   (e.g. Function <-> FunctionType), so every kind is declared up front. */
struct Function;
struct FunctionType;
struct Enum;
struct EnumMember;
struct Struct;
struct StructMember;
struct Variable;
struct Primitive;
struct TypeSymbol;
struct TemplateParam;
struct TemplatedSymbol;
/* A symbol as stored in SymbolTable::symbols: a shared-ownership pointer to
   exactly one of the listed symbol kinds. */
using Symbol = std::variant<
SharedPtr<Function>,
SharedPtr<Variable>,
SharedPtr<TypeSymbol>,
SharedPtr<FunctionType>,
SharedPtr<Primitive>,
SharedPtr<Struct>,
SharedPtr<Enum>,
SharedPtr<TemplatedSymbol>,
SharedPtr<TemplateParam>
>;
/* Non-owning counterpart of Symbol: a weak pointer to one of the listed
   symbol kinds.
   NOTE(review): unlike Symbol, there is no WeakPtr<TemplateParam> alternative
   here -- confirm whether that omission is intentional. */
using WeakSymbol = std::variant<
WeakPtr<Function>,
WeakPtr<Variable>,
WeakPtr<TypeSymbol>,
WeakPtr<FunctionType>,
WeakPtr<Primitive>,
WeakPtr<Struct>,
WeakPtr<Enum>,
WeakPtr<TemplatedSymbol>
>;
/* A named scope in the parent/inner scope hierarchy. */
struct SymbolTable {
String name;
SourceLocation location;
/* Enclosing scope, held weakly (non-owning). */
WeakPtr<SymbolTable> parent;
/* Nested scopes keyed by string, held by shared ownership. */
HashMap<String, SharedPtr<SymbolTable>> inner;
/* Symbols stored directly in this scope, keyed by string. */
HashMap<String, Symbol> symbols;
};
/* Symbol describing a function. */
struct Function {
String name;
SourceLocation location;
/* Scope this symbol refers back to, held weakly. */
WeakPtr<SymbolTable> parent;
/* Scope owned by the function (shared ownership); may be null. */
SharedPtr<SymbolTable> body;
/* Associated FunctionType, held weakly; may be expired. */
WeakPtr<FunctionType> type;
};
/* Symbol describing a function's type: argument types and an optional
   return type. */
struct FunctionType {
String name;
SourceLocation location;
WeakPtr<SymbolTable> parent;
/* Back-reference to the Function symbol, held weakly; may be expired. */
WeakPtr<Function> symbol;
/* Argument types, each held weakly. */
Vector<WeakPtr<TypeSymbol>> arguments;
/* Return type; an empty Optional means none was recorded. */
Optional<WeakPtr<TypeSymbol>> returnType;
};
/* One member of an Enum. */
struct EnumMember {
String name;
SourceLocation location;
/* presumably the member's position within Enum::membersList -- confirm */
std::uint64_t index;
/* Optional type attached to the member, held weakly. */
Optional<WeakPtr<TypeSymbol>> type;
};
/* Symbol describing an enum: its members and methods. */
struct Enum {
String name;
SourceLocation location;
WeakPtr<SymbolTable> parent;
/* NOTE(review): units of size/alignment are not shown here (presumably
   bytes) -- confirm. */
std::uint64_t size;
std::uint64_t alignment;
/* Members stored by value. */
Vector<EnumMember> membersList;
/* presumably maps a member name to its index in membersList -- confirm */
HashMap<String, std::size_t> membersLookup;
/* Methods keyed by string; each Function is held weakly. */
HashMap<String, WeakPtr<Function>> methods;
};
/* One member of a Struct. */
struct StructMember {
String name;
SourceLocation location;
/* presumably the member's position within Struct::membersList -- confirm */
std::uint64_t index;
/* NOTE(review): presumably the member's offset inside the struct layout;
   units are not shown here -- confirm. */
std::uint64_t offset;
/* Member type, held weakly. */
WeakPtr<TypeSymbol> type;
};
/* Symbol describing a struct: its members and methods. */
struct Struct {
String name;
SourceLocation location;
WeakPtr<SymbolTable> parent;
/* NOTE(review): units of size/alignment are not shown here (presumably
   bytes) -- confirm. */
std::uint64_t size;
std::uint64_t alignment;
/* Members stored by value. */
Vector<StructMember> membersList;
/* presumably maps a member name to its index in membersList -- confirm */
HashMap<String, std::size_t> membersLookup;
/* Methods keyed by string; each Function is held weakly. */
HashMap<String, WeakPtr<Function>> methods;
};
/* Symbol describing a single template parameter. */
struct TemplateParam {
String name;
SourceLocation location;
/* Scope this parameter refers back to, held weakly. */
WeakPtr<SymbolTable> parent;
};
/* Symbol wrapping another symbol together with its template parameters. */
struct TemplatedSymbol {
String name;
SourceLocation location;
WeakPtr<SymbolTable> parent;
/* Its parent is `parent` (the member above). */
WeakPtr<SymbolTable> scope;
/* The wrapped symbol, held weakly; it lives in `scope`. */
WeakSymbol symbol;
/* Template parameters stored by value. */
Vector<TemplateParam> params;
};
/* Symbol describing a variable. */
struct Variable {
String name;
SourceLocation location;
/* The variable's type, held weakly. */
WeakPtr<TypeSymbol> type;
};
/* Symbol describing a primitive type. It carries no source location. */
struct Primitive {
String name;
/* NOTE(review): units of size/alignment are not shown here (presumably
   bytes) -- confirm. */
std::uint64_t size;
std::uint64_t alignment;
};
/* A type reference with an optional qualifier. The structure is recursive:
   the SharedPtr<TypeSymbol> alternative owns a nested TypeSymbol, while every
   other alternative refers weakly to a symbol owned elsewhere. */
struct TypeSymbol {
using Type = std::variant<
SharedPtr<TypeSymbol>,
WeakPtr<FunctionType>,
WeakPtr<Primitive>,
WeakPtr<Struct>,
WeakPtr<Enum>,
WeakPtr<TemplatedSymbol>,
WeakPtr<TemplateParam>
>;
/* What this type refers to. */
Type type;
/* Qualifier applied at this level, if any. */
Optional<TypeQualifier> qualifier;
};
/* Renders a scope as a text tree and returns it. The second argument is the
   prefix applied to the lines emitted below the first one. A null pointer
   yields "<null scope>". */
std::string toString(const SharedPtr<SymbolTable> &, const std::string &);
} // namespace arti::lang::sym

View File

@ -0,0 +1,79 @@
//============================================================================//
// //
// artichoke programming language //
// //
// Copyright (C) 2025 Erick Saul Guzman Ramos, whoami.artichoke.dev //
// //
// //
// This program is free software: you can redistribute it and/or modify //
// it under the terms of the GNU Affero General Public License as published //
// by the Free Software Foundation, either version 3 of the License, or //
// (at your option) any later version. //
// //
// This program is distributed in the hope that it will be useful, //
// but WITHOUT ANY WARRANTY; without even the implied warranty of //
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
// GNU Affero General Public License for more details. //
// //
// You should have received a copy of the GNU Affero General Public License //
// along with this program. If not, see <https://www.gnu.org/licenses/>. //
// //
//============================================================================//
#pragma once
#include <vector>
#include <string>
#include <memory>
#include <variant>
#include <optional>
#include <unordered_map>
namespace arti::lang {
/* Aliasing of types for consistency */
/* A line/column pair identifying a position in source text.
   Both members default to 0, so a default-initialized SourceLocation never
   carries indeterminate values. The type remains an aggregate, so
   `SourceLocation{}` and `SourceLocation{line, column}` keep working. */
struct SourceLocation {
  std::size_t line = 0;
  std::size_t column = 0;
};
/* Short aliases for standard library types. */
using Boolean = bool;
using String = std::string;
/* NOTE(review): Ptr and SharedPtr both alias std::shared_ptr, whereas the
   ast::Ptr alias this replaces was std::unique_ptr -- confirm that the change
   of ownership semantics is intended. */
template <typename T>
using Ptr = std::shared_ptr<T>;
template <typename T>
using SharedPtr = std::shared_ptr<T>;
template <typename T>
using WeakPtr = std::weak_ptr<T>;
template <typename T>
using Optional = std::optional<T>;
template <typename... T>
using Variant = std::variant<T...>;
template <typename T>
using Vector = std::vector<T>;
template <typename K, typename V>
using HashMap = std::unordered_map<K, V>;
/* Mutability of a declaration. The parser sets Mutable for `let` and
   Constant for `def`; Uninitialized is the zero value. */
enum class Mutability {
Uninitialized,
Mutable,
Constant,
};
/* Qualifier that can be applied to a type. Uninitialized is the zero
   value. */
enum class TypeQualifier {
Uninitialized,
Pointer,
Slice,
Mutable,
Optional,
};
}

View File

@ -24,7 +24,7 @@
namespace arti::lang {
Expected<ast::Optional<ast::TopLevelDeclNode>>
Expected<Optional<ast::TopLevelDeclNode>>
Parser::parseTopLevelDeclaration() {
bool exportable = false;
@ -198,7 +198,7 @@ namespace arti::lang {
Expected<ast::ModuleDeclNode> Parser::parseModuleDeclaration() {
auto node = ast::MakeNode<ast::ModuleDeclNode>();
auto decl = ast::Optional<ast::TopLevelDeclNode>{};
auto decl = Optional<ast::TopLevelDeclNode>{};
bool keepParsing = true;
if (auto kw = consume(TokenV::kwModule, "'module' keyword"); ! kw) {
@ -376,9 +376,9 @@ namespace arti::lang {
return node;
}
Expected<ast::Vector<ast::GenericParamNode>>
Expected<Vector<ast::GenericParamNode>>
Parser::parseGenericParamsList() {
auto paramsList = ast::Vector<ast::GenericParamNode>{};
auto paramsList = Vector<ast::GenericParamNode>{};
auto peekToken = tokenizer.peek();
@ -452,9 +452,9 @@ namespace arti::lang {
return node;
}
Expected<ast::Vector<ast::StructMemberNode>>
Expected<Vector<ast::StructMemberNode>>
Parser::parseStructMembersList() {
auto membersList = ast::Vector<ast::StructMemberNode>{};
auto membersList = Vector<ast::StructMemberNode>{};
if (auto comma = tokenizer.peek();
comma and comma->value == TokenV::opComma) {
@ -541,8 +541,8 @@ namespace arti::lang {
return node;
}
Expected<ast::Vector<ast::EnumMemberNode>> Parser::parseEnumMembersList() {
auto membersList = ast::Vector<ast::EnumMemberNode>{};
Expected<Vector<ast::EnumMemberNode>> Parser::parseEnumMembersList() {
auto membersList = Vector<ast::EnumMemberNode>{};
if (auto comma = tokenizer.peek();
comma and comma->value == TokenV::opComma) {
@ -709,9 +709,9 @@ namespace arti::lang {
return node;
}
Expected<ast::Vector<ast::FunctionParamNode>>
Expected<Vector<ast::FunctionParamNode>>
Parser::parseFunctionParamsList() {
auto params = ast::Vector<ast::FunctionParamNode>{};
auto params = Vector<ast::FunctionParamNode>{};
if (auto comma = tokenizer.peek();
comma and comma->value == TokenV::opComma) {

View File

@ -33,7 +33,7 @@ namespace arti::lang {
}
bool keepParsing = true;
ast::Optional<ast::ExpressionNode> lhs = std::nullopt;
Optional<ast::ExpressionNode> lhs = std::nullopt;
if (peekToken->value == TokenV::opLParen) {
std::ignore = tokenizer.consume();
@ -157,7 +157,7 @@ namespace arti::lang {
}
}
Expected<ast::Optional<ast::ExpressionNode>>
Expected<Optional<ast::ExpressionNode>>
Parser::parsePrimaryExpression() {
auto peekToken = tokenizer.peek();
@ -509,7 +509,7 @@ namespace arti::lang {
initializerNode->location = newNode->location;
while (stillParams) {
auto currLocation = ast::SourceLocation{};
auto currLocation = SourceLocation{};
if (auto dot = consume(TokenV::opDot, "'.'"); ! dot) {
return Unexpected<>{ std::move(dot).error() };
@ -651,7 +651,7 @@ namespace arti::lang {
}
}
auto idxExpr = ast::Optional<ast::ExpressionNode>{};
auto idxExpr = Optional<ast::ExpressionNode>{};
if (! skipSliceStart) {
auto idx = parseExpression();

View File

@ -36,7 +36,7 @@ namespace arti::lang {
Expected<ast::AST> Parser::parse() {
auto unit = ast::MakeNode<ast::AST>();
auto decl = ast::Optional<ast::TopLevelDeclNode>{};
auto decl = Optional<ast::TopLevelDeclNode>{};
bool keepParsing = true;
unit->unitName = this->unitName;

View File

@ -30,7 +30,7 @@ namespace arti::lang {
Expected<ast::CodeBlockStmtNode> Parser::parseCodeBlock() {
auto node = ast::MakeNode<ast::CodeBlockStmtNode>();
auto stmt = ast::Optional<ast::StatementNode>{};
auto stmt = Optional<ast::StatementNode>{};
bool keepParsing = true;
if (auto lsquirly = consume(TokenV::opLSquirly, "'{'"); ! lsquirly) {
@ -74,9 +74,9 @@ namespace arti::lang {
return node;
}
Expected<ast::Optional<ast::StatementNode>>
Expected<Optional<ast::StatementNode>>
Parser::parseStatement() {
ast::Optional<std::string_view> label;
Optional<std::string_view> label;
auto peekToken = tokenizer.peek();
@ -368,10 +368,10 @@ namespace arti::lang {
};
if (peekToken->value == TokenV::kwLet) {
node->mutability = ast::Mutability::Mutable;
node->mutability = Mutability::Mutable;
}
else if (peekToken->value == TokenV::kwDef) {
node->mutability = ast::Mutability::Constant;
node->mutability = Mutability::Constant;
}
else {
return langException<ExceptCode::ecUnexpectedToken>(
@ -921,7 +921,7 @@ namespace arti::lang {
}
else {
curCase->location = std::visit(
[](const auto &exprNode) -> ast::SourceLocation {
[](const auto &exprNode) -> SourceLocation {
return exprNode->location;
},
expr.value()
@ -1107,13 +1107,13 @@ namespace arti::lang {
return Unexpected<>{ std::move(isLet).error() };
}
else if (isLet.value()) {
node->varMutability = ast::Mutability::Mutable;
node->varMutability = Mutability::Mutable;
}
else if (auto isDef = matchAndConsume(TokenV::kwDef); ! isDef) {
return Unexpected<>{ std::move(isDef).error() };
}
else if (isDef.value()) {
node->varMutability = ast::Mutability::Constant;
node->varMutability = Mutability::Constant;
}
else {
auto peekToken = tokenizer.peek();
@ -1309,7 +1309,7 @@ namespace arti::lang {
}
else {
node->location = std::visit(
[](const auto &exprNode) -> ast::SourceLocation {
[](const auto &exprNode) -> SourceLocation {
return exprNode->location;
},
expr.value()

View File

@ -180,8 +180,8 @@ namespace arti::lang {
return node;
}
Expected<ast::Vector<ast::TypeQualifier>> Parser::parseTypeQualifiers() {
auto qualifs = ast::Vector<ast::TypeQualifier>{};
Expected<Vector<TypeQualifier>> Parser::parseTypeQualifiers() {
auto qualifs = Vector<TypeQualifier>{};
auto peekToken = tokenizer.peek();
@ -196,7 +196,7 @@ namespace arti::lang {
using enum TokenV;
case opStar:
qualifs.push_back(ast::TypeQualifier::Pointer);
qualifs.push_back(TypeQualifier::Pointer);
state = None;
break;
case opMut:
@ -208,7 +208,7 @@ namespace arti::lang {
"non mutable type qualifier, i.e. any of ( *, ?, [] )"
);
}
qualifs.push_back(ast::TypeQualifier::Mutable);
qualifs.push_back(TypeQualifier::Mutable);
state = AfterMutable;
break;
case opOpt:
@ -220,7 +220,7 @@ namespace arti::lang {
"non optional type qualifier, i.e. any of ( *, $, [] )"
);
}
qualifs.push_back(ast::TypeQualifier::Optional);
qualifs.push_back(TypeQualifier::Optional);
state = AfterOptional;
break;
case opLBracket:
@ -232,7 +232,7 @@ namespace arti::lang {
return Unexpected<>{ std::move(peekToken).error() };
}
qualifs.push_back(ast::TypeQualifier::Slice);
qualifs.push_back(TypeQualifier::Slice);
state = None;
break;
default:
@ -251,8 +251,8 @@ namespace arti::lang {
return qualifs;
}
Expected<ast::Vector<ast::TypeNode>> Parser::parseGenericArgumentsList() {
auto args = ast::Vector<ast::TypeNode>{};
Expected<Vector<ast::TypeNode>> Parser::parseGenericArgumentsList() {
auto args = Vector<ast::TypeNode>{};
if (auto lt = consume(TokenV::opLt, "'<'"); ! lt) {
return Unexpected<>{ std::move(lt).error() };

View File

@ -0,0 +1,800 @@
//============================================================================//
// //
// artichoke programming language //
// //
// Copyright (C) 2025 Erick Saul Guzman Ramos, whoami.artichoke.dev //
// //
// //
// This program is free software: you can redistribute it and/or modify //
// it under the terms of the GNU Affero General Public License as published //
// by the Free Software Foundation, either version 3 of the License, or //
// (at your option) any later version. //
// //
// This program is distributed in the hope that it will be useful, //
// but WITHOUT ANY WARRANTY; without even the implied warranty of //
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
// GNU Affero General Public License for more details. //
// //
// You should have received a copy of the GNU Affero General Public License //
// along with this program. If not, see <https://www.gnu.org/licenses/>. //
// //
//============================================================================//
#include <artichoke/Semantics/SymbolTable.hpp>
#include <utility>
#include <format>
#include <sstream>
#include <string>
#include <string_view>
#include <vector>
#include <artichoke/Util/OverloadSet.hpp>
namespace arti::lang::sym {
/* Building blocks of the text tree: branch markers for non-last and last
   children, and the two continuation strings appended to a prefix. */
constexpr std::string_view StrTreeNoNode = "";
constexpr std::string_view StrTreeChilds = "├─";
constexpr std::string_view StrTreeLast = "└─";
constexpr std::string_view StrTreeSpace = " ";
/* Returns `prefix` extended for the children of a node: StrTreeSpace is
   appended when the node is the last child, StrTreeNoNode otherwise. */
inline std::string nextPrefix(const std::string &prefix, bool isLast) {
  const std::string_view continuation = isLast ? StrTreeSpace : StrTreeNoNode;
  std::string extended = prefix;
  extended.append(continuation);
  return extended;
}
/* Forward declarations of every toString overload, so the generic append*
   helpers below can call them before their definitions. Each overload takes
   the value to render and the prefix for the lines it emits beneath its
   first line. */
std::string toString(const SymbolTable &, const std::string &);
std::string toString(const SharedPtr<SymbolTable> &, const std::string &);
std::string toString(const sym::Symbol &, const std::string &);
std::string toString(const sym::Function &, const std::string &);
std::string toString(const sym::FunctionType &, const std::string &);
std::string toString(const sym::Enum &, const std::string &);
std::string toString(const sym::EnumMember &, const std::string &);
std::string toString(const sym::Struct &, const std::string &);
std::string toString(const sym::StructMember &, const std::string &);
std::string toString(const sym::TemplateParam &, const std::string &);
std::string toString(const sym::TemplatedSymbol &, const std::string &);
std::string toString(const sym::Variable &, const std::string &);
std::string toString(const sym::Primitive &, const std::string &);
std::string toString(const sym::TypeSymbol &, const std::string &);
std::string
toString(const SharedPtr<sym::FunctionType> &, const std::string &);
/* Writes one child entry for `item`: a newline, `prefix`, the branch marker
   (last or non-last), a space, and the item's toString() rendering. The
   item's own nested lines use the prefix produced by nextPrefix(). */
template <typename T>
void appendItem(
  std::stringstream &ss,
  const std::string &prefix,
  const T &item,
  bool isLastChild
) {
  const std::string_view marker = isLastChild ? StrTreeLast : StrTreeChilds;
  const std::string childPrefix = nextPrefix(prefix, isLastChild);
  ss << "\n" << prefix << marker << " " << toString(item, childPrefix);
}
/* Writes a titled group followed by one child entry per element of `items`.
   Nothing is written when `items` is empty. */
template <typename T>
void appendGroupVec(
  std::stringstream &ss,
  const std::string &prefix,
  std::string_view title,
  const std::vector<T> &items,
  bool isLastGroup
) {
  if (items.empty()) {
    return;
  }
  const std::string_view marker = isLastGroup ? StrTreeLast : StrTreeChilds;
  ss << "\n" << prefix << marker << " " << title;
  /* Every element hangs under the same group prefix. */
  const std::string childPrefix = nextPrefix(prefix, isLastGroup);
  std::size_t remaining = items.size();
  for (const auto &item : items) {
    --remaining;
    appendItem(ss, childPrefix, item, remaining == 0);
  }
}
/* Writes a titled group containing exactly one child entry, `item`, which is
   always rendered as the last child of the group. */
template <typename T>
void appendGroupOne(
  std::stringstream &ss,
  const std::string &prefix,
  std::string_view title,
  const T &item,
  bool isLastGroup
) {
  const std::string_view marker = isLastGroup ? StrTreeLast : StrTreeChilds;
  ss << "\n" << prefix << marker << " " << title;
  const std::string childPrefix = nextPrefix(prefix, isLastGroup);
  appendItem(ss, childPrefix, item, true);
}
inline void appendGroupLeaf(
std::stringstream &ss,
const std::string &prefix,
std::string_view title,
std::string_view value,
bool isLastGroup
) {
ss << "\n"
<< prefix << (isLastGroup ? StrTreeLast : StrTreeChilds) << " " << title;
ss << "\n"
<< nextPrefix(prefix, isLastGroup) << StrTreeLast << " `" << value
<< "`";
}
inline void appendGroupLeafList(
std::stringstream &ss,
const std::string &prefix,
std::string_view title,
const std::vector<std::string> &values,
bool isLastGroup
) {
if (values.empty()) {
return;
}
ss << "\n"
<< prefix << (isLastGroup ? StrTreeLast : StrTreeChilds) << " " << title;
for (std::size_t i = 0; i < values.size(); ++i) {
ss << "\n"
<< nextPrefix(prefix, isLastGroup)
<< (i + 1 == values.size() ? StrTreeLast : StrTreeChilds) << " `"
<< values[i] << "`";
}
}
/* Writes a titled group with one backtick-quoted line per map key, then lets
   `printer(ss, nestedPrefix, value)` emit whatever belongs beneath that key.
   Nothing is written when `map` is empty. Entries appear in the map's own
   iteration order. */
template <typename Map, typename Printer>
void appendGroupHashMap(
  std::stringstream &ss,
  const std::string &prefix,
  std::string_view title,
  const Map &map,
  bool isLastGroup,
  Printer printer
) {
  if (map.empty()) {
    return;
  }
  const std::string_view groupMarker =
    isLastGroup ? StrTreeLast : StrTreeChilds;
  ss << "\n" << prefix << groupMarker << " " << title;
  /* The prefix shared by every entry depends only on the group position. */
  const auto entryPrefix = nextPrefix(prefix, isLastGroup);
  const auto total = map.size();
  std::size_t seen = 0;
  for (const auto &[key, value] : map) {
    ++seen;
    const bool isLastEntry = seen == total;
    const std::string_view entryMarker =
      isLastEntry ? StrTreeLast : StrTreeChilds;
    ss << "\n" << entryPrefix << entryMarker << " `" << key << "`";
    printer(ss, nextPrefix(entryPrefix, isLastEntry), value);
  }
}
/* Returns `name`, or the placeholder "<anonymous>" when it is empty. */
inline std::string formatName(const String &name) {
  if (name.empty()) {
    return std::string("<anonymous>");
  }
  return std::string(name);
}
/* Formats a source location as "(line L, col C)". */
inline std::string formatLocation(const SourceLocation &location) {
  const auto line = location.line;
  const auto column = location.column;
  return std::format("(line {}, col {})", line, column);
}
/* One-line description of a scope: its (possibly anonymous) name followed by
   its source location. */
inline std::string describeScope(const SymbolTable &table) {
  const std::string scopeName = formatName(table.name);
  const std::string where = formatLocation(table.location);
  return std::format("Scope `{}` {}", scopeName, where);
}
/* Describes the scope behind a weak handle, or returns "<expired scope>"
   when the scope no longer exists. */
inline std::string describeWeakScope(const WeakPtr<SymbolTable> &weak) {
  const auto scope = weak.lock();
  if (! scope) {
    return "<expired scope>";
  }
  return describeScope(*scope);
}
/* Forward declarations of the describeWeak* helpers. Their definitions come
   after describeTemplatedSymbolTarget, which calls several of them. */
std::string describeWeakFunction(const WeakPtr<sym::Function> &);
std::string describeWeakFunctionType(const WeakPtr<sym::FunctionType> &);
std::string describeWeakStruct(const WeakPtr<sym::Struct> &);
std::string describeWeakEnum(const WeakPtr<sym::Enum> &);
std::string describeWeakPrimitive(const WeakPtr<sym::Primitive> &);
std::string describeWeakTemplateParam(const WeakPtr<sym::TemplateParam> &);
std::string describeWeakTypeSymbol(const WeakPtr<sym::TypeSymbol> &);
std::string
describeWeakTemplatedSymbol(const WeakPtr<sym::TemplatedSymbol> &);
/* Returns a one-line description of the symbol a TemplatedSymbol wraps.
   Each alternative of the weak variant is described by kind and name; an
   alternative whose target no longer exists yields an "<expired ...>"
   placeholder. The TypeSymbol alternative goes through
   describeWeakTypeSymbol so that an expired type is reported as expired,
   consistently with every other alternative. */
inline std::string
describeTemplatedSymbolTarget(const sym::WeakSymbol &symbol) {
  return std::visit(
    OverloadSet{
      [](const WeakPtr<sym::Function> &fn) {
        return describeWeakFunction(fn);
      },
      [](const WeakPtr<sym::Variable> &var) {
        if (auto locked = var.lock()) {
          return std::format("Variable `{}`", formatName(locked->name));
        }
        return std::string("<expired variable>");
      },
      [](const WeakPtr<sym::TypeSymbol> &type) {
        return describeWeakTypeSymbol(type);
      },
      [](const WeakPtr<sym::FunctionType> &fnType) {
        return describeWeakFunctionType(fnType);
      },
      [](const WeakPtr<sym::Primitive> &primitive) {
        return describeWeakPrimitive(primitive);
      },
      [](const WeakPtr<sym::Struct> &structure) {
        return describeWeakStruct(structure);
      },
      [](const WeakPtr<sym::Enum> &enumeration) {
        return describeWeakEnum(enumeration);
      },
      [](const WeakPtr<sym::TemplatedSymbol> &templated) {
        if (auto locked = templated.lock()) {
          return std::format(
            "TemplatedSymbol `{}`",
            formatName(locked->name)
          );
        }
        return std::string("<expired templated symbol>");
      } },
    symbol
  );
}
/* Describes the function behind a weak handle, or returns
   "<expired function>" when it no longer exists. */
inline std::string describeWeakFunction(const WeakPtr<sym::Function> &weak) {
  const auto fn = weak.lock();
  if (! fn) {
    return "<expired function>";
  }
  return std::format("Function `{}`", formatName(fn->name));
}
/* Describes the function type behind a weak handle, or returns
   "<expired function type>" when it no longer exists. */
inline std::string
describeWeakFunctionType(const WeakPtr<sym::FunctionType> &weak) {
  const auto fnType = weak.lock();
  if (! fnType) {
    return "<expired function type>";
  }
  return std::format("FunctionType `{}`", formatName(fnType->name));
}
/* Describes the struct behind a weak handle, or returns "<expired struct>"
   when it no longer exists. */
inline std::string describeWeakStruct(const WeakPtr<sym::Struct> &weak) {
  const auto structure = weak.lock();
  if (! structure) {
    return "<expired struct>";
  }
  return std::format("Struct `{}`", formatName(structure->name));
}
/* Describes the enum behind a weak handle, or returns "<expired enum>" when
   it no longer exists. */
inline std::string describeWeakEnum(const WeakPtr<sym::Enum> &weak) {
  const auto enumeration = weak.lock();
  if (! enumeration) {
    return "<expired enum>";
  }
  return std::format("Enum `{}`", formatName(enumeration->name));
}
/* Describes the primitive behind a weak handle, or returns
   "<expired primitive>" when it no longer exists. */
inline std::string
describeWeakPrimitive(const WeakPtr<sym::Primitive> &weak) {
  const auto primitive = weak.lock();
  if (! primitive) {
    return "<expired primitive>";
  }
  return std::format("Primitive `{}`", formatName(primitive->name));
}
/* Describes the template parameter behind a weak handle, or returns
   "<expired template param>" when it no longer exists. */
inline std::string
describeWeakTemplateParam(const WeakPtr<sym::TemplateParam> &weak) {
  const auto param = weak.lock();
  if (! param) {
    return "<expired template param>";
  }
  return std::format("TemplateParam `{}`", formatName(param->name));
}
/* Describes the templated symbol behind a weak handle, or returns
   "<expired templated symbol>" when it no longer exists. The expired
   placeholder names the templated symbol itself rather than the
   template-parameter text used by describeWeakTemplateParam. */
inline std::string
describeWeakTemplatedSymbol(const WeakPtr<sym::TemplatedSymbol> &weak) {
  if (auto locked = weak.lock()) {
    return std::format("TemplatedSymbol `{}`", formatName(locked->name));
  }
  return "<expired templated symbol>";
}
/* Returns "TypeSymbol" while the referenced type still exists and
   "<expired type>" once it does not. The type's contents are not
   inspected. */
inline std::string
describeWeakTypeSymbol(const WeakPtr<sym::TypeSymbol> &weak) {
  if (weak.expired()) {
    return "<expired type>";
  }
  return "TypeSymbol";
}
/* Returns the enumerator name of `qualifier` as text, or "Unknown" for a
   value that matches no enumerator. */
inline std::string qualifierName(TypeQualifier qualifier) {
  switch (qualifier) {
    case TypeQualifier::Uninitialized:
      return "Uninitialized";
    case TypeQualifier::Pointer:
      return "Pointer";
    case TypeQualifier::Slice:
      return "Slice";
    case TypeQualifier::Mutable:
      return "Mutable";
    case TypeQualifier::Optional:
      return "Optional";
  }
  /* Reached only for a value outside the enumerators listed above. */
  return "Unknown";
}
/* Returns a one-line description of what a TypeSymbol refers to. A nested
   TypeSymbol is reported as "TypeSymbol" (or "<null type>" when the pointer
   is null); every weak alternative is delegated to its describeWeak*
   helper. */
std::string describeTypeKind(const sym::TypeSymbol::Type &type) {
  const auto describe = OverloadSet{
    [](const SharedPtr<sym::TypeSymbol> &inner) -> std::string {
      return inner ? "TypeSymbol" : "<null type>";
    },
    [](const WeakPtr<sym::FunctionType> &ref) -> std::string {
      return describeWeakFunctionType(ref);
    },
    [](const WeakPtr<sym::Primitive> &ref) -> std::string {
      return describeWeakPrimitive(ref);
    },
    [](const WeakPtr<sym::Struct> &ref) -> std::string {
      return describeWeakStruct(ref);
    },
    [](const WeakPtr<sym::Enum> &ref) -> std::string {
      return describeWeakEnum(ref);
    },
    [](const WeakPtr<sym::TemplatedSymbol> &ref) -> std::string {
      return describeWeakTemplatedSymbol(ref);
    },
    [](const WeakPtr<sym::TemplateParam> &ref) -> std::string {
      return describeWeakTemplateParam(ref);
    },
  };
  return std::visit(describe, type);
}
std::string toString(const SymbolTable &table, const std::string &prefix) {
std::stringstream ss;
ss << describeScope(table);
int groups = 0;
if (! table.symbols.empty()) {
++groups;
}
if (! table.inner.empty()) {
++groups;
}
int emitted = 0;
if (! table.symbols.empty()) {
appendGroupHashMap(
ss,
prefix,
"Symbols",
table.symbols,
++emitted == groups,
[](
std::stringstream &stream,
const std::string &valuePrefix,
const sym::Symbol &value
) { appendItem(stream, valuePrefix, value, true); }
);
}
if (! table.inner.empty()) {
appendGroupHashMap(
ss,
prefix,
"NestedScopes",
table.inner,
++emitted == groups,
[](
std::stringstream &stream,
const std::string &valuePrefix,
const SharedPtr<SymbolTable> &child
) { appendItem(stream, valuePrefix, child, true); }
);
}
return ss.str();
}
/* Pointer overload: renders the pointed-to scope, or returns "<null scope>"
   for a null pointer. */
std::string
toString(const SharedPtr<SymbolTable> &ptr, const std::string &prefix) {
  return ptr ? toString(*ptr, prefix) : std::string("<null scope>");
}
/* Pointer overload: renders the pointed-to function type, or returns
   "<null function type>" for a null pointer. */
std::string
toString(const SharedPtr<sym::FunctionType> &ptr, const std::string &prefix) {
  return ptr ? toString(*ptr, prefix) : std::string("<null function type>");
}
/* Renders whichever symbol kind the variant currently holds by dispatching
   to that kind's toString overload. A null pointer yields a "<null ...>"
   placeholder naming the kind; the TemplateParam placeholder is
   "<null template param>", distinct from the TemplatedSymbol one. */
std::string toString(const sym::Symbol &symbol, const std::string &prefix) {
  return std::visit(
    OverloadSet{
      [&prefix](const SharedPtr<sym::Function> &value) -> std::string {
        return value ? toString(*value, prefix)
                     : std::string("<null function>");
      },
      [&prefix](const SharedPtr<sym::Variable> &value) -> std::string {
        return value ? toString(*value, prefix)
                     : std::string("<null variable>");
      },
      [&prefix](const SharedPtr<sym::TypeSymbol> &value) -> std::string {
        return value ? toString(*value, prefix)
                     : std::string("<null type symbol>");
      },
      [&prefix](const SharedPtr<sym::FunctionType> &value) -> std::string {
        return value ? toString(*value, prefix)
                     : std::string("<null function type>");
      },
      [&prefix](const SharedPtr<sym::Primitive> &value) -> std::string {
        return value ? toString(*value, prefix)
                     : std::string("<null primitive>");
      },
      [&prefix](const SharedPtr<sym::Struct> &value) -> std::string {
        return value ? toString(*value, prefix)
                     : std::string("<null struct>");
      },
      [&prefix](const SharedPtr<sym::Enum> &value) -> std::string {
        return value ? toString(*value, prefix) : std::string("<null enum>");
      },
      [&prefix](const SharedPtr<sym::TemplatedSymbol> &value) -> std::string {
        return value ? toString(*value, prefix)
                     : std::string("<null templated symbol>");
      },
      [&prefix](const SharedPtr<sym::TemplateParam> &value) -> std::string {
        return value ? toString(*value, prefix)
                     : std::string("<null template param>");
      },
    },
    symbol
  );
}
/* Renders a templated symbol: a header line, then a "BaseSymbol" leaf
   describing the wrapped symbol (skipped if the variant is valueless) and a
   "Params" group (skipped when there are no parameters). */
std::string
toString(const sym::TemplatedSymbol &templated, const std::string &prefix) {
  std::stringstream out;
  out << std::format(
    "TemplatedSymbol `{}` {}",
    formatName(templated.name),
    formatLocation(templated.location)
  );

  const bool hasBase = ! templated.symbol.valueless_by_exception();
  const bool hasParams = ! templated.params.empty();

  if (hasBase) {
    /* "BaseSymbol" closes the tree only when no "Params" group follows. */
    appendGroupLeaf(
      out,
      prefix,
      "BaseSymbol",
      describeTemplatedSymbolTarget(templated.symbol),
      ! hasParams
    );
  }
  if (hasParams) {
    appendGroupVec(out, prefix, "Params", templated.params, true);
  }
  return out.str();
}
/* Renders a function: a header line, then a "Body" group when the function
   has a body scope and a "Type" group when its function type is still
   alive. */
std::string toString(const sym::Function &fn, const std::string &prefix) {
  std::stringstream out;
  out << std::format(
    "Function `{}` {}",
    formatName(fn.name),
    formatLocation(fn.location)
  );

  /* Lock once so the type stays alive while it is rendered. */
  const auto signature = fn.type.lock();
  const bool hasBody = static_cast<bool>(fn.body);
  const bool hasType = static_cast<bool>(signature);

  if (hasBody) {
    /* "Body" closes the tree only when no "Type" group follows. */
    appendGroupOne(out, prefix, "Body", fn.body, ! hasType);
  }
  if (hasType) {
    appendGroupOne(out, prefix, "Type", signature, true);
  }
  return out.str();
}
/// Renders a `FunctionType` as a header line (name and location) followed by
/// up to three child groups, in this order: "Symbol", "Arguments", "Return".
///
/// A group is only emitted when it has content. The trailing boolean handed to
/// the append helpers is true for the last group that is actually emitted.
std::string
toString(const sym::FunctionType &fnType, const std::string &prefix) {
  std::stringstream out;
  out << std::format(
    "FunctionType `{}` {}",
    formatName(fnType.name),
    formatLocation(fnType.location)
  );

  const bool hasSymbol = ! fnType.symbol.expired();
  const bool hasArguments = ! fnType.arguments.empty();
  const bool hasReturn = static_cast<bool>(fnType.returnType);

  if (hasSymbol) {
    // Last only when neither "Arguments" nor "Return" follows.
    appendGroupLeaf(
      out,
      prefix,
      "Symbol",
      describeWeakFunction(fnType.symbol),
      ! hasArguments && ! hasReturn
    );
  }

  if (hasArguments) {
    // Pre-render every argument so the whole list is appended as one group.
    std::vector<std::string> rendered;
    rendered.reserve(fnType.arguments.size());
    for (const auto &arg : fnType.arguments) {
      rendered.emplace_back(describeWeakTypeSymbol(arg));
    }

    // Last only when no "Return" group follows.
    appendGroupLeafList(out, prefix, "Arguments", rendered, ! hasReturn);
  }

  if (hasReturn) {
    // "Return" is always the final group when it is present.
    appendGroupLeaf(
      out,
      prefix,
      "Return",
      describeWeakTypeSymbol(*fnType.returnType),
      true
    );
  }

  return out.str();
}
/// Renders an `Enum` as a header line (name, location, size, alignment)
/// followed by up to two child groups: "Members" and "Methods".
///
/// Each method entry is rendered as a single "Function" leaf describing the
/// weakly referenced function.
std::string
toString(const sym::Enum &enumeration, const std::string &prefix) {
  std::stringstream out;
  out << std::format(
    "Enum `{}` {} size={} align={}",
    formatName(enumeration.name),
    formatLocation(enumeration.location),
    enumeration.size,
    enumeration.alignment
  );

  const bool hasMembers = ! enumeration.membersList.empty();
  const bool hasMethods = ! enumeration.methods.empty();

  if (hasMembers) {
    // "Members" is the last group only when no "Methods" group follows.
    appendGroupVec(
      out,
      prefix,
      "Members",
      enumeration.membersList,
      ! hasMethods
    );
  }

  if (hasMethods) {
    // Callback used for each map value: one "Function" leaf, always marked
    // as the last child of its entry.
    const auto renderMethod = [](
                                std::stringstream &sink,
                                const std::string &entryPrefix,
                                const WeakPtr<sym::Function> &fn
                              ) {
      appendGroupLeaf(
        sink,
        entryPrefix,
        "Function",
        describeWeakFunction(fn),
        true
      );
    };

    // "Methods" is always the final group when it is present.
    appendGroupHashMap(
      out,
      prefix,
      "Methods",
      enumeration.methods,
      true,
      renderMethod
    );
  }

  return out.str();
}
/// Renders an `EnumMember` as a header line (name, location, index) and, when
/// the member carries a type, a single trailing "Type" leaf.
std::string
toString(const sym::EnumMember &member, const std::string &prefix) {
  std::stringstream out;
  out << std::format(
    "EnumMember `{}` {} index={}",
    formatName(member.name),
    formatLocation(member.location),
    member.index
  );

  // Members without a payload type consist of the header line only.
  if (! member.type) {
    return out.str();
  }

  appendGroupLeaf(
    out,
    prefix,
    "Type",
    describeWeakTypeSymbol(*member.type),
    true
  );
  return out.str();
}
/// Renders a `Struct` as a header line (name, location, size, alignment)
/// followed by up to two child groups: "Members" and "Methods".
///
/// Each method entry is rendered as a single "Function" leaf describing the
/// weakly referenced function.
std::string
toString(const sym::Struct &structure, const std::string &prefix) {
  std::stringstream out;
  out << std::format(
    "Struct `{}` {} size={} align={}",
    formatName(structure.name),
    formatLocation(structure.location),
    structure.size,
    structure.alignment
  );

  const bool hasMembers = ! structure.membersList.empty();
  const bool hasMethods = ! structure.methods.empty();

  if (hasMembers) {
    // "Members" is the last group only when no "Methods" group follows.
    appendGroupVec(
      out,
      prefix,
      "Members",
      structure.membersList,
      ! hasMethods
    );
  }

  if (hasMethods) {
    // Callback used for each map value: one "Function" leaf, always marked
    // as the last child of its entry.
    const auto renderMethod = [](
                                std::stringstream &sink,
                                const std::string &entryPrefix,
                                const WeakPtr<sym::Function> &fn
                              ) {
      appendGroupLeaf(
        sink,
        entryPrefix,
        "Function",
        describeWeakFunction(fn),
        true
      );
    };

    // "Methods" is always the final group when it is present.
    appendGroupHashMap(
      out,
      prefix,
      "Methods",
      structure.methods,
      true,
      renderMethod
    );
  }

  return out.str();
}
/// Renders a `StructMember` as a header line (name, location, index, offset)
/// followed by a single "Type" leaf, which is always emitted.
std::string
toString(const sym::StructMember &member, const std::string &prefix) {
  std::stringstream out;

  out << std::format(
    "StructMember `{}` {} index={} offset={}",
    formatName(member.name),
    formatLocation(member.location),
    member.index,
    member.offset
  );

  // The type leaf is the only child, so it is always marked as the last one.
  appendGroupLeaf(out, prefix, "Type", describeWeakTypeSymbol(member.type), true);

  return out.str();
}
/// Renders a `TemplateParam` as a single header line (name and location).
/// The prefix argument is unused because no child groups are emitted.
std::string toString(const sym::TemplateParam &param, const std::string &) {
  return std::format(
    "TemplateParam `{}` {}",
    formatName(param.name),
    formatLocation(param.location)
  );
}
/// Renders a `Variable` as a header line (name and location) followed by a
/// single "Type" leaf, which is always emitted.
std::string toString(const sym::Variable &var, const std::string &prefix) {
  std::stringstream out;

  out << std::format(
    "Variable `{}` {}",
    formatName(var.name),
    formatLocation(var.location)
  );

  // The type leaf is the only child, so it is always marked as the last one.
  appendGroupLeaf(
    out,
    prefix,
    "Type",
    describeWeakTypeSymbol(var.type),
    true
  );

  return out.str();
}
/// Renders a `Primitive` as a single header line (name, size, alignment).
/// `prefix` is accepted for signature parity with the other overloads but is
/// not used, since no child groups are emitted.
std::string
toString(const sym::Primitive &primitive, const std::string &prefix) {
  static_cast<void>(prefix);

  return std::format(
    "Primitive `{}` size={} align={}",
    formatName(primitive.name),
    primitive.size,
    primitive.alignment
  );
}
/// Renders a `TypeSymbol` as the literal header "TypeSymbol" followed by a
/// "Kind" leaf (always emitted) and, when a qualifier is set, a "Qualifier"
/// leaf.
std::string toString(const sym::TypeSymbol &type, const std::string &prefix) {
  std::stringstream out;
  out << "TypeSymbol";

  const bool hasQualifier = static_cast<bool>(type.qualifier);

  // "Kind" is the last leaf only when no "Qualifier" leaf follows.
  appendGroupLeaf(
    out,
    prefix,
    "Kind",
    describeTypeKind(type.type),
    ! hasQualifier
  );

  if (hasQualifier) {
    // "Qualifier" is always the final leaf when it is present.
    appendGroupLeaf(
      out,
      prefix,
      "Qualifier",
      qualifierName(*type.qualifier),
      true
    );
  }

  return out.str();
} // namespace
/// Renders `table` with `toString`, forwarding `prefix` to it, and returns the
/// result with `prefix` prepended.
///
/// With an empty `prefix` the rendered text is returned unchanged.
std::string toPrettyString(const SymbolTable &table, std::string prefix) {
  auto text = toString(table, prefix);

  // Nothing to prepend: hand back the rendering as-is.
  if (prefix.empty()) {
    return text;
  }

  text.insert(0, prefix);
  return text;
}
} // namespace arti::lang::sym

View File

@ -22,6 +22,7 @@
#include <artichoke/Tokenizer/Tokenizer.hpp>
#include <print>
#include <utility>
#include <artichoke/Tokenizer/TokenizerRange.hpp>
@ -41,20 +42,20 @@ namespace arti::lang {
}
Tokenizer::Tokenizer(Tokenizer &&rhs) noexcept
: line(rhs.line)
, column(rhs.column)
, iter(rhs.iter)
: line(std::exchange(rhs.line, 0))
, column(std::exchange(rhs.column, 0))
, iter(std::exchange(rhs.iter, {}))
, tokensGenerator(std::exchange(rhs.tokensGenerator, {}))
, tokensBuffer(std::exchange(rhs.tokensBuffer, {}))
, source(std::exchange(rhs.source, "")) { }
, source(std::exchange(rhs.source, {})) { }
Tokenizer &Tokenizer::operator=(Tokenizer &&rhs) noexcept {
line = rhs.line;
column = rhs.column;
iter = std::move(rhs.iter);
source = std::move(rhs.source);
tokensBuffer = std::move(rhs.tokensBuffer);
tokensGenerator = std::move(rhs.tokensGenerator);
line = std::exchange(rhs.line, 0);
column = std::exchange(rhs.column, 0);
iter = std::exchange(rhs.iter, {});
source = std::exchange(rhs.source, {});
tokensBuffer = std::exchange(rhs.tokensBuffer, {});
tokensGenerator = std::exchange(rhs.tokensGenerator, {});
return *this;
}
@ -282,7 +283,7 @@ namespace arti::lang {
}
else if (*(iter + 1) == '/') {
iter += 2;
column += 2;
column += 1;
return {};
}
}
@ -744,6 +745,8 @@ namespace arti::lang {
TrieMap<TokenV> buildOperatorsTrieMap();
Expected<Token> Tokenizer::readOperator() {
static auto tm = buildOperatorsTrieMap();
auto stIter = iter;
auto cLine = line;
@ -754,8 +757,6 @@ namespace arti::lang {
++column;
};
auto tm = buildOperatorsTrieMap();
if (not tm.root.childs.contains(*iter)) {
return langException<ExceptCode::ecInvalidCharacter>(line, column, *iter);
}
@ -781,8 +782,8 @@ namespace arti::lang {
Token tok{
TokenV::opStar,
line = cLine,
column = cColumn,
cLine,
cColumn,
{ stIter, iter }
};
@ -791,6 +792,7 @@ namespace arti::lang {
}
else if (lvNode != nullptr) {
iter -= lvNodeDiff;
column -= static_cast<size_t>(lvNodeDiff);
tok.value = *lvNode->value;
tok.strValue = std::string_view{ stIter, iter };
}