From patchwork Tue Dec 6 10:13:46 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Arthur Cohen X-Patchwork-Id: 61506 Return-Path: X-Original-To: patchwork@sourceware.org Delivered-To: patchwork@sourceware.org Received: from server2.sourceware.org (localhost [IPv6:::1]) by sourceware.org (Postfix) with ESMTP id 14A6E3AA8C00 for ; Tue, 6 Dec 2022 10:15:58 +0000 (GMT) X-Original-To: gcc-patches@gcc.gnu.org Delivered-To: gcc-patches@gcc.gnu.org Received: from mail-wm1-x32a.google.com (mail-wm1-x32a.google.com [IPv6:2a00:1450:4864:20::32a]) by sourceware.org (Postfix) with ESMTPS id 7C7FA395A457 for ; Tue, 6 Dec 2022 10:12:12 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 7C7FA395A457 Authentication-Results: sourceware.org; dmarc=none (p=none dis=none) header.from=embecosm.com Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=embecosm.com Received: by mail-wm1-x32a.google.com with SMTP id v7so10809654wmn.0 for ; Tue, 06 Dec 2022 02:12:12 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=embecosm.com; s=google; h=content-transfer-encoding:mime-version:reply-to:references :in-reply-to:message-id:date:subject:cc:to:from:from:to:cc:subject :date:message-id:reply-to; bh=2TqoQuHt0oQ2EMM6oId7u63575VmDfpngMEFyYGnhy8=; b=fI/3x6jNiMR0wK/jjim7FGRg1AoI5ORrdRRXHSASu81ehLUCwQda76Co+LXIXMN0c8 IvWtHsJP06qGnVytS8bvS1DSJcCWGoXgM+4Isp7BgsSAPJYjOICs453K4tbUkQ/kWJLx trywNBjzhWJQuSCveZYAabgkfMx9pZ1Qh4Bg+XVKAmkaHtyiQaVxuKo+w4h2MZIgz0SJ 74R2VKjh8iw14nUD/mgas7ahEkKeEj+TztuZYnJ8wfO9KnXhvk7LgUSYo69Vq8W7rlrd WnehpGO7YpEz/6IQkpca6J8WAlgZ8pGiWVEw5gwpuDQEDI8jxOwqPYKS2G+GxbpBq4WA HKOg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:reply-to:references :in-reply-to:message-id:date:subject:cc:to:from:x-gm-message-state :from:to:cc:subject:date:message-id:reply-to; bh=2TqoQuHt0oQ2EMM6oId7u63575VmDfpngMEFyYGnhy8=; b=UWu6TLbvl5NOBOkD2nJxo5X+Pb2dbpyzKSU8BNLMaVEkh7otde+Xl2mCgk5VrVvW9p Bw9yyxZobhwBoSVpH+q4dhJrTNae6kI0hj4e/FZBSYw1zw4OYHyvtP3lx3hpp87Hp4w1 d/QL07o5uR/21PoyYr2UFfkU0ZgvlA7avszOPwLDRiN4FFvrKQG8zItW/dfniWvd/oo2 F4QpgUGO//zWBQCWUyebUlg5uR8/Y9UpTsfLdP+91qi6Wb7SpkomB/87iNwwy5CiQyoR 9y2bnEqJYaudig5gVI9Bi9xG7KiWwbptw27yVGLYMLfGLUFPGUw9bOr7v6WhDxMJfx3M aN+A== X-Gm-Message-State: ANoB5pmGhZDvkviLK4XaezWdeeFYwzcpGHOxeJzd8SxQhebXMaCU3Pie Be0cVxr3yS+V/qQSi2yGbirh+B3/kRqLTrigng== X-Google-Smtp-Source: AA0mqf6pJcUzS5L7KzdVl/yNqax5jnfC8i2xhRVZKj2hYHXw9kjHK41ea5Put3qwekIfy/+/Fn+Idw== X-Received: by 2002:a05:600c:4d83:b0:3d1:d866:c82a with SMTP id v3-20020a05600c4d8300b003d1d866c82amr4718485wmp.88.1670321528742; Tue, 06 Dec 2022 02:12:08 -0800 (PST) Received: from platypus.lan ([2001:861:5e4c:3bb0:6424:328a:1734:3249]) by smtp.googlemail.com with ESMTPSA id r10-20020a05600c458a00b003cfd4a50d5asm27052699wmo.34.2022.12.06.02.12.08 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 06 Dec 2022 02:12:08 -0800 (PST) From: arthur.cohen@embecosm.com To: gcc-patches@gcc.gnu.org Cc: gcc-rust@gcc.gnu.org, The Other , Philip Herron Subject: [PATCH Rust front-end v4 14/46] gccrs: Add Parser for Rust front-end pt.2 Date: Tue, 6 Dec 2022 11:13:46 +0100 Message-Id: <20221206101417.778807-15-arthur.cohen@embecosm.com> X-Mailer: git-send-email 2.38.1 In-Reply-To: <20221206101417.778807-1-arthur.cohen@embecosm.com> References: <20221206101417.778807-1-arthur.cohen@embecosm.com> MIME-Version: 1.0 X-Spam-Status: No, score=-22.8 required=5.0 tests=BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, GIT_PATCH_0, KAM_NUMSUBJECT, RCVD_IN_DNSWL_NONE, SPF_HELO_NONE, SPF_PASS, TXREP autolearn=unavailable autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org X-BeenThere: gcc-patches@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-patches mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: arthur.cohen@embecosm.com Errors-To: gcc-patches-bounces+patchwork=sourceware.org@gcc.gnu.org Sender: "Gcc-patches" From: The Other This patch contains the second half of the templated Rust parser. Co-authored-by: Philip Herron Co-authored-by: Arthur Cohen ::parse_self_param () } } +/* Parses a method. Note that this function is probably useless because using + * lookahead to determine whether a function is a method is a PITA (maybe not + * even doable), so most places probably parse a "function or method" and then + * resolve it into whatever it is afterward. As such, this is only here for + * algorithmically defining the grammar rule. */ +template +AST::Method +Parser::parse_method () +{ + Location locus = lexer.peek_token ()->get_locus (); + /* Note: as a result of the above, this will not attempt to disambiguate a + * function parse qualifiers */ + AST::FunctionQualifiers qualifiers = parse_function_qualifiers (); + + skip_token (FN_TOK); + + const_TokenPtr ident_tok = expect_token (IDENTIFIER); + if (ident_tok == nullptr) + { + skip_after_next_block (); + return AST::Method::create_error (); + } + Identifier method_name = ident_tok->get_str (); + + // parse generic params - if exist + std::vector> generic_params + = parse_generic_params_in_angles (); + + if (!skip_token (LEFT_PAREN)) + { + Error error (lexer.peek_token ()->get_locus (), + "method missing opening parentheses before parameter list"); + add_error (std::move (error)); + + skip_after_next_block (); + return AST::Method::create_error (); + } + + // parse self param + AST::SelfParam self_param = parse_self_param (); + if (self_param.is_error ()) + { + Error error (lexer.peek_token ()->get_locus (), + "could not parse self param in method"); + add_error (std::move (error)); + + skip_after_next_block (); + return AST::Method::create_error (); + } + + // skip comma if it exists + if (lexer.peek_token ()->get_id () == COMMA) + lexer.skip_token (); + + // parse function parameters + std::vector function_params + = parse_function_params ([] (TokenId id) { return id == RIGHT_PAREN; }); + + if (!skip_token (RIGHT_PAREN)) + { + Error error (lexer.peek_token ()->get_locus (), + "method declaration missing closing parentheses after " + "parameter list"); + add_error (std::move (error)); + + skip_after_next_block (); + return AST::Method::create_error (); + } + + // parse function return type - if exists + std::unique_ptr return_type = parse_function_return_type (); + + // parse where clause - if exists + AST::WhereClause where_clause = parse_where_clause (); + + // parse block expression + std::unique_ptr block_expr = parse_block_expr (); + if (block_expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "method declaration missing block expression"); + add_error (std::move (error)); + + skip_after_end_block (); + return AST::Method::create_error (); + } + + // does not parse visibility, but this method isn't used, so doesn't matter + return AST::Method (std::move (method_name), std::move (qualifiers), + std::move (generic_params), std::move (self_param), + std::move (function_params), std::move (return_type), + std::move (where_clause), std::move (block_expr), + AST::Visibility::create_error (), AST::AttrVec (), locus); +} + +/* Parses an expression statement (disambiguates to expression with or without + * block statement). */ +template +std::unique_ptr +Parser::parse_expr_stmt (AST::AttrVec outer_attrs, + ParseRestrictions restrictions) +{ + /* potential thoughts - define new virtual method "has_block()" on expr. + * parse expr and then determine whether semicolon is needed as a result of + * this method. but then this would require dynamic_cast, which is not + * allowed. */ + + /* okay new thought - big switch to disambiguate exprs with blocks - either + * block expr, async block expr, unsafe block expr, loop expr, if expr, if + * let expr, or match expr. So all others are exprs without block. */ + /* new thought: possible initial tokens: 'loop', 'while', 'for', lifetime + * (and then ':' and then loop), 'if', 'match', '{', 'async', 'unsafe' (and + * then + * '{')). This seems to have no ambiguity. */ + + const_TokenPtr t = lexer.peek_token (); + /* TODO: should the switch just directly call the individual parse methods + * rather than adding another layer of indirection with + * parse_expr_stmt_with_block()? */ + switch (t->get_id ()) + { + case LOOP: + case WHILE: + case FOR: + case IF: + case MATCH_TOK: + case LEFT_CURLY: + case ASYNC: + // expression with block + return parse_expr_stmt_with_block (std::move (outer_attrs)); + case LIFETIME: { + /* FIXME: are there any expressions without blocks that can have + * lifetime as their first token? Or is loop expr the only one? */ + // safe side for now: + if (lexer.peek_token (1)->get_id () == COLON + && lexer.peek_token (2)->get_id () == LOOP) + { + return parse_expr_stmt_with_block (std::move (outer_attrs)); + } + else + { + return parse_expr_stmt_without_block (std::move (outer_attrs), + restrictions); + } + } + case UNSAFE: { + /* FIXME: are there any expressions without blocks that can have + * unsafe as their first token? Or is unsafe the only one? */ + // safe side for now + if (lexer.peek_token (1)->get_id () == LEFT_CURLY) + { + return parse_expr_stmt_with_block (std::move (outer_attrs)); + } + else + { + return parse_expr_stmt_without_block (std::move (outer_attrs), + restrictions); + } + } + default: + // not a parse expr with block, so must be expr without block + /* TODO: if possible, be more selective about possible expr without + * block initial tokens in order to prevent more syntactical errors at + * parse time. */ + return parse_expr_stmt_without_block (std::move (outer_attrs), + restrictions); + } +} + +template +std::unique_ptr +Parser::parse_expr_with_block (AST::AttrVec outer_attrs) +{ + std::unique_ptr expr_parsed = nullptr; + + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case IF: + // if or if let, so more lookahead to find out + if (lexer.peek_token (1)->get_id () == LET) + { + // if let expr + expr_parsed = parse_if_let_expr (std::move (outer_attrs)); + break; + } + else + { + // if expr + expr_parsed = parse_if_expr (std::move (outer_attrs)); + break; + } + case LOOP: + // infinite loop + expr_parsed = parse_loop_expr (std::move (outer_attrs)); + break; + case FOR: + // "for" iterator loop + expr_parsed = parse_for_loop_expr (std::move (outer_attrs)); + break; + case WHILE: { + // while or while let, so more lookahead to find out + if (lexer.peek_token (1)->get_id () == LET) + { + // while let loop expr + expr_parsed = parse_while_let_loop_expr (std::move (outer_attrs)); + break; + } + else + { + // while loop expr + expr_parsed = parse_while_loop_expr (std::move (outer_attrs)); + break; + } + } + case MATCH_TOK: + // match expression + expr_parsed = parse_match_expr (std::move (outer_attrs)); + break; + case LEFT_CURLY: + // block expression + expr_parsed = parse_block_expr (std::move (outer_attrs)); + break; + case ASYNC: + // async block expression + expr_parsed = parse_async_block_expr (std::move (outer_attrs)); + break; + case UNSAFE: + // unsafe block expression + expr_parsed = parse_unsafe_block_expr (std::move (outer_attrs)); + break; + case LIFETIME: + // some kind of loop expr (with loop label) + expr_parsed = parse_labelled_loop_expr (std::move (outer_attrs)); + break; + default: + add_error (Error ( + t->get_locus (), + "could not recognise expr beginning with %qs as an expr with block in" + " parsing expr statement", + t->get_token_description ())); + + skip_after_next_block (); + return nullptr; + } + + // ensure expr parsed exists + if (expr_parsed == nullptr) + { + Error error (t->get_locus (), + "failed to parse expr with block in parsing expr statement"); + add_error (std::move (error)); + + skip_after_end_block (); + return nullptr; + } + + return expr_parsed; +} + +/* Parses a expression statement containing an expression with block. + * Disambiguates internally. */ +template +std::unique_ptr +Parser::parse_expr_stmt_with_block ( + AST::AttrVec outer_attrs) +{ + auto expr_parsed = parse_expr_with_block (std::move (outer_attrs)); + auto locus = expr_parsed->get_locus (); + + // return expr stmt created from expr + return std::unique_ptr ( + new AST::ExprStmtWithBlock (std::move (expr_parsed), locus, + lexer.peek_token ()->get_id () == SEMICOLON)); +} + +/* Parses an expression statement containing an expression without block. + * Disambiguates further. */ +template +std::unique_ptr +Parser::parse_expr_stmt_without_block ( + AST::AttrVec outer_attrs, ParseRestrictions restrictions) +{ + /* TODO: maybe move more logic for expr without block in here for better + * error handling */ + + // attempt to parse via parse_expr_without_block - seems to work + std::unique_ptr expr = nullptr; + Location locus = lexer.peek_token ()->get_locus (); + + restrictions.expr_can_be_stmt = true; + + expr = parse_expr_without_block (std::move (outer_attrs), restrictions); + if (expr == nullptr) + { + // expr is required, error + Error error (lexer.peek_token ()->get_locus (), + "failed to parse expr without block in expr statement"); + add_error (std::move (error)); + + skip_after_semicolon (); + return nullptr; + } + + if (restrictions.consume_semi) + if (!skip_token (SEMICOLON)) + return nullptr; + + return std::unique_ptr ( + new AST::ExprStmtWithoutBlock (std::move (expr), locus)); +} + +/* Parses an expression without a block associated with it (further + * disambiguates). */ +template +std::unique_ptr +Parser::parse_expr_without_block ( + AST::AttrVec outer_attrs, ParseRestrictions restrictions) +{ + /* Notes on types of expr without block: + * - literal expr tokens that are literals + * - path expr path_in_expr or qual_path_in_expr + * - operator expr many different types + * unary: + * borrow expr ( '&' | '&&' ) 'mut'? expr + * dereference expr '*' expr + * error propagation expr '?' + * negation '-' expr + * not '!' expr + * binary: all start with expr + * - grouped/paren expr '(' inner_attributes expr ')' + * - array expr '[' inner_attributes array_elems? ']' + * - await expr expr '.' 'await' + * - (array/slice) index expr expr '[' expr ']' + * - tuple expr '(' inner_attributes tuple_elems? ')' + * note that a single elem tuple is distinguished from a grouped expr + * by a trailing comma, i.e. a grouped expr is preferred over a tuple expr + * - tuple index expr expr '.' tuple_index + * - struct expr path_in_expr (and optional other stuff) + * - enum variant expr path_in_expr (and optional other stuff) + * this means that there is no syntactic difference between an enum + * variant and a struct + * - only name resolution can tell the difference. Thus, maybe rework + * AST to take this into account ("struct or enum" nodes?) + * - (function) call expr expr '(' call_params? ')' + * - method call expr expr '.' path_expr_segment '(' call_params? ')' + * - field expr expr '.' identifier + * note that method call expr is preferred, i.e. field expr must not be + * followed by parenthesised expression sequence. + * - closure expr 'move'? ( '||' | '|' closure_params? '|' ) ( + * expr | '->' type_no_bounds block_expr ) + * - continue expr 'continue' labelled_lifetime? + * - break expr 'break' labelled_lifetime? expr? + * - range expr many different types but all involve '..' or + * '..=' + * - return expr 'return' as 1st tok + * - macro invocation identifier then :: or identifier then ! + * (simple_path '!') + * + * any that have rules beginning with 'expr' should probably be + * pratt-parsed, + * with parsing type to use determined by token AND lookahead. */ + + // ok well at least can do easy ones + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case RETURN_TOK: + // return expr + return parse_return_expr (std::move (outer_attrs)); + case BREAK: + // break expr + return parse_break_expr (std::move (outer_attrs)); + case CONTINUE: + // continue expr + return parse_continue_expr (std::move (outer_attrs)); + case MOVE: + // closure expr (though not all closure exprs require this) + return parse_closure_expr (std::move (outer_attrs)); + case LEFT_SQUARE: + // array expr (creation, not index) + return parse_array_expr (std::move (outer_attrs)); + default: { + /* HACK: piggyback on pratt parsed expr and abuse polymorphism to + * essentially downcast */ + + std::unique_ptr expr + = parse_expr (std::move (outer_attrs), restrictions); + + if (expr == nullptr) + { + Error error (t->get_locus (), + "failed to parse expression for expression without " + "block (pratt-parsed expression is null)"); + add_error (std::move (error)); + + return nullptr; + } + + std::unique_ptr expr_without_block ( + expr->as_expr_without_block ()); + + if (expr_without_block != nullptr) + { + return expr_without_block; + } + else + { + Error error (t->get_locus (), + "converted expr without block is null"); + add_error (std::move (error)); + + return nullptr; + } + } + } +} + +// Parses a block expression, including the curly braces at start and end. +template +std::unique_ptr +Parser::parse_block_expr (AST::AttrVec outer_attrs, + Location pratt_parsed_loc) +{ + Location locus = pratt_parsed_loc; + if (locus == Linemap::unknown_location ()) + { + locus = lexer.peek_token ()->get_locus (); + if (!skip_token (LEFT_CURLY)) + { + skip_after_end_block (); + return nullptr; + } + } + + AST::AttrVec inner_attrs = parse_inner_attributes (); + + // parse statements and expression + std::vector> stmts; + std::unique_ptr expr = nullptr; + + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () != RIGHT_CURLY) + { + ExprOrStmt expr_or_stmt = parse_stmt_or_expr_without_block (); + if (expr_or_stmt.is_error ()) + { + Error error (t->get_locus (), + "failed to parse statement or expression without " + "block in block expression"); + add_error (std::move (error)); + + return nullptr; + } + + t = lexer.peek_token (); + + if (expr_or_stmt.stmt != nullptr) + { + stmts.push_back (std::move (expr_or_stmt.stmt)); + } + else + { + // assign to expression and end parsing inside + expr = std::move (expr_or_stmt.expr); + break; + } + } + + Location end_locus = t->get_locus (); + + if (!skip_token (RIGHT_CURLY)) + { + Error error (t->get_locus (), + "error may be from having an expression (as opposed to " + "statement) in the body of the function but not last"); + add_error (std::move (error)); + + skip_after_end_block (); + return nullptr; + } + + // grammar allows for empty block expressions + + stmts.shrink_to_fit (); + + return std::unique_ptr ( + new AST::BlockExpr (std::move (stmts), std::move (expr), + std::move (inner_attrs), std::move (outer_attrs), locus, + end_locus)); +} + +/* Parses a "grouped" expression (expression in parentheses), used to control + * precedence. */ +template +std::unique_ptr +Parser::parse_grouped_expr (AST::AttrVec outer_attrs) +{ + Location locus = lexer.peek_token ()->get_locus (); + skip_token (LEFT_PAREN); + + AST::AttrVec inner_attrs = parse_inner_attributes (); + + // parse required expr inside parentheses + std::unique_ptr expr_in_parens = parse_expr (); + if (expr_in_parens == nullptr) + { + // skip after somewhere? + // error? + return nullptr; + } + + if (!skip_token (RIGHT_PAREN)) + { + // skip after somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::GroupedExpr (std::move (expr_in_parens), std::move (inner_attrs), + std::move (outer_attrs), locus)); +} + +// Parses a closure expression (closure definition). +template +std::unique_ptr +Parser::parse_closure_expr (AST::AttrVec outer_attrs) +{ + Location locus = lexer.peek_token ()->get_locus (); + // detect optional "move" + bool has_move = false; + if (lexer.peek_token ()->get_id () == MOVE) + { + lexer.skip_token (); + has_move = true; + } + + // handle parameter list + std::vector params; + + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case OR: + // skip token, no parameters + lexer.skip_token (); + break; + case PIPE: + // actually may have parameters + lexer.skip_token (); + + while (t->get_id () != PIPE) + { + AST::ClosureParam param = parse_closure_param (); + if (param.is_error ()) + { + // TODO is this really an error? + Error error (t->get_locus (), "could not parse closure param"); + add_error (std::move (error)); + + break; + } + params.push_back (std::move (param)); + + if (lexer.peek_token ()->get_id () != COMMA) + { + // not an error but means param list is done + break; + } + // skip comma + lexer.skip_token (); + + t = lexer.peek_token (); + } + params.shrink_to_fit (); + break; + default: + add_error (Error (t->get_locus (), + "unexpected token %qs in closure expression - expected " + "%<|%> or %<||%>", + t->get_token_description ())); + + // skip somewhere? + return nullptr; + } + + // again branch based on next token + t = lexer.peek_token (); + if (t->get_id () == RETURN_TYPE) + { + // must be return type closure with block expr + + // skip "return type" token + lexer.skip_token (); + + // parse actual type, which is required + std::unique_ptr type = parse_type_no_bounds (); + if (type == nullptr) + { + // error + Error error (t->get_locus (), "failed to parse type for closure"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + // parse block expr, which is required + std::unique_ptr block = parse_block_expr (); + if (block == nullptr) + { + // error + Error error (lexer.peek_token ()->get_locus (), + "failed to parse block expr in closure"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::ClosureExprInnerTyped (std::move (type), std::move (block), + std::move (params), locus, has_move, + std::move (outer_attrs))); + } + else + { + // must be expr-only closure + + // parse expr, which is required + std::unique_ptr expr = parse_expr (); + if (expr == nullptr) + { + Error error (t->get_locus (), + "failed to parse expression in closure"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::ClosureExprInner (std::move (expr), std::move (params), locus, + has_move, std::move (outer_attrs))); + } +} + +// Parses a literal token (to literal expression). +template +std::unique_ptr +Parser::parse_literal_expr (AST::AttrVec outer_attrs) +{ + // TODO: change if literal representation in lexer changes + + std::string literal_value; + AST::Literal::LitType type = AST::Literal::STRING; + + // branch based on token + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case CHAR_LITERAL: + type = AST::Literal::CHAR; + literal_value = t->get_str (); + lexer.skip_token (); + break; + case STRING_LITERAL: + type = AST::Literal::STRING; + literal_value = t->get_str (); + lexer.skip_token (); + break; + case BYTE_CHAR_LITERAL: + type = AST::Literal::BYTE; + literal_value = t->get_str (); + lexer.skip_token (); + break; + case BYTE_STRING_LITERAL: + type = AST::Literal::BYTE_STRING; + literal_value = t->get_str (); + lexer.skip_token (); + break; + case INT_LITERAL: + type = AST::Literal::INT; + literal_value = t->get_str (); + lexer.skip_token (); + break; + case FLOAT_LITERAL: + type = AST::Literal::FLOAT; + literal_value = t->get_str (); + lexer.skip_token (); + break; + // case BOOL_LITERAL + // use true and false keywords rather than "bool literal" Rust terminology + case TRUE_LITERAL: + type = AST::Literal::BOOL; + literal_value = "true"; + lexer.skip_token (); + break; + case FALSE_LITERAL: + type = AST::Literal::BOOL; + literal_value = "false"; + lexer.skip_token (); + break; + default: + // error - cannot be a literal expr + add_error (Error (t->get_locus (), + "unexpected token %qs when parsing literal expression", + t->get_token_description ())); + + // skip? + return nullptr; + } + + // create literal based on stuff in switch + return std::unique_ptr ( + new AST::LiteralExpr (std::move (literal_value), std::move (type), + t->get_type_hint (), std::move (outer_attrs), + t->get_locus ())); +} + +// Parses a return expression (including any expression to return). +template +std::unique_ptr +Parser::parse_return_expr (AST::AttrVec outer_attrs, + Location pratt_parsed_loc) +{ + Location locus = pratt_parsed_loc; + if (locus == Linemap::unknown_location ()) + { + locus = lexer.peek_token ()->get_locus (); + skip_token (RETURN_TOK); + } + + // parse expression to return, if it exists + ParseRestrictions restrictions; + restrictions.expr_can_be_null = true; + std::unique_ptr returned_expr + = parse_expr (AST::AttrVec (), restrictions); + + return std::unique_ptr ( + new AST::ReturnExpr (std::move (returned_expr), std::move (outer_attrs), + locus)); +} + +/* Parses a break expression (including any label to break to AND any return + * expression). */ +template +std::unique_ptr +Parser::parse_break_expr (AST::AttrVec outer_attrs, + Location pratt_parsed_loc) +{ + Location locus = pratt_parsed_loc; + if (locus == Linemap::unknown_location ()) + { + locus = lexer.peek_token ()->get_locus (); + skip_token (BREAK); + } + + // parse label (lifetime) if it exists - create dummy first + AST::Lifetime label = AST::Lifetime::error (); + if (lexer.peek_token ()->get_id () == LIFETIME) + { + label = parse_lifetime (); + } + + // parse break return expression if it exists + ParseRestrictions restrictions; + restrictions.expr_can_be_null = true; + std::unique_ptr return_expr + = parse_expr (AST::AttrVec (), restrictions); + + return std::unique_ptr ( + new AST::BreakExpr (std::move (label), std::move (return_expr), + std::move (outer_attrs), locus)); +} + +// Parses a continue expression (including any label to continue from). +template +std::unique_ptr +Parser::parse_continue_expr (AST::AttrVec outer_attrs, + Location pratt_parsed_loc) +{ + Location locus = pratt_parsed_loc; + if (locus == Linemap::unknown_location ()) + { + locus = lexer.peek_token ()->get_locus (); + skip_token (CONTINUE); + } + + // parse label (lifetime) if it exists - create dummy first + AST::Lifetime label = AST::Lifetime::error (); + if (lexer.peek_token ()->get_id () == LIFETIME) + { + label = parse_lifetime (); + } + + return std::unique_ptr ( + new AST::ContinueExpr (std::move (label), std::move (outer_attrs), locus)); +} + +// Parses a loop label used in loop expressions. +template +AST::LoopLabel +Parser::parse_loop_label () +{ + // parse lifetime - if doesn't exist, assume no label + const_TokenPtr t = lexer.peek_token (); + if (t->get_id () != LIFETIME) + { + // not necessarily an error + return AST::LoopLabel::error (); + } + /* FIXME: check for named lifetime requirement here? or check in semantic + * analysis phase? */ + AST::Lifetime label = parse_lifetime (); + + if (!skip_token (COLON)) + { + // skip somewhere? + return AST::LoopLabel::error (); + } + + return AST::LoopLabel (std::move (label), t->get_locus ()); +} + +/* Parses an if expression of any kind, including with else, else if, else if + * let, and neither. Note that any outer attributes will be ignored because if + * expressions don't support them. */ +template +std::unique_ptr +Parser::parse_if_expr (AST::AttrVec outer_attrs, + Location pratt_parsed_loc) +{ + // TODO: make having outer attributes an error? + Location locus = pratt_parsed_loc; + if (locus == Linemap::unknown_location ()) + { + locus = lexer.peek_token ()->get_locus (); + if (!skip_token (IF)) + { + skip_after_end_block (); + return nullptr; + } + } + + // detect accidental if let + if (lexer.peek_token ()->get_id () == LET) + { + Error error (lexer.peek_token ()->get_locus (), + "if let expression probably exists, but is being parsed " + "as an if expression. This may be a parser error"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + /* parse required condition expr - HACK to prevent struct expr from being + * parsed */ + ParseRestrictions no_struct_expr; + no_struct_expr.can_be_struct_expr = false; + std::unique_ptr condition = parse_expr ({}, no_struct_expr); + if (condition == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse condition expression in if expression"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + // parse required block expr + std::unique_ptr if_body = parse_block_expr (); + if (if_body == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse if body block expression in if expression"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + // branch to parse end or else (and then else, else if, or else if let) + if (lexer.peek_token ()->get_id () != ELSE) + { + // single selection - end of if expression + return std::unique_ptr ( + new AST::IfExpr (std::move (condition), std::move (if_body), + std::move (outer_attrs), locus)); + } + else + { + // double or multiple selection - branch on end, else if, or else if let + + // skip "else" + lexer.skip_token (); + + // branch on whether next token is '{' or 'if' + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case LEFT_CURLY: { + // double selection - else + // parse else block expr (required) + std::unique_ptr else_body = parse_block_expr (); + if (else_body == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse else body block expression in " + "if expression"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::IfExprConseqElse (std::move (condition), + std::move (if_body), + std::move (else_body), + std::move (outer_attrs), locus)); + } + case IF: { + // multiple selection - else if or else if let + // branch on whether next token is 'let' or not + if (lexer.peek_token (1)->get_id () == LET) + { + // parse if let expr (required) + std::unique_ptr if_let_expr + = parse_if_let_expr (); + if (if_let_expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse (else) if let expression " + "after if expression"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::IfExprConseqIfLet (std::move (condition), + std::move (if_body), + std::move (if_let_expr), + std::move (outer_attrs), locus)); + } + else + { + // parse if expr (required) + std::unique_ptr if_expr = parse_if_expr (); + if (if_expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse (else) if expression after " + "if expression"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::IfExprConseqIf (std::move (condition), + std::move (if_body), + std::move (if_expr), + std::move (outer_attrs), locus)); + } + } + default: + // error - invalid token + add_error (Error (t->get_locus (), + "unexpected token %qs after else in if expression", + t->get_token_description ())); + + // skip somewhere? + return nullptr; + } + } +} + +/* Parses an if let expression of any kind, including with else, else if, else + * if let, and none. Note that any outer attributes will be ignored as if let + * expressions don't support them. */ +template +std::unique_ptr +Parser::parse_if_let_expr (AST::AttrVec outer_attrs, + Location pratt_parsed_loc) +{ + // TODO: make having outer attributes an error? + Location locus = pratt_parsed_loc; + if (locus == Linemap::unknown_location ()) + { + locus = lexer.peek_token ()->get_locus (); + if (!skip_token (IF)) + { + skip_after_end_block (); + return nullptr; + } + } + + // detect accidental if expr parsed as if let expr + if (lexer.peek_token ()->get_id () != LET) + { + Error error (lexer.peek_token ()->get_locus (), + "if expression probably exists, but is being parsed as an " + "if let expression. This may be a parser error"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + lexer.skip_token (); + + // parse match arm patterns (which are required) + std::vector> match_arm_patterns + = parse_match_arm_patterns (EQUAL); + if (match_arm_patterns.empty ()) + { + Error error ( + lexer.peek_token ()->get_locus (), + "failed to parse any match arm patterns in if let expression"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + if (!skip_token (EQUAL)) + { + // skip somewhere? + return nullptr; + } + + // parse expression (required) - HACK to prevent struct expr being parsed + ParseRestrictions no_struct_expr; + no_struct_expr.can_be_struct_expr = false; + std::unique_ptr scrutinee_expr = parse_expr ({}, no_struct_expr); + if (scrutinee_expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse scrutinee expression in if let expression"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + /* TODO: check for expression not being a struct expression or lazy boolean + * expression here? or actually probably in semantic analysis. */ + + // parse block expression (required) + std::unique_ptr if_let_body = parse_block_expr (); + if (if_let_body == nullptr) + { + Error error ( + lexer.peek_token ()->get_locus (), + "failed to parse if let body block expression in if let expression"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + // branch to parse end or else (and then else, else if, or else if let) + if (lexer.peek_token ()->get_id () != ELSE) + { + // single selection - end of if let expression + return std::unique_ptr ( + new AST::IfLetExpr (std::move (match_arm_patterns), + std::move (scrutinee_expr), std::move (if_let_body), + std::move (outer_attrs), locus)); + } + else + { + // double or multiple selection - branch on end, else if, or else if let + + // skip "else" + lexer.skip_token (); + + // branch on whether next token is '{' or 'if' + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case LEFT_CURLY: { + // double selection - else + // parse else block expr (required) + std::unique_ptr else_body = parse_block_expr (); + if (else_body == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse else body block expression in " + "if let expression"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::IfLetExprConseqElse (std::move (match_arm_patterns), + std::move (scrutinee_expr), + std::move (if_let_body), + std::move (else_body), + std::move (outer_attrs), locus)); + } + case IF: { + // multiple selection - else if or else if let + // branch on whether next token is 'let' or not + if (lexer.peek_token (1)->get_id () == LET) + { + // parse if let expr (required) + std::unique_ptr if_let_expr + = parse_if_let_expr (); + if (if_let_expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse (else) if let expression " + "after if let expression"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::IfLetExprConseqIfLet ( + std::move (match_arm_patterns), std::move (scrutinee_expr), + std::move (if_let_body), std::move (if_let_expr), + std::move (outer_attrs), locus)); + } + else + { + // parse if expr (required) + std::unique_ptr if_expr = parse_if_expr (); + if (if_expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse (else) if expression after " + "if let expression"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::IfLetExprConseqIf (std::move (match_arm_patterns), + std::move (scrutinee_expr), + std::move (if_let_body), + std::move (if_expr), + std::move (outer_attrs), locus)); + } + } + default: + // error - invalid token + add_error ( + Error (t->get_locus (), + "unexpected token %qs after else in if let expression", + t->get_token_description ())); + + // skip somewhere? + return nullptr; + } + } +} + +/* TODO: possibly decide on different method of handling label (i.e. not + * parameter) */ + +/* Parses a "loop" infinite loop expression. Label is not parsed and should be + * parsed via parse_labelled_loop_expr, which would call this. */ +template +std::unique_ptr +Parser::parse_loop_expr (AST::AttrVec outer_attrs, + AST::LoopLabel label, + Location pratt_parsed_loc) +{ + Location locus = pratt_parsed_loc; + if (locus == Linemap::unknown_location ()) + { + if (label.is_error ()) + locus = lexer.peek_token ()->get_locus (); + else + locus = label.get_locus (); + + if (!skip_token (LOOP)) + { + skip_after_end_block (); + return nullptr; + } + } + else + { + if (!label.is_error ()) + locus = label.get_locus (); + } + + // parse loop body, which is required + std::unique_ptr loop_body = parse_block_expr (); + if (loop_body == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "could not parse loop body in (infinite) loop expression"); + add_error (std::move (error)); + + return nullptr; + } + + return std::unique_ptr ( + new AST::LoopExpr (std::move (loop_body), locus, std::move (label), + std::move (outer_attrs))); +} + +/* Parses a "while" loop expression. Label is not parsed and should be parsed + * via parse_labelled_loop_expr, which would call this. */ +template +std::unique_ptr +Parser::parse_while_loop_expr (AST::AttrVec outer_attrs, + AST::LoopLabel label, + Location pratt_parsed_loc) +{ + Location locus = pratt_parsed_loc; + if (locus == Linemap::unknown_location ()) + { + if (label.is_error ()) + locus = lexer.peek_token ()->get_locus (); + else + locus = label.get_locus (); + + if (!skip_token (WHILE)) + { + skip_after_end_block (); + return nullptr; + } + } + else + { + if (!label.is_error ()) + locus = label.get_locus (); + } + + // ensure it isn't a while let loop + if (lexer.peek_token ()->get_id () == LET) + { + Error error (lexer.peek_token ()->get_locus (), + "appears to be while let loop but is being parsed by " + "while loop - this may be a compiler issue"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + // parse loop predicate (required) with HACK to prevent struct expr parsing + ParseRestrictions no_struct_expr; + no_struct_expr.can_be_struct_expr = false; + std::unique_ptr predicate = parse_expr ({}, no_struct_expr); + if (predicate == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse predicate expression in while loop"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + /* TODO: check that it isn't struct expression here? actually, probably in + * semantic analysis */ + + // parse loop body (required) + std::unique_ptr body = parse_block_expr (); + if (body == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse loop body block expression in while loop"); + add_error (std::move (error)); + + // skip somewhere + return nullptr; + } + + return std::unique_ptr ( + new AST::WhileLoopExpr (std::move (predicate), std::move (body), locus, + std::move (label), std::move (outer_attrs))); +} + +/* Parses a "while let" loop expression. Label is not parsed and should be + * parsed via parse_labelled_loop_expr, which would call this. */ +template +std::unique_ptr +Parser::parse_while_let_loop_expr (AST::AttrVec outer_attrs, + AST::LoopLabel label) +{ + Location locus = Linemap::unknown_location (); + if (label.is_error ()) + locus = lexer.peek_token ()->get_locus (); + else + locus = label.get_locus (); + skip_token (WHILE); + + /* check for possible accidental recognition of a while loop as a while let + * loop */ + if (lexer.peek_token ()->get_id () != LET) + { + Error error (lexer.peek_token ()->get_locus (), + "appears to be a while loop but is being parsed by " + "while let loop - this may be a compiler issue"); + add_error (std::move (error)); + + // skip somewhere + return nullptr; + } + // as this token is definitely let now, save the computation of comparison + lexer.skip_token (); + + // parse predicate patterns + std::vector> predicate_patterns + = parse_match_arm_patterns (EQUAL); + // TODO: have to ensure that there is at least 1 pattern? + + if (!skip_token (EQUAL)) + { + // skip somewhere? + return nullptr; + } + + /* parse predicate expression, which is required (and HACK to prevent struct + * expr) */ + ParseRestrictions no_struct_expr; + no_struct_expr.can_be_struct_expr = false; + std::unique_ptr predicate_expr = parse_expr ({}, no_struct_expr); + if (predicate_expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse predicate expression in while let loop"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + /* TODO: ensure that struct expression is not parsed? Actually, probably in + * semantic analysis. */ + + // parse loop body, which is required + std::unique_ptr body = parse_block_expr (); + if (body == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse block expr (loop body) of while let loop"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + return std::unique_ptr (new AST::WhileLetLoopExpr ( + std::move (predicate_patterns), std::move (predicate_expr), + std::move (body), locus, std::move (label), std::move (outer_attrs))); +} + +/* Parses a "for" iterative loop. Label is not parsed and should be parsed via + * parse_labelled_loop_expr, which would call this. */ +template +std::unique_ptr +Parser::parse_for_loop_expr (AST::AttrVec outer_attrs, + AST::LoopLabel label) +{ + Location locus = Linemap::unknown_location (); + if (label.is_error ()) + locus = lexer.peek_token ()->get_locus (); + else + locus = label.get_locus (); + skip_token (FOR); + + // parse pattern, which is required + std::unique_ptr pattern = parse_pattern (); + if (pattern == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse iterator pattern in for loop"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + if (!skip_token (IN)) + { + // skip somewhere? + return nullptr; + } + + /* parse iterator expression, which is required - also HACK to prevent + * struct expr */ + ParseRestrictions no_struct_expr; + no_struct_expr.can_be_struct_expr = false; + std::unique_ptr expr = parse_expr ({}, no_struct_expr); + if (expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse iterator expression in for loop"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + // TODO: check to ensure this isn't struct expr? Or in semantic analysis. + + // parse loop body, which is required + std::unique_ptr body = parse_block_expr (); + if (body == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse loop body block expression in for loop"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::ForLoopExpr (std::move (pattern), std::move (expr), + std::move (body), locus, std::move (label), + std::move (outer_attrs))); +} + +// Parses a loop expression with label (any kind of loop - disambiguates). +template +std::unique_ptr +Parser::parse_labelled_loop_expr (AST::AttrVec outer_attrs) +{ + /* TODO: decide whether it should not work if there is no label, or parse it + * with no label at the moment, I will make it not work with no label + * because that's the implication. */ + + if (lexer.peek_token ()->get_id () != LIFETIME) + { + Error error (lexer.peek_token ()->get_locus (), + "expected lifetime in labelled loop expr (to parse loop " + "label) - found %qs", + lexer.peek_token ()->get_token_description ()); + add_error (std::move (error)); + + // skip? + return nullptr; + } + + // parse loop label (required) + AST::LoopLabel label = parse_loop_label (); + if (label.is_error ()) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse loop label in labelled loop expr"); + add_error (std::move (error)); + + // skip? + return nullptr; + } + + // branch on next token + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case LOOP: + return parse_loop_expr (std::move (outer_attrs), std::move (label)); + case FOR: + return parse_for_loop_expr (std::move (outer_attrs), std::move (label)); + case WHILE: + // further disambiguate into while vs while let + if (lexer.peek_token (1)->get_id () == LET) + { + return parse_while_let_loop_expr (std::move (outer_attrs), + std::move (label)); + } + else + { + return parse_while_loop_expr (std::move (outer_attrs), + std::move (label)); + } + default: + // error + add_error (Error (t->get_locus (), + "unexpected token %qs when parsing labelled loop", + t->get_token_description ())); + + // skip? + return nullptr; + } +} + +// Parses a match expression. +template +std::unique_ptr +Parser::parse_match_expr (AST::AttrVec outer_attrs, + Location pratt_parsed_loc) +{ + Location locus = pratt_parsed_loc; + if (locus == Linemap::unknown_location ()) + { + locus = lexer.peek_token ()->get_locus (); + skip_token (MATCH_TOK); + } + + /* parse scrutinee expression, which is required (and HACK to prevent struct + * expr) */ + ParseRestrictions no_struct_expr; + no_struct_expr.can_be_struct_expr = false; + std::unique_ptr scrutinee = parse_expr ({}, no_struct_expr); + if (scrutinee == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse scrutinee expression in match expression"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + /* TODO: check for scrutinee expr not being struct expr? or do so in + * semantic analysis */ + + if (!skip_token (LEFT_CURLY)) + { + // skip somewhere? + return nullptr; + } + + // parse inner attributes (if they exist) + AST::AttrVec inner_attrs = parse_inner_attributes (); + + // parse match arms (if they exist) + // std::vector > match_arms; + std::vector match_arms; + + // parse match cases + while (lexer.peek_token ()->get_id () != RIGHT_CURLY) + { + // parse match arm itself, which is required + AST::MatchArm arm = parse_match_arm (); + if (arm.is_error ()) + { + // TODO is this worth throwing everything away? + Error error (lexer.peek_token ()->get_locus (), + "failed to parse match arm in match arms"); + add_error (std::move (error)); + + return nullptr; + } + + if (!skip_token (MATCH_ARROW)) + { + // skip after somewhere? + // TODO is returning here a good idea? or is break better? + return nullptr; + } + + ParseRestrictions restrictions; + restrictions.expr_can_be_stmt = true; + restrictions.consume_semi = false; + + std::unique_ptr expr = parse_expr_stmt ({}, restrictions); + if (expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse expr in match arm in match expr"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + bool is_expr_without_block + = expr->get_type () == AST::ExprStmt::ExprStmtType::WITHOUT_BLOCK; + + // construct match case expr and add to cases + switch (expr->get_type ()) + { + case AST::ExprStmt::ExprStmtType::WITH_BLOCK: { + AST::ExprStmtWithBlock *cast + = static_cast (expr.get ()); + std::unique_ptr e = cast->get_expr ()->clone_expr (); + match_arms.push_back ( + AST::MatchCase (std::move (arm), std::move (e))); + } + break; + + case AST::ExprStmt::ExprStmtType::WITHOUT_BLOCK: { + AST::ExprStmtWithoutBlock *cast + = static_cast (expr.get ()); + std::unique_ptr e = cast->get_expr ()->clone_expr (); + match_arms.push_back ( + AST::MatchCase (std::move (arm), std::move (e))); + } + break; + } + + // handle comma presence + if (lexer.peek_token ()->get_id () != COMMA) + { + if (!is_expr_without_block) + { + // allowed even if not final case + continue; + } + else if (is_expr_without_block + && lexer.peek_token ()->get_id () != RIGHT_CURLY) + { + // not allowed if not final case + Error error (lexer.peek_token ()->get_locus (), + "exprwithoutblock requires comma after match case " + "expression in match arm (if not final case)"); + add_error (std::move (error)); + + return nullptr; + } + else + { + // otherwise, must be final case, so fine + break; + } + } + lexer.skip_token (); + } + + if (!skip_token (RIGHT_CURLY)) + { + // skip somewhere? + return nullptr; + } + + match_arms.shrink_to_fit (); + + return std::unique_ptr ( + new AST::MatchExpr (std::move (scrutinee), std::move (match_arms), + std::move (inner_attrs), std::move (outer_attrs), + locus)); +} + +// Parses the "pattern" part of the match arm (the 'case x:' equivalent). +template +AST::MatchArm +Parser::parse_match_arm () +{ + // parse optional outer attributes + AST::AttrVec outer_attrs = parse_outer_attributes (); + + // DEBUG + rust_debug ("about to start parsing match arm patterns"); + + // break early if find right curly + if (lexer.peek_token ()->get_id () == RIGHT_CURLY) + { + // not an error + return AST::MatchArm::create_error (); + } + + // parse match arm patterns - at least 1 is required + std::vector> match_arm_patterns + = parse_match_arm_patterns (RIGHT_CURLY); + if (match_arm_patterns.empty ()) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse any patterns in match arm"); + add_error (std::move (error)); + + // skip somewhere? + return AST::MatchArm::create_error (); + } + + // DEBUG + rust_debug ("successfully parsed match arm patterns"); + + // parse match arm guard expr if it exists + std::unique_ptr guard_expr = nullptr; + if (lexer.peek_token ()->get_id () == IF) + { + lexer.skip_token (); + + guard_expr = parse_expr (); + if (guard_expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse guard expression in match arm"); + add_error (std::move (error)); + + // skip somewhere? + return AST::MatchArm::create_error (); + } + } + + // DEBUG + rust_debug ("successfully parsed match arm"); + + return AST::MatchArm (std::move (match_arm_patterns), + lexer.peek_token ()->get_locus (), + std::move (guard_expr), std::move (outer_attrs)); +} + +/* Parses the patterns used in a match arm. End token id is the id of the + * token that would exist after the patterns are done (e.g. '}' for match + * expr, '=' for if let and while let). */ +template +std::vector> +Parser::parse_match_arm_patterns (TokenId end_token_id) +{ + // skip optional leading '|' + if (lexer.peek_token ()->get_id () == PIPE) + lexer.skip_token (); + /* TODO: do I even need to store the result of this? can't be used. + * If semantically different, I need a wrapped "match arm patterns" object + * for this. */ + + std::vector> patterns; + + // quick break out if end_token_id + if (lexer.peek_token ()->get_id () == end_token_id) + return patterns; + + // parse required pattern - if doesn't exist, return empty + std::unique_ptr initial_pattern = parse_pattern (); + if (initial_pattern == nullptr) + { + // FIXME: should this be an error? + return patterns; + } + patterns.push_back (std::move (initial_pattern)); + + // DEBUG + rust_debug ("successfully parsed initial match arm pattern"); + + // parse new patterns as long as next char is '|' + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () == PIPE) + { + // skip pipe token + lexer.skip_token (); + + // break if hit end token id + if (lexer.peek_token ()->get_id () == end_token_id) + break; + + // parse pattern + std::unique_ptr pattern = parse_pattern (); + if (pattern == nullptr) + { + // this is an error + Error error (lexer.peek_token ()->get_locus (), + "failed to parse pattern in match arm patterns"); + add_error (std::move (error)); + + // skip somewhere? + return {}; + } + + patterns.push_back (std::move (pattern)); + + t = lexer.peek_token (); + } + + patterns.shrink_to_fit (); + + return patterns; +} + +// Parses an async block expression. +template +std::unique_ptr +Parser::parse_async_block_expr (AST::AttrVec outer_attrs) +{ + Location locus = lexer.peek_token ()->get_locus (); + skip_token (ASYNC); + + // detect optional move token + bool has_move = false; + if (lexer.peek_token ()->get_id () == MOVE) + { + lexer.skip_token (); + has_move = true; + } + + // parse block expression (required) + std::unique_ptr block_expr = parse_block_expr (); + if (block_expr == nullptr) + { + Error error ( + lexer.peek_token ()->get_locus (), + "failed to parse block expression of async block expression"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::AsyncBlockExpr (std::move (block_expr), has_move, + std::move (outer_attrs), locus)); +} + +// Parses an unsafe block expression. +template +std::unique_ptr +Parser::parse_unsafe_block_expr (AST::AttrVec outer_attrs, + Location pratt_parsed_loc) +{ + Location locus = pratt_parsed_loc; + if (locus == Linemap::unknown_location ()) + { + locus = lexer.peek_token ()->get_locus (); + skip_token (UNSAFE); + } + + // parse block expression (required) + std::unique_ptr block_expr = parse_block_expr (); + if (block_expr == nullptr) + { + Error error ( + lexer.peek_token ()->get_locus (), + "failed to parse block expression of unsafe block expression"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::UnsafeBlockExpr (std::move (block_expr), std::move (outer_attrs), + locus)); +} + +// Parses an array definition expression. +template +std::unique_ptr +Parser::parse_array_expr (AST::AttrVec outer_attrs, + Location pratt_parsed_loc) +{ + Location locus = pratt_parsed_loc; + if (locus == Linemap::unknown_location ()) + { + locus = lexer.peek_token ()->get_locus (); + skip_token (LEFT_SQUARE); + } + + // parse optional inner attributes + AST::AttrVec inner_attrs = parse_inner_attributes (); + + // parse the "array elements" section, which is optional + if (lexer.peek_token ()->get_id () == RIGHT_SQUARE) + { + // no array elements + lexer.skip_token (); + + std::vector> exprs; + auto array_elems + = Rust::make_unique (std::move (exprs), locus); + return Rust::make_unique (std::move (array_elems), + std::move (inner_attrs), + std::move (outer_attrs), locus); + } + else + { + // should have array elements + // parse initial expression, which is required for either + std::unique_ptr initial_expr = parse_expr (); + if (initial_expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "could not parse expression in array expression " + "(even though arrayelems seems to be present)"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + if (lexer.peek_token ()->get_id () == SEMICOLON) + { + // copy array elems + lexer.skip_token (); + + // parse copy amount expression (required) + std::unique_ptr copy_amount = parse_expr (); + if (copy_amount == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "could not parse copy amount expression in array " + "expression (arrayelems)"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + skip_token (RIGHT_SQUARE); + + std::unique_ptr copied_array_elems ( + new AST::ArrayElemsCopied (std::move (initial_expr), + std::move (copy_amount), locus)); + return std::unique_ptr ( + new AST::ArrayExpr (std::move (copied_array_elems), + std::move (inner_attrs), + std::move (outer_attrs), locus)); + } + else if (lexer.peek_token ()->get_id () == RIGHT_SQUARE) + { + // single-element array expression + std::vector> exprs; + exprs.reserve (1); + exprs.push_back (std::move (initial_expr)); + exprs.shrink_to_fit (); + + skip_token (RIGHT_SQUARE); + + std::unique_ptr array_elems ( + new AST::ArrayElemsValues (std::move (exprs), locus)); + return std::unique_ptr ( + new AST::ArrayExpr (std::move (array_elems), + std::move (inner_attrs), + std::move (outer_attrs), locus)); + } + else if (lexer.peek_token ()->get_id () == COMMA) + { + // multi-element array expression (or trailing comma) + std::vector> exprs; + exprs.push_back (std::move (initial_expr)); + + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () == COMMA) + { + lexer.skip_token (); + + // quick break if right square bracket + if (lexer.peek_token ()->get_id () == RIGHT_SQUARE) + break; + + // parse expression (required) + std::unique_ptr expr = parse_expr (); + if (expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse element in array expression"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + exprs.push_back (std::move (expr)); + + t = lexer.peek_token (); + } + + skip_token (RIGHT_SQUARE); + + exprs.shrink_to_fit (); + + std::unique_ptr array_elems ( + new AST::ArrayElemsValues (std::move (exprs), locus)); + return std::unique_ptr ( + new AST::ArrayExpr (std::move (array_elems), + std::move (inner_attrs), + std::move (outer_attrs), locus)); + } + else + { + // error + Error error (lexer.peek_token ()->get_locus (), + "unexpected token %qs in array expression (arrayelems)", + lexer.peek_token ()->get_token_description ()); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + } +} + +// Parses a single parameter used in a closure definition. +template +AST::ClosureParam +Parser::parse_closure_param () +{ + AST::AttrVec outer_attrs = parse_outer_attributes (); + + // parse pattern (which is required) + std::unique_ptr pattern = parse_pattern (); + if (pattern == nullptr) + { + // not necessarily an error + return AST::ClosureParam::create_error (); + } + + // parse optional type of param + std::unique_ptr type = nullptr; + if (lexer.peek_token ()->get_id () == COLON) + { + lexer.skip_token (); + + // parse type, which is now required + type = parse_type (); + if (type == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse type in closure parameter"); + add_error (std::move (error)); + + // skip somewhere? + return AST::ClosureParam::create_error (); + } + } + + return AST::ClosureParam (std::move (pattern), pattern->get_locus (), + std::move (type), std::move (outer_attrs)); +} + +// Parses a grouped or tuple expression (disambiguates). +template +std::unique_ptr +Parser::parse_grouped_or_tuple_expr ( + AST::AttrVec outer_attrs, Location pratt_parsed_loc) +{ + // adjustment to allow Pratt parsing to reuse function without copy-paste + Location locus = pratt_parsed_loc; + if (locus == Linemap::unknown_location ()) + { + locus = lexer.peek_token ()->get_locus (); + skip_token (LEFT_PAREN); + } + + // parse optional inner attributes + AST::AttrVec inner_attrs = parse_inner_attributes (); + + if (lexer.peek_token ()->get_id () == RIGHT_PAREN) + { + // must be empty tuple + lexer.skip_token (); + + // create tuple with empty tuple elems + return std::unique_ptr ( + new AST::TupleExpr (std::vector> (), + std::move (inner_attrs), std::move (outer_attrs), + locus)); + } + + // parse first expression (required) + std::unique_ptr first_expr = parse_expr (); + if (first_expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse expression in grouped or tuple expression"); + add_error (std::move (error)); + + // skip after somewhere? + return nullptr; + } + + // detect whether grouped expression with right parentheses as next token + if (lexer.peek_token ()->get_id () == RIGHT_PAREN) + { + // must be grouped expr + lexer.skip_token (); + + // create grouped expr + return std::unique_ptr ( + new AST::GroupedExpr (std::move (first_expr), std::move (inner_attrs), + std::move (outer_attrs), locus)); + } + else if (lexer.peek_token ()->get_id () == COMMA) + { + // tuple expr + std::vector> exprs; + exprs.push_back (std::move (first_expr)); + + // parse potential other tuple exprs + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () == COMMA) + { + lexer.skip_token (); + + // break out if right paren + if (lexer.peek_token ()->get_id () == RIGHT_PAREN) + break; + + // parse expr, which is now required + std::unique_ptr expr = parse_expr (); + if (expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse expr in tuple expr"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + exprs.push_back (std::move (expr)); + + t = lexer.peek_token (); + } + + // skip right paren + skip_token (RIGHT_PAREN); + + return std::unique_ptr ( + new AST::TupleExpr (std::move (exprs), std::move (inner_attrs), + std::move (outer_attrs), locus)); + } + else + { + // error + const_TokenPtr t = lexer.peek_token (); + Error error (t->get_locus (), + "unexpected token %qs in grouped or tuple expression " + "(parenthesised expression) - expected %<)%> for grouped " + "expr and %<,%> for tuple expr", + t->get_token_description ()); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } +} + +// Parses a type (will further disambiguate any type). +template +std::unique_ptr +Parser::parse_type (bool save_errors) +{ + /* rules for all types: + * NeverType: '!' + * SliceType: '[' Type ']' + * InferredType: '_' + * MacroInvocation: SimplePath '!' DelimTokenTree + * ParenthesisedType: '(' Type ')' + * ImplTraitType: 'impl' TypeParamBounds + * TypeParamBounds (not type) TypeParamBound ( '+' TypeParamBound )* '+'? + * TypeParamBound Lifetime | TraitBound + * ImplTraitTypeOneBound: 'impl' TraitBound + * TraitObjectType: 'dyn'? TypeParamBounds + * TraitObjectTypeOneBound: 'dyn'? TraitBound + * TraitBound '?'? ForLifetimes? TypePath | '(' '?'? + * ForLifetimes? TypePath ')' BareFunctionType: ForLifetimes? + * FunctionQualifiers 'fn' etc. ForLifetimes (not type) 'for' '<' + * LifetimeParams '>' FunctionQualifiers ( 'async' | 'const' )? + * 'unsafe'? + * ('extern' abi?)? QualifiedPathInType: '<' Type ( 'as' TypePath )? '>' + * ( + * '::' TypePathSegment )+ TypePath: '::'? TypePathSegment ( + * '::' TypePathSegment)* ArrayType: '[' Type ';' Expr ']' + * ReferenceType: '&' Lifetime? 'mut'? TypeNoBounds + * RawPointerType: '*' ( 'mut' | 'const' ) TypeNoBounds + * TupleType: '(' Type etc. - regular tuple stuff. Also + * regular tuple vs parenthesised precedence + * + * Disambiguate between macro and type path via type path being parsed, and + * then if '!' found, convert type path to simple path for macro. Usual + * disambiguation for tuple vs parenthesised. For ImplTraitType and + * TraitObjectType individual disambiguations, they seem more like "special + * cases", so probably just try to parse the more general ImplTraitType or + * TraitObjectType and return OneBound versions if they satisfy those + * criteria. */ + + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case EXCLAM: + // never type - can't be macro as no path beforehand + lexer.skip_token (); + return std::unique_ptr ( + new AST::NeverType (t->get_locus ())); + case LEFT_SQUARE: + // slice type or array type - requires further disambiguation + return parse_slice_or_array_type (); + case LEFT_ANGLE: { + // qualified path in type + AST::QualifiedPathInType path = parse_qualified_path_in_type (); + if (path.is_error ()) + { + if (save_errors) + { + Error error (t->get_locus (), + "failed to parse qualified path in type"); + add_error (std::move (error)); + } + + return nullptr; + } + return std::unique_ptr ( + new AST::QualifiedPathInType (std::move (path))); + } + case UNDERSCORE: + // inferred type + lexer.skip_token (); + return std::unique_ptr ( + new AST::InferredType (t->get_locus ())); + case ASTERISK: + // raw pointer type + return parse_raw_pointer_type (); + case AMP: // does this also include AMP_AMP? + // reference type + return parse_reference_type (); + case LIFETIME: { + /* probably a lifetime bound, so probably type param bounds in + * TraitObjectType */ + std::vector> bounds + = parse_type_param_bounds (); + + return std::unique_ptr ( + new AST::TraitObjectType (std::move (bounds), t->get_locus (), + false)); + } + case IDENTIFIER: + case SUPER: + case SELF: + case SELF_ALIAS: + case CRATE: + case DOLLAR_SIGN: + case SCOPE_RESOLUTION: { + // macro invocation or type path - requires further disambiguation. + /* for parsing path component of each rule, perhaps parse it as a + * typepath and attempt conversion to simplepath if a trailing '!' is + * found */ + /* Type path also includes TraitObjectTypeOneBound BUT if it starts + * with it, it is exactly the same as a TypePath syntactically, so + * this is a syntactical ambiguity. As such, the parser will parse it + * as a TypePath. This, however, does not prevent TraitObjectType from + * starting with a typepath. */ + + // parse path as type path + AST::TypePath path = parse_type_path (); + if (path.is_error ()) + { + if (save_errors) + { + Error error (t->get_locus (), + "failed to parse path as first component of type"); + add_error (std::move (error)); + } + + return nullptr; + } + Location locus = path.get_locus (); + + // branch on next token + t = lexer.peek_token (); + switch (t->get_id ()) + { + case EXCLAM: { + // macro invocation + // convert to simple path + AST::SimplePath macro_path = path.as_simple_path (); + if (macro_path.is_empty ()) + { + if (save_errors) + { + Error error (t->get_locus (), + "failed to parse simple path in macro " + "invocation (for type)"); + add_error (std::move (error)); + } + + return nullptr; + } + + lexer.skip_token (); + + AST::DelimTokenTree tok_tree = parse_delim_token_tree (); + + return std::unique_ptr ( + new AST::MacroInvocation ( + AST::MacroInvocData (std::move (macro_path), + std::move (tok_tree)), + {}, locus)); + } + case PLUS: { + // type param bounds + std::vector> bounds; + + // convert type path to trait bound + std::unique_ptr path_bound ( + new AST::TraitBound (std::move (path), locus, false, false)); + bounds.push_back (std::move (path_bound)); + + /* parse rest of bounds - FIXME: better way to find when to stop + * parsing */ + while (t->get_id () == PLUS) + { + lexer.skip_token (); + + // parse bound if it exists - if not, assume end of sequence + std::unique_ptr bound + = parse_type_param_bound (); + if (bound == nullptr) + { + break; + } + bounds.push_back (std::move (bound)); + + t = lexer.peek_token (); + } + + return std::unique_ptr ( + new AST::TraitObjectType (std::move (bounds), locus, false)); + } + default: + // assume that this is a type path and not an error + return std::unique_ptr ( + new AST::TypePath (std::move (path))); + } + } + case LEFT_PAREN: + /* tuple type or parenthesised type - requires further disambiguation + * (the usual). ok apparently can be a parenthesised TraitBound too, so + * could be TraitObjectTypeOneBound or TraitObjectType */ + return parse_paren_prefixed_type (); + case FOR: + // TraitObjectTypeOneBound or BareFunctionType + return parse_for_prefixed_type (); + case ASYNC: + case CONST: + case UNSAFE: + case EXTERN_TOK: + case FN_TOK: + // bare function type (with no for lifetimes) + return parse_bare_function_type (std::vector ()); + case IMPL: + lexer.skip_token (); + if (lexer.peek_token ()->get_id () == LIFETIME) + { + /* cannot be one bound because lifetime prevents it from being + * traitbound */ + std::vector> bounds + = parse_type_param_bounds (); + + return std::unique_ptr ( + new AST::ImplTraitType (std::move (bounds), t->get_locus ())); + } + else + { + // should be trait bound, so parse trait bound + std::unique_ptr initial_bound = parse_trait_bound (); + if (initial_bound == nullptr) + { + if (save_errors) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse ImplTraitType initial bound"); + add_error (std::move (error)); + } + + return nullptr; + } + + Location locus = t->get_locus (); + + // short cut if next token isn't '+' + t = lexer.peek_token (); + if (t->get_id () != PLUS) + { + // convert trait bound to value object + AST::TraitBound value_bound (*initial_bound); + + // DEBUG: removed as unique ptr, so should auto-delete + // delete initial_bound; + + return std::unique_ptr ( + new AST::ImplTraitTypeOneBound (std::move (value_bound), + locus)); + } + + // parse additional type param bounds + std::vector> bounds; + bounds.push_back (std::move (initial_bound)); + while (t->get_id () == PLUS) + { + lexer.skip_token (); + + // parse bound if it exists + std::unique_ptr bound + = parse_type_param_bound (); + if (bound == nullptr) + { + // not an error as trailing plus may exist + break; + } + bounds.push_back (std::move (bound)); + + t = lexer.peek_token (); + } + + return std::unique_ptr ( + new AST::ImplTraitType (std::move (bounds), locus)); + } + case DYN: + case QUESTION_MARK: { + // either TraitObjectType or TraitObjectTypeOneBound + bool has_dyn = false; + if (t->get_id () == DYN) + { + lexer.skip_token (); + has_dyn = true; + } + + if (lexer.peek_token ()->get_id () == LIFETIME) + { + /* cannot be one bound because lifetime prevents it from being + * traitbound */ + std::vector> bounds + = parse_type_param_bounds (); + + return std::unique_ptr ( + new AST::TraitObjectType (std::move (bounds), t->get_locus (), + has_dyn)); + } + else + { + // should be trait bound, so parse trait bound + std::unique_ptr initial_bound + = parse_trait_bound (); + if (initial_bound == nullptr) + { + if (save_errors) + { + Error error ( + lexer.peek_token ()->get_locus (), + "failed to parse TraitObjectType initial bound"); + add_error (std::move (error)); + } + + return nullptr; + } + + // short cut if next token isn't '+' + t = lexer.peek_token (); + if (t->get_id () != PLUS) + { + // convert trait bound to value object + AST::TraitBound value_bound (*initial_bound); + + // DEBUG: removed as unique ptr, so should auto delete + // delete initial_bound; + + return std::unique_ptr ( + new AST::TraitObjectTypeOneBound (std::move (value_bound), + t->get_locus (), has_dyn)); + } + + // parse additional type param bounds + std::vector> bounds; + bounds.push_back (std::move (initial_bound)); + while (t->get_id () == PLUS) + { + lexer.skip_token (); + + // parse bound if it exists + std::unique_ptr bound + = parse_type_param_bound (); + if (bound == nullptr) + { + // not an error as trailing plus may exist + break; + } + bounds.push_back (std::move (bound)); + + t = lexer.peek_token (); + } + + return std::unique_ptr ( + new AST::TraitObjectType (std::move (bounds), t->get_locus (), + has_dyn)); + } + } + default: + if (save_errors) + add_error (Error (t->get_locus (), "unrecognised token %qs in type", + t->get_token_description ())); + + return nullptr; + } +} + +/* Parses a type that has '(' as its first character. Returns a tuple type, + * parenthesised type, TraitObjectTypeOneBound, or TraitObjectType depending + * on following characters. */ +template +std::unique_ptr +Parser::parse_paren_prefixed_type () +{ + /* NOTE: Syntactical ambiguity of a parenthesised trait bound is considered + * a trait bound, not a parenthesised type, so that it can still be used in + * type param bounds. */ + + /* NOTE: this implementation is really shit but I couldn't think of a better + * one. It requires essentially breaking polymorphism and downcasting via + * virtual method abuse, as it was copied from the rustc implementation (in + * which types are reified due to tagged union), after a more OOP attempt by + * me failed. */ + Location left_delim_locus = lexer.peek_token ()->get_locus (); + + // skip left delim + lexer.skip_token (); + /* while next token isn't close delim, parse comma-separated types, saving + * whether trailing comma happens */ + const_TokenPtr t = lexer.peek_token (); + bool trailing_comma = true; + std::vector> types; + + while (t->get_id () != RIGHT_PAREN) + { + std::unique_ptr type = parse_type (); + if (type == nullptr) + { + Error error (t->get_locus (), + "failed to parse type inside parentheses (probably " + "tuple or parenthesised)"); + add_error (std::move (error)); + + return nullptr; + } + types.push_back (std::move (type)); + + t = lexer.peek_token (); + if (t->get_id () != COMMA) + { + trailing_comma = false; + break; + } + lexer.skip_token (); + + t = lexer.peek_token (); + } + + if (!skip_token (RIGHT_PAREN)) + { + return nullptr; + } + + // if only one type and no trailing comma, then not a tuple type + if (types.size () == 1 && !trailing_comma) + { + // must be a TraitObjectType (with more than one bound) + if (lexer.peek_token ()->get_id () == PLUS) + { + // create type param bounds vector + std::vector> bounds; + + // HACK: convert type to traitbound and add to bounds + std::unique_ptr released_ptr = std::move (types[0]); + std::unique_ptr converted_bound ( + released_ptr->to_trait_bound (true)); + if (converted_bound == nullptr) + { + Error error ( + lexer.peek_token ()->get_locus (), + "failed to hackily converted parsed type to trait bound"); + add_error (std::move (error)); + + return nullptr; + } + bounds.push_back (std::move (converted_bound)); + + t = lexer.peek_token (); + while (t->get_id () == PLUS) + { + lexer.skip_token (); + + // attempt to parse typeparambound + std::unique_ptr bound + = parse_type_param_bound (); + if (bound == nullptr) + { + // not an error if null + break; + } + bounds.push_back (std::move (bound)); + + t = lexer.peek_token (); + } + + return std::unique_ptr ( + new AST::TraitObjectType (std::move (bounds), left_delim_locus, + false)); + } + else + { + // release vector pointer + std::unique_ptr released_ptr = std::move (types[0]); + /* HACK: attempt to convert to trait bound. if fails, parenthesised + * type */ + std::unique_ptr converted_bound ( + released_ptr->to_trait_bound (true)); + if (converted_bound == nullptr) + { + // parenthesised type + return std::unique_ptr ( + new AST::ParenthesisedType (std::move (released_ptr), + left_delim_locus)); + } + else + { + // trait object type (one bound) + + // get value semantics trait bound + AST::TraitBound value_bound (*converted_bound); + + return std::unique_ptr ( + new AST::TraitObjectTypeOneBound (value_bound, + left_delim_locus)); + } + } + } + else + { + return std::unique_ptr ( + new AST::TupleType (std::move (types), left_delim_locus)); + } + /* TODO: ensure that this ensures that dynamic dispatch for traits is not + * lost somehow */ +} + +/* Parses a type that has 'for' as its first character. This means it has a + * "for lifetimes", so returns either a BareFunctionType, TraitObjectType, or + * TraitObjectTypeOneBound depending on following characters. */ +template +std::unique_ptr +Parser::parse_for_prefixed_type () +{ + Location for_locus = lexer.peek_token ()->get_locus (); + // parse for lifetimes in type + std::vector for_lifetimes = parse_for_lifetimes (); + + // branch on next token - either function or a trait type + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case ASYNC: + case CONST: + case UNSAFE: + case EXTERN_TOK: + case FN_TOK: + return parse_bare_function_type (std::move (for_lifetimes)); + case SCOPE_RESOLUTION: + case IDENTIFIER: + case SUPER: + case SELF: + case SELF_ALIAS: + case CRATE: + case DOLLAR_SIGN: { + // path, so trait type + + // parse type path to finish parsing trait bound + AST::TypePath path = parse_type_path (); + + t = lexer.peek_token (); + if (t->get_id () != PLUS) + { + // must be one-bound trait type + // create trait bound value object + AST::TraitBound bound (std::move (path), for_locus, false, false, + std::move (for_lifetimes)); + + return std::unique_ptr ( + new AST::TraitObjectTypeOneBound (std::move (bound), for_locus)); + } + + /* more than one bound trait type (or at least parsed as it - could be + * trailing '+') create trait bound pointer and bounds */ + std::unique_ptr initial_bound ( + new AST::TraitBound (std::move (path), for_locus, false, false, + std::move (for_lifetimes))); + std::vector> bounds; + bounds.push_back (std::move (initial_bound)); + + while (t->get_id () == PLUS) + { + lexer.skip_token (); + + // parse type param bound if it exists + std::unique_ptr bound + = parse_type_param_bound (); + if (bound == nullptr) + { + // not an error - e.g. trailing plus + return nullptr; + } + bounds.push_back (std::move (bound)); + + t = lexer.peek_token (); + } + + return std::unique_ptr ( + new AST::TraitObjectType (std::move (bounds), for_locus, false)); + } + default: + // error + add_error (Error (t->get_locus (), + "unrecognised token %qs in bare function type or trait " + "object type or trait object type one bound", + t->get_token_description ())); + + return nullptr; + } +} + +// Parses a maybe named param used in bare function types. +template +AST::MaybeNamedParam +Parser::parse_maybe_named_param (AST::AttrVec outer_attrs) +{ + /* Basically guess that param is named if first token is identifier or + * underscore and second token is semicolon. This should probably have no + * exceptions. rustc uses backtracking to parse these, but at the time of + * writing gccrs has no backtracking capabilities. */ + const_TokenPtr current = lexer.peek_token (); + const_TokenPtr next = lexer.peek_token (1); + + Identifier name; + AST::MaybeNamedParam::ParamKind kind = AST::MaybeNamedParam::UNNAMED; + + if (current->get_id () == IDENTIFIER && next->get_id () == COLON) + { + // named param + name = current->get_str (); + kind = AST::MaybeNamedParam::IDENTIFIER; + lexer.skip_token (1); + } + else if (current->get_id () == UNDERSCORE && next->get_id () == COLON) + { + // wildcard param + name = "_"; + kind = AST::MaybeNamedParam::WILDCARD; + lexer.skip_token (1); + } + + // parse type (required) + std::unique_ptr type = parse_type (); + if (type == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse type in maybe named param"); + add_error (std::move (error)); + + return AST::MaybeNamedParam::create_error (); + } + + return AST::MaybeNamedParam (std::move (name), kind, std::move (type), + std::move (outer_attrs), current->get_locus ()); +} + +/* Parses a bare function type (with the given for lifetimes for convenience - + * does not parse them itself). */ +template +std::unique_ptr +Parser::parse_bare_function_type ( + std::vector for_lifetimes) +{ + // TODO: pass in for lifetime location as param + Location best_try_locus = lexer.peek_token ()->get_locus (); + + AST::FunctionQualifiers qualifiers = parse_function_qualifiers (); + + if (!skip_token (FN_TOK)) + return nullptr; + + if (!skip_token (LEFT_PAREN)) + return nullptr; + + // parse function params, if they exist + std::vector params; + bool is_variadic = false; + AST::AttrVec variadic_attrs; + + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () != RIGHT_PAREN) + { + AST::AttrVec temp_attrs = parse_outer_attributes (); + + if (lexer.peek_token ()->get_id () == ELLIPSIS) + { + lexer.skip_token (); + is_variadic = true; + variadic_attrs = std::move (temp_attrs); + + t = lexer.peek_token (); + + if (t->get_id () != RIGHT_PAREN) + { + Error error (t->get_locus (), + "expected right parentheses after variadic in maybe " + "named function " + "parameters, found %qs", + t->get_token_description ()); + add_error (std::move (error)); + + return nullptr; + } + + break; + } + + AST::MaybeNamedParam param + = parse_maybe_named_param (std::move (temp_attrs)); + if (param.is_error ()) + { + Error error ( + lexer.peek_token ()->get_locus (), + "failed to parse maybe named param in bare function type"); + add_error (std::move (error)); + + return nullptr; + } + params.push_back (std::move (param)); + + if (lexer.peek_token ()->get_id () != COMMA) + break; + + lexer.skip_token (); + t = lexer.peek_token (); + } + + if (!skip_token (RIGHT_PAREN)) + return nullptr; + + // bare function return type, if exists + std::unique_ptr return_type = nullptr; + if (lexer.peek_token ()->get_id () == RETURN_TYPE) + { + lexer.skip_token (); + + // parse required TypeNoBounds + return_type = parse_type_no_bounds (); + if (return_type == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse return type (type no bounds) in bare " + "function type"); + add_error (std::move (error)); + + return nullptr; + } + } + + return std::unique_ptr ( + new AST::BareFunctionType (std::move (for_lifetimes), + std::move (qualifiers), std::move (params), + is_variadic, std::move (variadic_attrs), + std::move (return_type), best_try_locus)); +} + +// Parses a reference type (mutable or immutable, with given lifetime). +template +std::unique_ptr +Parser::parse_reference_type () +{ + Location locus = lexer.peek_token ()->get_locus (); + skip_token (AMP); + + // parse optional lifetime + AST::Lifetime lifetime = AST::Lifetime::error (); + if (lexer.peek_token ()->get_id () == LIFETIME) + { + lifetime = parse_lifetime (); + if (lifetime.is_error ()) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse lifetime in reference type"); + add_error (std::move (error)); + + return nullptr; + } + } + + bool is_mut = false; + if (lexer.peek_token ()->get_id () == MUT) + { + lexer.skip_token (); + is_mut = true; + } + + // parse type no bounds, which is required + std::unique_ptr type = parse_type_no_bounds (); + if (type == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse referenced type in reference type"); + add_error (std::move (error)); + + return nullptr; + } + + return std::unique_ptr ( + new AST::ReferenceType (is_mut, std::move (type), locus, + std::move (lifetime))); +} + +// Parses a raw (unsafe) pointer type. +template +std::unique_ptr +Parser::parse_raw_pointer_type () +{ + Location locus = lexer.peek_token ()->get_locus (); + skip_token (ASTERISK); + + AST::RawPointerType::PointerType kind = AST::RawPointerType::CONST; + + // branch on next token for pointer kind info + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case MUT: + kind = AST::RawPointerType::MUT; + lexer.skip_token (); + break; + case CONST: + kind = AST::RawPointerType::CONST; + lexer.skip_token (); + break; + default: + add_error (Error (t->get_locus (), + "unrecognised token %qs in raw pointer type", + t->get_token_description ())); + + return nullptr; + } + + // parse type no bounds (required) + std::unique_ptr type = parse_type_no_bounds (); + if (type == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse pointed type of raw pointer type"); + add_error (std::move (error)); + + return nullptr; + } + + return std::unique_ptr ( + new AST::RawPointerType (kind, std::move (type), locus)); +} + +/* Parses a slice or array type, depending on following arguments (as + * lookahead is not possible). */ +template +std::unique_ptr +Parser::parse_slice_or_array_type () +{ + Location locus = lexer.peek_token ()->get_locus (); + skip_token (LEFT_SQUARE); + + // parse inner type (required) + std::unique_ptr inner_type = parse_type (); + if (inner_type == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse inner type in slice or array type"); + add_error (std::move (error)); + + return nullptr; + } + + // branch on next token + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case RIGHT_SQUARE: + // slice type + lexer.skip_token (); + + return std::unique_ptr ( + new AST::SliceType (std::move (inner_type), locus)); + case SEMICOLON: { + // array type + lexer.skip_token (); + + // parse required array size expression + std::unique_ptr size = parse_expr (); + if (size == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse size expression in array type"); + add_error (std::move (error)); + + return nullptr; + } + + if (!skip_token (RIGHT_SQUARE)) + { + return nullptr; + } + + return std::unique_ptr ( + new AST::ArrayType (std::move (inner_type), std::move (size), locus)); + } + default: + // error + add_error ( + Error (t->get_locus (), + "unrecognised token %qs in slice or array type after inner type", + t->get_token_description ())); + + return nullptr; + } +} + +// Parses a type, taking into account type boundary disambiguation. +template +std::unique_ptr +Parser::parse_type_no_bounds () +{ + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case EXCLAM: + // never type - can't be macro as no path beforehand + lexer.skip_token (); + return std::unique_ptr ( + new AST::NeverType (t->get_locus ())); + case LEFT_SQUARE: + // slice type or array type - requires further disambiguation + return parse_slice_or_array_type (); + case LEFT_ANGLE: { + // qualified path in type + AST::QualifiedPathInType path = parse_qualified_path_in_type (); + if (path.is_error ()) + { + Error error (t->get_locus (), + "failed to parse qualified path in type"); + add_error (std::move (error)); + + return nullptr; + } + return std::unique_ptr ( + new AST::QualifiedPathInType (std::move (path))); + } + case UNDERSCORE: + // inferred type + lexer.skip_token (); + return std::unique_ptr ( + new AST::InferredType (t->get_locus ())); + case ASTERISK: + // raw pointer type + return parse_raw_pointer_type (); + case AMP: // does this also include AMP_AMP? + // reference type + return parse_reference_type (); + case LIFETIME: + /* probably a lifetime bound, so probably type param bounds in + * TraitObjectType. this is not allowed, but detection here for error + * message */ + add_error (Error (t->get_locus (), + "lifetime bounds (i.e. in type param bounds, in " + "TraitObjectType) are not allowed as TypeNoBounds")); + + return nullptr; + case IDENTIFIER: + case SUPER: + case SELF: + case SELF_ALIAS: + case CRATE: + case DOLLAR_SIGN: + case SCOPE_RESOLUTION: { + // macro invocation or type path - requires further disambiguation. + /* for parsing path component of each rule, perhaps parse it as a + * typepath and attempt conversion to simplepath if a trailing '!' is + * found */ + /* Type path also includes TraitObjectTypeOneBound BUT if it starts + * with it, it is exactly the same as a TypePath syntactically, so + * this is a syntactical ambiguity. As such, the parser will parse it + * as a TypePath. This, however, does not prevent TraitObjectType from + * starting with a typepath. */ + + // parse path as type path + AST::TypePath path = parse_type_path (); + if (path.is_error ()) + { + Error error ( + t->get_locus (), + "failed to parse path as first component of type no bounds"); + add_error (std::move (error)); + + return nullptr; + } + Location locus = path.get_locus (); + + // branch on next token + t = lexer.peek_token (); + switch (t->get_id ()) + { + case EXCLAM: { + // macro invocation + // convert to simple path + AST::SimplePath macro_path = path.as_simple_path (); + if (macro_path.is_empty ()) + { + Error error (t->get_locus (), + "failed to parse simple path in macro " + "invocation (for type)"); + add_error (std::move (error)); + + return nullptr; + } + + lexer.skip_token (); + + AST::DelimTokenTree tok_tree = parse_delim_token_tree (); + + return std::unique_ptr ( + new AST::MacroInvocation ( + AST::MacroInvocData (std::move (macro_path), + std::move (tok_tree)), + {}, locus)); + } + default: + // assume that this is a type path and not an error + return std::unique_ptr ( + new AST::TypePath (std::move (path))); + } + } + case LEFT_PAREN: + /* tuple type or parenthesised type - requires further disambiguation + * (the usual). ok apparently can be a parenthesised TraitBound too, so + * could be TraitObjectTypeOneBound */ + return parse_paren_prefixed_type_no_bounds (); + case FOR: + case ASYNC: + case CONST: + case UNSAFE: + case EXTERN_TOK: + case FN_TOK: + // bare function type (with no for lifetimes) + return parse_bare_function_type (std::vector ()); + case IMPL: + lexer.skip_token (); + if (lexer.peek_token ()->get_id () == LIFETIME) + { + /* cannot be one bound because lifetime prevents it from being + * traitbound not allowed as type no bounds, only here for error + * message */ + Error error ( + lexer.peek_token ()->get_locus (), + "lifetime (probably lifetime bound, in type param " + "bounds, in ImplTraitType) is not allowed in TypeNoBounds"); + add_error (std::move (error)); + + return nullptr; + } + else + { + // should be trait bound, so parse trait bound + std::unique_ptr initial_bound = parse_trait_bound (); + if (initial_bound == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse ImplTraitTypeOneBound bound"); + add_error (std::move (error)); + + return nullptr; + } + + Location locus = t->get_locus (); + + // ensure not a trait with multiple bounds + t = lexer.peek_token (); + if (t->get_id () == PLUS) + { + Error error (t->get_locus (), + "plus after trait bound means an ImplTraitType, " + "which is not allowed as a TypeNoBounds"); + add_error (std::move (error)); + + return nullptr; + } + + // convert trait bound to value object + AST::TraitBound value_bound (*initial_bound); + + return std::unique_ptr ( + new AST::ImplTraitTypeOneBound (std::move (value_bound), locus)); + } + case DYN: + case QUESTION_MARK: { + // either TraitObjectTypeOneBound + bool has_dyn = false; + if (t->get_id () == DYN) + { + lexer.skip_token (); + has_dyn = true; + } + + if (lexer.peek_token ()->get_id () == LIFETIME) + { + /* means that cannot be TraitObjectTypeOneBound - so here for + * error message */ + Error error (lexer.peek_token ()->get_locus (), + "lifetime as bound in TraitObjectTypeOneBound " + "is not allowed, so cannot be TypeNoBounds"); + add_error (std::move (error)); + + return nullptr; + } + + // should be trait bound, so parse trait bound + std::unique_ptr initial_bound = parse_trait_bound (); + if (initial_bound == nullptr) + { + Error error ( + lexer.peek_token ()->get_locus (), + "failed to parse TraitObjectTypeOneBound initial bound"); + add_error (std::move (error)); + + return nullptr; + } + + Location locus = t->get_locus (); + + // detect error with plus as next token + t = lexer.peek_token (); + if (t->get_id () == PLUS) + { + Error error (t->get_locus (), + "plus after trait bound means a TraitObjectType, " + "which is not allowed as a TypeNoBounds"); + add_error (std::move (error)); + + return nullptr; + } + + // convert trait bound to value object + AST::TraitBound value_bound (*initial_bound); + + return std::unique_ptr ( + new AST::TraitObjectTypeOneBound (std::move (value_bound), locus, + has_dyn)); + } + default: + add_error (Error (t->get_locus (), + "unrecognised token %qs in type no bounds", + t->get_token_description ())); + + return nullptr; + } +} + +// Parses a type no bounds beginning with '('. +template +std::unique_ptr +Parser::parse_paren_prefixed_type_no_bounds () +{ + /* NOTE: this could probably be parsed without the HACK solution of + * parse_paren_prefixed_type, but I was lazy. So FIXME for future.*/ + + /* NOTE: again, syntactical ambiguity of a parenthesised trait bound is + * considered a trait bound, not a parenthesised type, so that it can still + * be used in type param bounds. */ + + Location left_paren_locus = lexer.peek_token ()->get_locus (); + + // skip left delim + lexer.skip_token (); + /* while next token isn't close delim, parse comma-separated types, saving + * whether trailing comma happens */ + const_TokenPtr t = lexer.peek_token (); + bool trailing_comma = true; + std::vector> types; + + while (t->get_id () != RIGHT_PAREN) + { + std::unique_ptr type = parse_type (); + if (type == nullptr) + { + Error error (t->get_locus (), + "failed to parse type inside parentheses (probably " + "tuple or parenthesised)"); + add_error (std::move (error)); + + return nullptr; + } + types.push_back (std::move (type)); + + t = lexer.peek_token (); + if (t->get_id () != COMMA) + { + trailing_comma = false; + break; + } + lexer.skip_token (); + + t = lexer.peek_token (); + } + + if (!skip_token (RIGHT_PAREN)) + { + return nullptr; + } + + // if only one type and no trailing comma, then not a tuple type + if (types.size () == 1 && !trailing_comma) + { + // must be a TraitObjectType (with more than one bound) + if (lexer.peek_token ()->get_id () == PLUS) + { + // error - this is not allowed for type no bounds + Error error (lexer.peek_token ()->get_locus (), + "plus (implying TraitObjectType as type param " + "bounds) is not allowed in type no bounds"); + add_error (std::move (error)); + + return nullptr; + } + else + { + // release vector pointer + std::unique_ptr released_ptr = std::move (types[0]); + /* HACK: attempt to convert to trait bound. if fails, parenthesised + * type */ + std::unique_ptr converted_bound ( + released_ptr->to_trait_bound (true)); + if (converted_bound == nullptr) + { + // parenthesised type + return std::unique_ptr ( + new AST::ParenthesisedType (std::move (released_ptr), + left_paren_locus)); + } + else + { + // trait object type (one bound) + + // get value semantics trait bound + AST::TraitBound value_bound (*converted_bound); + + return std::unique_ptr ( + new AST::TraitObjectTypeOneBound (value_bound, + left_paren_locus)); + } + } + } + else + { + return std::unique_ptr ( + new AST::TupleType (std::move (types), left_paren_locus)); + } + /* TODO: ensure that this ensures that dynamic dispatch for traits is not + * lost somehow */ +} + +/* Parses a literal pattern or range pattern. Assumes that literals passed in + * are valid range pattern bounds. Do not pass in paths in expressions, for + * instance. */ +template +std::unique_ptr +Parser::parse_literal_or_range_pattern () +{ + const_TokenPtr range_lower = lexer.peek_token (); + AST::Literal::LitType type = AST::Literal::STRING; + bool has_minus = false; + + // get lit type + switch (range_lower->get_id ()) + { + case CHAR_LITERAL: + type = AST::Literal::CHAR; + lexer.skip_token (); + break; + case BYTE_CHAR_LITERAL: + type = AST::Literal::BYTE; + lexer.skip_token (); + break; + case INT_LITERAL: + type = AST::Literal::INT; + lexer.skip_token (); + break; + case FLOAT_LITERAL: + type = AST::Literal::FLOAT; + lexer.skip_token (); + break; + case MINUS: + // branch on next token + range_lower = lexer.peek_token (1); + switch (range_lower->get_id ()) + { + case INT_LITERAL: + type = AST::Literal::INT; + has_minus = true; + lexer.skip_token (1); + break; + case FLOAT_LITERAL: + type = AST::Literal::FLOAT; + has_minus = true; + lexer.skip_token (1); + break; + default: + add_error (Error (range_lower->get_locus (), + "token type %qs cannot be parsed as range pattern " + "bound or literal after minus symbol", + range_lower->get_token_description ())); + + return nullptr; + } + break; + default: + add_error ( + Error (range_lower->get_locus (), + "token type %qs cannot be parsed as range pattern bound", + range_lower->get_token_description ())); + + return nullptr; + } + + const_TokenPtr next = lexer.peek_token (); + if (next->get_id () == DOT_DOT_EQ || next->get_id () == ELLIPSIS) + { + // range pattern + lexer.skip_token (); + std::unique_ptr lower ( + new AST::RangePatternBoundLiteral ( + AST::Literal (range_lower->get_str (), type, + PrimitiveCoreType::CORETYPE_UNKNOWN), + range_lower->get_locus (), has_minus)); + + std::unique_ptr upper + = parse_range_pattern_bound (); + if (upper == nullptr) + { + Error error (next->get_locus (), + "failed to parse range pattern bound in range pattern"); + add_error (std::move (error)); + + return nullptr; + } + + return std::unique_ptr ( + new AST::RangePattern (std::move (lower), std::move (upper), + range_lower->get_locus ())); + } + else + { + // literal pattern + return std::unique_ptr ( + new AST::LiteralPattern (range_lower->get_str (), type, + range_lower->get_locus ())); + } +} + +// Parses a range pattern bound (value only). +template +std::unique_ptr +Parser::parse_range_pattern_bound () +{ + const_TokenPtr range_lower = lexer.peek_token (); + Location range_lower_locus = range_lower->get_locus (); + + // get lit type + switch (range_lower->get_id ()) + { + case CHAR_LITERAL: + lexer.skip_token (); + return std::unique_ptr ( + new AST::RangePatternBoundLiteral ( + AST::Literal (range_lower->get_str (), AST::Literal::CHAR, + range_lower->get_type_hint ()), + range_lower_locus)); + case BYTE_CHAR_LITERAL: + lexer.skip_token (); + return std::unique_ptr ( + new AST::RangePatternBoundLiteral ( + AST::Literal (range_lower->get_str (), AST::Literal::BYTE, + range_lower->get_type_hint ()), + range_lower_locus)); + case INT_LITERAL: + lexer.skip_token (); + return std::unique_ptr ( + new AST::RangePatternBoundLiteral ( + AST::Literal (range_lower->get_str (), AST::Literal::INT, + range_lower->get_type_hint ()), + range_lower_locus)); + case FLOAT_LITERAL: + lexer.skip_token (); + rust_debug ("warning: used deprecated float range pattern bound"); + return std::unique_ptr ( + new AST::RangePatternBoundLiteral ( + AST::Literal (range_lower->get_str (), AST::Literal::FLOAT, + range_lower->get_type_hint ()), + range_lower_locus)); + case MINUS: + // branch on next token + range_lower = lexer.peek_token (1); + switch (range_lower->get_id ()) + { + case INT_LITERAL: + lexer.skip_token (1); + return std::unique_ptr ( + new AST::RangePatternBoundLiteral ( + AST::Literal (range_lower->get_str (), AST::Literal::INT, + range_lower->get_type_hint ()), + range_lower_locus, true)); + case FLOAT_LITERAL: + lexer.skip_token (1); + rust_debug ("warning: used deprecated float range pattern bound"); + return std::unique_ptr ( + new AST::RangePatternBoundLiteral ( + AST::Literal (range_lower->get_str (), AST::Literal::FLOAT, + range_lower->get_type_hint ()), + range_lower_locus, true)); + default: + add_error (Error (range_lower->get_locus (), + "token type %qs cannot be parsed as range pattern " + "bound after minus symbol", + range_lower->get_token_description ())); + + return nullptr; + } + case IDENTIFIER: + case SUPER: + case SELF: + case SELF_ALIAS: + case CRATE: + case SCOPE_RESOLUTION: + case DOLLAR_SIGN: { + // path in expression + AST::PathInExpression path = parse_path_in_expression (); + if (path.is_error ()) + { + Error error ( + range_lower->get_locus (), + "failed to parse path in expression range pattern bound"); + add_error (std::move (error)); + + return nullptr; + } + return std::unique_ptr ( + new AST::RangePatternBoundPath (std::move (path))); + } + case LEFT_ANGLE: { + // qualified path in expression + AST::QualifiedPathInExpression path + = parse_qualified_path_in_expression (); + if (path.is_error ()) + { + Error error (range_lower->get_locus (), + "failed to parse qualified path in expression range " + "pattern bound"); + add_error (std::move (error)); + + return nullptr; + } + return std::unique_ptr ( + new AST::RangePatternBoundQualPath (std::move (path))); + } + default: + add_error ( + Error (range_lower->get_locus (), + "token type %qs cannot be parsed as range pattern bound", + range_lower->get_token_description ())); + + return nullptr; + } +} + +// Parses a pattern (will further disambiguate any pattern). +template +std::unique_ptr +Parser::parse_pattern () +{ + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case TRUE_LITERAL: + lexer.skip_token (); + return std::unique_ptr ( + new AST::LiteralPattern ("true", AST::Literal::BOOL, t->get_locus ())); + case FALSE_LITERAL: + lexer.skip_token (); + return std::unique_ptr ( + new AST::LiteralPattern ("false", AST::Literal::BOOL, t->get_locus ())); + case CHAR_LITERAL: + case BYTE_CHAR_LITERAL: + case INT_LITERAL: + case FLOAT_LITERAL: + return parse_literal_or_range_pattern (); + case STRING_LITERAL: + lexer.skip_token (); + return std::unique_ptr ( + new AST::LiteralPattern (t->get_str (), AST::Literal::STRING, + t->get_locus ())); + case BYTE_STRING_LITERAL: + lexer.skip_token (); + return std::unique_ptr ( + new AST::LiteralPattern (t->get_str (), AST::Literal::BYTE_STRING, + t->get_locus ())); + // raw string and raw byte string literals too if they are readded to + // lexer + case MINUS: + if (lexer.peek_token (1)->get_id () == INT_LITERAL) + { + return parse_literal_or_range_pattern (); + } + else if (lexer.peek_token (1)->get_id () == FLOAT_LITERAL) + { + return parse_literal_or_range_pattern (); + } + else + { + Error error (t->get_locus (), "unexpected token %<-%> in pattern - " + "did you forget an integer literal"); + add_error (std::move (error)); + + return nullptr; + } + case UNDERSCORE: + lexer.skip_token (); + return std::unique_ptr ( + new AST::WildcardPattern (t->get_locus ())); + case REF: + case MUT: + return parse_identifier_pattern (); + case IDENTIFIER: + /* if identifier with no scope resolution afterwards, identifier + * pattern. if scope resolution afterwards, path pattern (or range + * pattern or struct pattern or tuple struct pattern) or macro + * invocation */ + return parse_ident_leading_pattern (); + case AMP: + case LOGICAL_AND: + // reference pattern + return parse_reference_pattern (); + case LEFT_PAREN: + // tuple pattern or grouped pattern + return parse_grouped_or_tuple_pattern (); + case LEFT_SQUARE: + // slice pattern + return parse_slice_pattern (); + case LEFT_ANGLE: { + // qualified path in expression or qualified range pattern bound + AST::QualifiedPathInExpression path + = parse_qualified_path_in_expression (); + + if (lexer.peek_token ()->get_id () == DOT_DOT_EQ + || lexer.peek_token ()->get_id () == ELLIPSIS) + { + // qualified range pattern bound, so parse rest of range pattern + bool has_ellipsis_syntax + = lexer.peek_token ()->get_id () == ELLIPSIS; + lexer.skip_token (); + + std::unique_ptr lower_bound ( + new AST::RangePatternBoundQualPath (std::move (path))); + std::unique_ptr upper_bound + = parse_range_pattern_bound (); + + return std::unique_ptr ( + new AST::RangePattern (std::move (lower_bound), + std::move (upper_bound), t->get_locus (), + has_ellipsis_syntax)); + } + else + { + // just qualified path in expression + return std::unique_ptr ( + new AST::QualifiedPathInExpression (std::move (path))); + } + } + case SUPER: + case SELF: + case SELF_ALIAS: + case CRATE: + case SCOPE_RESOLUTION: + case DOLLAR_SIGN: { + // path in expression or range pattern bound + AST::PathInExpression path = parse_path_in_expression (); + + const_TokenPtr next = lexer.peek_token (); + switch (next->get_id ()) + { + case DOT_DOT_EQ: + case ELLIPSIS: { + // qualified range pattern bound, so parse rest of range pattern + bool has_ellipsis_syntax + = lexer.peek_token ()->get_id () == ELLIPSIS; + lexer.skip_token (); + + std::unique_ptr lower_bound ( + new AST::RangePatternBoundPath (std::move (path))); + std::unique_ptr upper_bound + = parse_range_pattern_bound (); + + return std::unique_ptr (new AST::RangePattern ( + std::move (lower_bound), std::move (upper_bound), + Linemap::unknown_location (), has_ellipsis_syntax)); + } + case EXCLAM: + return parse_macro_invocation_partial (std::move (path), + AST::AttrVec ()); + case LEFT_PAREN: { + // tuple struct + lexer.skip_token (); + + // check if empty tuple + if (lexer.peek_token ()->get_id () == RIGHT_PAREN) + { + lexer.skip_token (); + return std::unique_ptr ( + new AST::TupleStructPattern (std::move (path), nullptr)); + } + + // parse items + std::unique_ptr items + = parse_tuple_struct_items (); + if (items == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse tuple struct items"); + add_error (std::move (error)); + + return nullptr; + } + + if (!skip_token (RIGHT_PAREN)) + { + return nullptr; + } + + return std::unique_ptr ( + new AST::TupleStructPattern (std::move (path), + std::move (items))); + } + case LEFT_CURLY: { + // struct + lexer.skip_token (); + + // parse elements (optional) + AST::StructPatternElements elems = parse_struct_pattern_elems (); + + if (!skip_token (RIGHT_CURLY)) + { + return nullptr; + } + + return std::unique_ptr ( + new AST::StructPattern (std::move (path), t->get_locus (), + std::move (elems))); + } + default: + // assume path in expression + return std::unique_ptr ( + new AST::PathInExpression (std::move (path))); + } + } + default: + add_error (Error (t->get_locus (), "unexpected token %qs in pattern", + t->get_token_description ())); + + return nullptr; + } +} + +// Parses a single or double reference pattern. +template +std::unique_ptr +Parser::parse_reference_pattern () +{ + // parse double or single ref + bool is_double_ref = false; + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case AMP: + // still false + lexer.skip_token (); + break; + case LOGICAL_AND: + is_double_ref = true; + lexer.skip_token (); + break; + default: + add_error (Error (t->get_locus (), + "unexpected token %qs in reference pattern", + t->get_token_description ())); + + return nullptr; + } + + // parse mut (if it exists) + bool is_mut = false; + if (lexer.peek_token ()->get_id () == MUT) + { + is_mut = true; + lexer.skip_token (); + } + + // parse pattern to get reference of (required) + std::unique_ptr pattern = parse_pattern (); + if (pattern == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse pattern in reference pattern"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::ReferencePattern (std::move (pattern), is_mut, is_double_ref, + t->get_locus ())); +} + +/* Parses a grouped pattern or tuple pattern. Prefers grouped over tuple if + * only a single element with no commas. */ +template +std::unique_ptr +Parser::parse_grouped_or_tuple_pattern () +{ + Location paren_locus = lexer.peek_token ()->get_locus (); + skip_token (LEFT_PAREN); + + // detect '..' token (ranged with no lower range) + if (lexer.peek_token ()->get_id () == DOT_DOT) + { + lexer.skip_token (); + + // parse new patterns while next token is a comma + std::vector> patterns; + + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () == COMMA) + { + lexer.skip_token (); + + // break if next token is ')' + if (lexer.peek_token ()->get_id () == RIGHT_PAREN) + { + break; + } + + // parse pattern, which is required + std::unique_ptr pattern = parse_pattern (); + if (pattern == nullptr) + { + Error error ( + lexer.peek_token ()->get_locus (), + "failed to parse pattern inside ranged tuple pattern"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + patterns.push_back (std::move (pattern)); + + t = lexer.peek_token (); + } + + if (!skip_token (RIGHT_PAREN)) + { + // skip somewhere? + return nullptr; + } + + // create ranged tuple pattern items with only upper items + std::unique_ptr items ( + new AST::TuplePatternItemsRanged ( + std::vector> (), std::move (patterns))); + return std::unique_ptr ( + new AST::TuplePattern (std::move (items), paren_locus)); + } + + // parse initial pattern (required) + std::unique_ptr initial_pattern = parse_pattern (); + if (initial_pattern == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse pattern in grouped or tuple pattern"); + add_error (std::move (error)); + + return nullptr; + } + + // branch on whether next token is a comma or not + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case RIGHT_PAREN: + // grouped pattern + lexer.skip_token (); + + return std::unique_ptr ( + new AST::GroupedPattern (std::move (initial_pattern), paren_locus)); + case COMMA: { + // tuple pattern + lexer.skip_token (); + + // create vector of patterns + std::vector> patterns; + patterns.push_back (std::move (initial_pattern)); + + t = lexer.peek_token (); + while (t->get_id () != RIGHT_PAREN && t->get_id () != DOT_DOT) + { + // parse pattern (required) + std::unique_ptr pattern = parse_pattern (); + if (pattern == nullptr) + { + Error error (t->get_locus (), + "failed to parse pattern in tuple pattern"); + add_error (std::move (error)); + + return nullptr; + } + patterns.push_back (std::move (pattern)); + + if (lexer.peek_token ()->get_id () != COMMA) + break; + + lexer.skip_token (); + t = lexer.peek_token (); + } + + t = lexer.peek_token (); + if (t->get_id () == RIGHT_PAREN) + { + // non-ranged tuple pattern + lexer.skip_token (); + + std::unique_ptr items ( + new AST::TuplePatternItemsMultiple (std::move (patterns))); + return std::unique_ptr ( + new AST::TuplePattern (std::move (items), paren_locus)); + } + else if (t->get_id () == DOT_DOT) + { + // ranged tuple pattern + lexer.skip_token (); + + // parse upper patterns + std::vector> upper_patterns; + t = lexer.peek_token (); + while (t->get_id () == COMMA) + { + lexer.skip_token (); + + // break if end + if (lexer.peek_token ()->get_id () == RIGHT_PAREN) + break; + + // parse pattern (required) + std::unique_ptr pattern = parse_pattern (); + if (pattern == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse pattern in tuple pattern"); + add_error (std::move (error)); + + return nullptr; + } + upper_patterns.push_back (std::move (pattern)); + + t = lexer.peek_token (); + } + + if (!skip_token (RIGHT_PAREN)) + { + return nullptr; + } + + std::unique_ptr items ( + new AST::TuplePatternItemsRanged (std::move (patterns), + std::move (upper_patterns))); + return std::unique_ptr ( + new AST::TuplePattern (std::move (items), paren_locus)); + } + else + { + // some kind of error + Error error (t->get_locus (), + "failed to parse tuple pattern (probably) or maybe " + "grouped pattern"); + add_error (std::move (error)); + + return nullptr; + } + } + default: + // error + add_error (Error (t->get_locus (), + "unrecognised token %qs in grouped or tuple pattern " + "after first pattern", + t->get_token_description ())); + + return nullptr; + } +} + +/* Parses a slice pattern that can match arrays or slices. Parses the square + * brackets too. */ +template +std::unique_ptr +Parser::parse_slice_pattern () +{ + Location square_locus = lexer.peek_token ()->get_locus (); + skip_token (LEFT_SQUARE); + + // parse initial pattern (required) + std::unique_ptr initial_pattern = parse_pattern (); + if (initial_pattern == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse initial pattern in slice pattern"); + add_error (std::move (error)); + + return nullptr; + } + + std::vector> patterns; + patterns.push_back (std::move (initial_pattern)); + + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () == COMMA) + { + lexer.skip_token (); + + // break if end bracket + if (lexer.peek_token ()->get_id () == RIGHT_SQUARE) + break; + + // parse pattern (required) + std::unique_ptr pattern = parse_pattern (); + if (pattern == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse pattern in slice pattern"); + add_error (std::move (error)); + + return nullptr; + } + patterns.push_back (std::move (pattern)); + + t = lexer.peek_token (); + } + + if (!skip_token (RIGHT_SQUARE)) + { + return nullptr; + } + + return std::unique_ptr ( + new AST::SlicePattern (std::move (patterns), square_locus)); +} + +/* Parses an identifier pattern (pattern that binds a value matched to a + * variable). */ +template +std::unique_ptr +Parser::parse_identifier_pattern () +{ + Location locus = lexer.peek_token ()->get_locus (); + + bool has_ref = false; + if (lexer.peek_token ()->get_id () == REF) + { + has_ref = true; + lexer.skip_token (); + + // DEBUG + rust_debug ("parsed ref in identifier pattern"); + } + + bool has_mut = false; + if (lexer.peek_token ()->get_id () == MUT) + { + has_mut = true; + lexer.skip_token (); + } + + // parse identifier (required) + const_TokenPtr ident_tok = expect_token (IDENTIFIER); + if (ident_tok == nullptr) + { + // skip somewhere? + return nullptr; + } + Identifier ident = ident_tok->get_str (); + + // DEBUG + rust_debug ("parsed identifier in identifier pattern"); + + // parse optional pattern binding thing + std::unique_ptr bind_pattern = nullptr; + if (lexer.peek_token ()->get_id () == PATTERN_BIND) + { + lexer.skip_token (); + + // parse required pattern to bind + bind_pattern = parse_pattern (); + if (bind_pattern == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse pattern to bind in identifier pattern"); + add_error (std::move (error)); + + return nullptr; + } + } + + // DEBUG + rust_debug ("about to return identifier pattern"); + + return std::unique_ptr ( + new AST::IdentifierPattern (std::move (ident), locus, has_ref, has_mut, + std::move (bind_pattern))); +} + +/* Parses a pattern that opens with an identifier. This includes identifier + * patterns, path patterns (and derivatives such as struct patterns, tuple + * struct patterns, and macro invocations), and ranges. */ +template +std::unique_ptr +Parser::parse_ident_leading_pattern () +{ + // ensure first token is actually identifier + const_TokenPtr initial_tok = lexer.peek_token (); + if (initial_tok->get_id () != IDENTIFIER) + { + return nullptr; + } + + // save initial identifier as it may be useful (but don't skip) + std::string initial_ident = initial_tok->get_str (); + + // parse next tokens as a PathInExpression + AST::PathInExpression path = parse_path_in_expression (); + + // branch on next token + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case EXCLAM: + return parse_macro_invocation_partial (std::move (path), AST::AttrVec ()); + case LEFT_PAREN: { + // tuple struct + lexer.skip_token (); + + // DEBUG + rust_debug ("parsing tuple struct pattern"); + + // check if empty tuple + if (lexer.peek_token ()->get_id () == RIGHT_PAREN) + { + lexer.skip_token (); + return std::unique_ptr ( + new AST::TupleStructPattern (std::move (path), nullptr)); + } + + // parse items + std::unique_ptr items + = parse_tuple_struct_items (); + if (items == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse tuple struct items"); + add_error (std::move (error)); + + return nullptr; + } + + // DEBUG + rust_debug ("successfully parsed tuple struct items"); + + if (!skip_token (RIGHT_PAREN)) + { + return nullptr; + } + + // DEBUG + rust_debug ("successfully parsed tuple struct pattern"); + + return std::unique_ptr ( + new AST::TupleStructPattern (std::move (path), std::move (items))); + } + case LEFT_CURLY: { + // struct + lexer.skip_token (); + + // parse elements (optional) + AST::StructPatternElements elems = parse_struct_pattern_elems (); + + if (!skip_token (RIGHT_CURLY)) + { + return nullptr; + } + + // DEBUG + rust_debug ("successfully parsed struct pattern"); + + return std::unique_ptr ( + new AST::StructPattern (std::move (path), initial_tok->get_locus (), + std::move (elems))); + } + case DOT_DOT_EQ: + case ELLIPSIS: { + // range + bool has_ellipsis_syntax = lexer.peek_token ()->get_id () == ELLIPSIS; + + lexer.skip_token (); + + std::unique_ptr lower_bound ( + new AST::RangePatternBoundPath (std::move (path))); + std::unique_ptr upper_bound + = parse_range_pattern_bound (); + + return std::unique_ptr (new AST::RangePattern ( + std::move (lower_bound), std::move (upper_bound), + Linemap::unknown_location (), has_ellipsis_syntax)); + } + case PATTERN_BIND: { + // only allow on single-segment paths + if (path.is_single_segment ()) + { + // identifier with pattern bind + lexer.skip_token (); + + std::unique_ptr bind_pattern = parse_pattern (); + if (bind_pattern == nullptr) + { + Error error ( + t->get_locus (), + "failed to parse pattern to bind to identifier pattern"); + add_error (std::move (error)); + + return nullptr; + } + return std::unique_ptr ( + new AST::IdentifierPattern (std::move (initial_ident), + initial_tok->get_locus (), false, + false, std::move (bind_pattern))); + } + Error error ( + t->get_locus (), + "failed to parse pattern bind to a path, not an identifier"); + add_error (std::move (error)); + + return nullptr; + } + default: + // assume identifier if single segment + if (path.is_single_segment ()) + { + return std::unique_ptr ( + new AST::IdentifierPattern (std::move (initial_ident), + initial_tok->get_locus ())); + } + // return path otherwise + return std::unique_ptr ( + new AST::PathInExpression (std::move (path))); + } +} + +// Parses tuple struct items if they exist. Does not parse parentheses. +template +std::unique_ptr +Parser::parse_tuple_struct_items () +{ + std::vector> lower_patterns; + + // DEBUG + rust_debug ("started parsing tuple struct items"); + + // check for '..' at front + if (lexer.peek_token ()->get_id () == DOT_DOT) + { + // only parse upper patterns + lexer.skip_token (); + + // DEBUG + rust_debug ("'..' at front in tuple struct items detected"); + + std::vector> upper_patterns; + + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () == COMMA) + { + lexer.skip_token (); + + // break if right paren + if (lexer.peek_token ()->get_id () == RIGHT_PAREN) + break; + + // parse pattern, which is now required + std::unique_ptr pattern = parse_pattern (); + if (pattern == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse pattern in tuple struct items"); + add_error (std::move (error)); + + return nullptr; + } + upper_patterns.push_back (std::move (pattern)); + + t = lexer.peek_token (); + } + + // DEBUG + rust_debug ( + "finished parsing tuple struct items ranged (upper/none only)"); + + return std::unique_ptr ( + new AST::TupleStructItemsRange (std::move (lower_patterns), + std::move (upper_patterns))); + } + + // has at least some lower patterns + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () != RIGHT_PAREN && t->get_id () != DOT_DOT) + { + // DEBUG + rust_debug ("about to parse pattern in tuple struct items"); + + // parse pattern, which is required + std::unique_ptr pattern = parse_pattern (); + if (pattern == nullptr) + { + Error error (t->get_locus (), + "failed to parse pattern in tuple struct items"); + add_error (std::move (error)); + + return nullptr; + } + lower_patterns.push_back (std::move (pattern)); + + // DEBUG + rust_debug ("successfully parsed pattern in tuple struct items"); + + if (lexer.peek_token ()->get_id () != COMMA) + { + // DEBUG + rust_debug ("broke out of parsing patterns in tuple struct " + "items as no comma"); + + break; + } + lexer.skip_token (); + t = lexer.peek_token (); + } + + // branch on next token + t = lexer.peek_token (); + switch (t->get_id ()) + { + case RIGHT_PAREN: + return std::unique_ptr ( + new AST::TupleStructItemsNoRange (std::move (lower_patterns))); + case DOT_DOT: { + // has an upper range that must be parsed separately + lexer.skip_token (); + + std::vector> upper_patterns; + + t = lexer.peek_token (); + while (t->get_id () == COMMA) + { + lexer.skip_token (); + + // break if next token is right paren + if (lexer.peek_token ()->get_id () == RIGHT_PAREN) + break; + + // parse pattern, which is required + std::unique_ptr pattern = parse_pattern (); + if (pattern == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse pattern in tuple struct items"); + add_error (std::move (error)); + + return nullptr; + } + upper_patterns.push_back (std::move (pattern)); + + t = lexer.peek_token (); + } + + return std::unique_ptr ( + new AST::TupleStructItemsRange (std::move (lower_patterns), + std::move (upper_patterns))); + } + default: + // error + add_error (Error (t->get_locus (), + "unexpected token %qs in tuple struct items", + t->get_token_description ())); + + return nullptr; + } +} + +// Parses struct pattern elements if they exist. +template +AST::StructPatternElements +Parser::parse_struct_pattern_elems () +{ + std::vector> fields; + + AST::AttrVec etc_attrs; + bool has_etc = false; + + // try parsing struct pattern fields + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () != RIGHT_CURLY) + { + AST::AttrVec outer_attrs = parse_outer_attributes (); + + // parse etc (must be last in struct pattern, so breaks) + if (lexer.peek_token ()->get_id () == DOT_DOT) + { + lexer.skip_token (); + etc_attrs = std::move (outer_attrs); + has_etc = true; + break; + } + + std::unique_ptr field + = parse_struct_pattern_field_partial (std::move (outer_attrs)); + if (field == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse struct pattern field"); + add_error (std::move (error)); + + // skip after somewhere? + return AST::StructPatternElements::create_empty (); + } + fields.push_back (std::move (field)); + + if (lexer.peek_token ()->get_id () != COMMA) + break; + + // skip comma + lexer.skip_token (); + t = lexer.peek_token (); + } + + if (has_etc) + return AST::StructPatternElements (std::move (fields), + std::move (etc_attrs)); + else + return AST::StructPatternElements (std::move (fields)); +} + +/* Parses a struct pattern field (tuple index/pattern, identifier/pattern, or + * identifier). */ +template +std::unique_ptr +Parser::parse_struct_pattern_field () +{ + // parse outer attributes (if they exist) + AST::AttrVec outer_attrs = parse_outer_attributes (); + + return parse_struct_pattern_field_partial (std::move (outer_attrs)); +} + +/* Parses a struct pattern field (tuple index/pattern, identifier/pattern, or + * identifier), with outer attributes passed in. */ +template +std::unique_ptr +Parser::parse_struct_pattern_field_partial ( + AST::AttrVec outer_attrs) +{ + // branch based on next token + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case INT_LITERAL: { + // tuple index + std::string index_str = t->get_str (); + int index = atoi (index_str.c_str ()); + + if (!skip_token (COLON)) + { + return nullptr; + } + + // parse required pattern + std::unique_ptr pattern = parse_pattern (); + if (pattern == nullptr) + { + Error error ( + t->get_locus (), + "failed to parse pattern in tuple index struct pattern field"); + add_error (std::move (error)); + + return nullptr; + } + + return std::unique_ptr ( + new AST::StructPatternFieldTuplePat (index, std::move (pattern), + std::move (outer_attrs), + t->get_locus ())); + } + case IDENTIFIER: + // identifier-pattern OR only identifier + // branch on next token + switch (lexer.peek_token (1)->get_id ()) + { + case COLON: { + // identifier-pattern + Identifier ident = t->get_str (); + lexer.skip_token (); + + skip_token (COLON); + + // parse required pattern + std::unique_ptr pattern = parse_pattern (); + if (pattern == nullptr) + { + Error error (t->get_locus (), + "failed to parse pattern in struct pattern field"); + add_error (std::move (error)); + + return nullptr; + } + + return std::unique_ptr ( + new AST::StructPatternFieldIdentPat (std::move (ident), + std::move (pattern), + std::move (outer_attrs), + t->get_locus ())); + } + case COMMA: + case RIGHT_CURLY: { + // identifier only + Identifier ident = t->get_str (); + lexer.skip_token (); + + return std::unique_ptr ( + new AST::StructPatternFieldIdent (std::move (ident), false, false, + std::move (outer_attrs), + t->get_locus ())); + } + default: + // error + add_error (Error (t->get_locus (), + "unrecognised token %qs in struct pattern field", + t->get_token_description ())); + + return nullptr; + } + case REF: + case MUT: { + // only identifier + bool has_ref = false; + if (t->get_id () == REF) + { + has_ref = true; + lexer.skip_token (); + } + + bool has_mut = false; + if (lexer.peek_token ()->get_id () == MUT) + { + has_mut = true; + lexer.skip_token (); + } + + const_TokenPtr ident_tok = expect_token (IDENTIFIER); + if (ident_tok == nullptr) + { + return nullptr; + } + Identifier ident = ident_tok->get_str (); + + return std::unique_ptr ( + new AST::StructPatternFieldIdent (std::move (ident), has_ref, has_mut, + std::move (outer_attrs), + t->get_locus ())); + } + default: + // not necessarily an error + return nullptr; + } +} + +template +ExprOrStmt +Parser::parse_stmt_or_expr_with_block ( + AST::AttrVec outer_attrs) +{ + auto expr = parse_expr_with_block (std::move (outer_attrs)); + if (expr == nullptr) + return ExprOrStmt::create_error (); + + auto tok = lexer.peek_token (); + + // tail expr in a block expr + if (tok->get_id () == RIGHT_CURLY) + return ExprOrStmt (std::move (expr)); + + // internal block expr must either have semicolons followed, or evaluate to + // () + auto locus = expr->get_locus (); + std::unique_ptr stmt ( + new AST::ExprStmtWithBlock (std::move (expr), locus, + tok->get_id () == SEMICOLON)); + if (tok->get_id () == SEMICOLON) + lexer.skip_token (); + + return ExprOrStmt (std::move (stmt)); +} + +/* Parses a statement or expression (depending on whether a trailing semicolon + * exists). Useful for block expressions where it cannot be determined through + * lookahead whether it is a statement or expression to be parsed. */ +template +ExprOrStmt +Parser::parse_stmt_or_expr_without_block () +{ + // quick exit for empty statement + const_TokenPtr t = lexer.peek_token (); + if (t->get_id () == SEMICOLON) + { + lexer.skip_token (); + std::unique_ptr stmt ( + new AST::EmptyStmt (t->get_locus ())); + return ExprOrStmt (std::move (stmt)); + } + + // parse outer attributes + AST::AttrVec outer_attrs = parse_outer_attributes (); + + // parsing this will be annoying because of the many different possibilities + /* best may be just to copy paste in parse_item switch, and failing that try + * to parse outer attributes, and then pass them in to either a let + * statement or (fallback) expression statement. */ + // FIXME: think of a way to do this without such a large switch? + + /* FIXME: for expressions at least, the only way that they can really be + * parsed properly in this way is if they don't support operators on them. + * They must be pratt-parsed otherwise. As such due to composability, only + * explicit statements will have special cases here. This should roughly + * correspond to "expr-with-block", but this warning is here in case it + * isn't the case. */ + t = lexer.peek_token (); + switch (t->get_id ()) + { + case LET: { + // let statement + std::unique_ptr stmt ( + parse_let_stmt (std::move (outer_attrs))); + return ExprOrStmt (std::move (stmt)); + } + case PUB: + case MOD: + case EXTERN_TOK: + case USE: + case FN_TOK: + case TYPE: + case STRUCT_TOK: + case ENUM_TOK: + case CONST: + case STATIC_TOK: + case TRAIT: + case IMPL: { + std::unique_ptr item ( + parse_vis_item (std::move (outer_attrs))); + return ExprOrStmt (std::move (item)); + } + /* TODO: implement union keyword but not really because of + * context-dependence crappy hack way to parse a union written below to + * separate it from the good code. */ + // case UNION: + case UNSAFE: { // maybe - unsafe traits are a thing + /* if any of these (should be all possible VisItem prefixes), parse a + * VisItem - can't parse item because would require reparsing outer + * attributes */ + const_TokenPtr t2 = lexer.peek_token (1); + switch (t2->get_id ()) + { + case LEFT_CURLY: { + // unsafe block + return parse_stmt_or_expr_with_block (std::move (outer_attrs)); + } + case TRAIT: { + // unsafe trait + std::unique_ptr item ( + parse_vis_item (std::move (outer_attrs))); + return ExprOrStmt (std::move (item)); + } + case EXTERN_TOK: + case FN_TOK: { + // unsafe function + std::unique_ptr item ( + parse_vis_item (std::move (outer_attrs))); + return ExprOrStmt (std::move (item)); + } + case IMPL: { + // unsafe trait impl + std::unique_ptr item ( + parse_vis_item (std::move (outer_attrs))); + return ExprOrStmt (std::move (item)); + } + default: + add_error (Error (t2->get_locus (), + "unrecognised token %qs after parsing unsafe - " + "expected beginning of expression or statement", + t->get_token_description ())); + + // skip somewhere? + return ExprOrStmt::create_error (); + } + } + case SUPER: + case SELF: + case CRATE: + case DOLLAR_SIGN: { + /* path-based thing so struct/enum or path or macro invocation of a + * kind. however, the expressions are composable (i think) */ + + std::unique_ptr expr + = parse_expr_without_block (); + + if (lexer.peek_token ()->get_id () == SEMICOLON) + { + // must be expression statement + lexer.skip_token (); + + std::unique_ptr stmt ( + new AST::ExprStmtWithoutBlock (std::move (expr), + t->get_locus ())); + return ExprOrStmt (std::move (stmt)); + } + + // return expression + return ExprOrStmt (std::move (expr)); + } + /* FIXME: this is either a macro invocation or macro invocation semi. + * start parsing to determine which one it is. */ + // FIXME: or this is another path-based thing - struct/enum or path + // itself return parse_path_based_stmt_or_expr(std::move(outer_attrs)); + // FIXME: old code there + case LOOP: + case WHILE: + case FOR: + case IF: + case MATCH_TOK: + case LEFT_CURLY: + case ASYNC: { + return parse_stmt_or_expr_with_block (std::move (outer_attrs)); + } + case LIFETIME: { + /* FIXME: are there any expressions without blocks that can have + * lifetime as their first token? Or is loop expr the only one? */ + // safe side for now: + const_TokenPtr t2 = lexer.peek_token (2); + if (lexer.peek_token (1)->get_id () == COLON + && (t2->get_id () == LOOP || t2->get_id () == WHILE + || t2->get_id () == FOR)) + { + return parse_stmt_or_expr_with_block (std::move (outer_attrs)); + } + else + { + // should be expr without block + std::unique_ptr expr + = parse_expr_without_block (std::move (outer_attrs)); + + if (lexer.peek_token ()->get_id () == SEMICOLON) + { + // must be expression statement + lexer.skip_token (); + + std::unique_ptr stmt ( + new AST::ExprStmtWithoutBlock (std::move (expr), + t->get_locus ())); + return ExprOrStmt (std::move (stmt)); + } + + // return expression + return ExprOrStmt (std::move (expr)); + } + } + // crappy hack to do union "keyword" + case IDENTIFIER: + if (t->get_str () == "union" + && lexer.peek_token (1)->get_id () == IDENTIFIER) + { + std::unique_ptr item ( + parse_vis_item (std::move (outer_attrs))); + return ExprOrStmt (std::move (item)); + // or should this go straight to parsing union? + } + else if (t->get_str () == "macro_rules") + { + // macro_rules! macro item + std::unique_ptr item ( + parse_macro_item (std::move (outer_attrs))); + return ExprOrStmt (std::move (item)); + } + else if (lexer.peek_token (1)->get_id () == SCOPE_RESOLUTION + || lexer.peek_token (1)->get_id () == EXCLAM + || lexer.peek_token (1)->get_id () == LEFT_CURLY) + { + /* path (probably) or macro invocation or struct or enum, so + * probably a macro invocation semi decide how to parse - probably + * parse path and then get macro from it */ + + // FIXME: old code was good until composability was required + // return parse_path_based_stmt_or_expr(std::move(outer_attrs)); + std::unique_ptr expr + = parse_expr_without_block (std::move (outer_attrs)); + + if (lexer.peek_token ()->get_id () == SEMICOLON) + { + // must be expression statement + lexer.skip_token (); + + std::unique_ptr stmt ( + new AST::ExprStmtWithoutBlock (std::move (expr), + t->get_locus ())); + return ExprOrStmt (std::move (stmt)); + } + + // return expression + return ExprOrStmt (std::move (expr)); + } + gcc_fallthrough (); + // TODO: find out how to disable gcc "implicit fallthrough" warning + default: { + /* expression statement (without block) or expression itself - parse + * expression then make it statement if semi afterwards */ + + std::unique_ptr expr + = parse_expr_without_block (std::move (outer_attrs)); + + if (lexer.peek_token ()->get_id () == SEMICOLON) + { + // must be expression statement + lexer.skip_token (); + + std::unique_ptr stmt ( + new AST::ExprStmtWithoutBlock (std::move (expr), + t->get_locus ())); + return ExprOrStmt (std::move (stmt)); + } + + // return expression + return ExprOrStmt (std::move (expr)); + } + } +} + +/* Parses a statement or expression beginning with a path (i.e. macro, + * struct/enum, or path expr) */ +template +ExprOrStmt +Parser::parse_path_based_stmt_or_expr ( + AST::AttrVec outer_attrs) +{ + // attempt to parse path + Location stmt_or_expr_loc = lexer.peek_token ()->get_locus (); + AST::PathInExpression path = parse_path_in_expression (); + + // branch on next token + const_TokenPtr t2 = lexer.peek_token (); + switch (t2->get_id ()) + { + case EXCLAM: { + /* macro invocation or macro invocation semi - depends on whether + * there is a final ';' */ + // convert path in expr to simple path (as that is used in macros) + AST::SimplePath macro_path = path.as_simple_path (); + if (macro_path.is_empty ()) + { + Error error (t2->get_locus (), + "failed to convert parsed path to simple " + "path (for macro invocation or semi)"); + add_error (std::move (error)); + + return ExprOrStmt::create_error (); + } + + // skip exclamation mark + lexer.skip_token (); + + const_TokenPtr t3 = lexer.peek_token (); + Location tok_tree_loc = t3->get_locus (); + + AST::DelimType type = AST::PARENS; + switch (t3->get_id ()) + { + case LEFT_PAREN: + type = AST::PARENS; + break; + case LEFT_SQUARE: + type = AST::SQUARE; + break; + case LEFT_CURLY: + type = AST::CURLY; + break; + default: + add_error ( + Error (t3->get_locus (), + "unrecognised token %qs in macro invocation - (opening) " + "delimiter expected", + t3->get_token_description ())); + + return ExprOrStmt::create_error (); + } + lexer.skip_token (); + + // parse actual token trees + std::vector> token_trees; + auto delim_open + = std::unique_ptr (new AST::Token (std::move (t3))); + token_trees.push_back (std::move (delim_open)); + + t3 = lexer.peek_token (); + // parse token trees until the initial delimiter token is found again + while (!token_id_matches_delims (t3->get_id (), type)) + { + std::unique_ptr tree = parse_token_tree (); + + if (tree == nullptr) + { + Error error (t3->get_locus (), + "failed to parse token tree for macro " + "invocation (or semi) - " + "found %qs", + t3->get_token_description ()); + add_error (std::move (error)); + + return ExprOrStmt::create_error (); + } + + token_trees.push_back (std::move (tree)); + + t3 = lexer.peek_token (); + } + + auto delim_close + = std::unique_ptr (new AST::Token (std::move (t3))); + token_trees.push_back (std::move (delim_close)); + + // parse end delimiters + t3 = lexer.peek_token (); + if (token_id_matches_delims (t3->get_id (), type)) + { + // tokens match opening delimiter, so skip. + lexer.skip_token (); + + /* with curly bracketed macros, assume it is a macro invocation + * unless a semicolon is explicitly put at the end. this is not + * necessarily true (i.e. context-dependence) and so may have to + * be fixed up via HACKs in semantic analysis (by checking whether + * it is the last elem in the vector). */ + + AST::DelimTokenTree delim_tok_tree (type, std::move (token_trees), + tok_tree_loc); + AST::MacroInvocData invoc_data (std::move (macro_path), + std::move (delim_tok_tree)); + + if (lexer.peek_token ()->get_id () == SEMICOLON) + { + lexer.skip_token (); + + std::unique_ptr stmt ( + new AST::MacroInvocation (std::move (invoc_data), + std::move (outer_attrs), + stmt_or_expr_loc, true)); + return ExprOrStmt (std::move (stmt)); + } + + // otherwise, create macro invocation + std::unique_ptr expr ( + new AST::MacroInvocation (std::move (invoc_data), + std::move (outer_attrs), + stmt_or_expr_loc, false)); + return ExprOrStmt (std::move (expr)); + } + else + { + // tokens don't match opening delimiters, so produce error + Error error ( + t2->get_locus (), + "unexpected token %qs - expecting closing delimiter %qs (for a " + "macro invocation)", + t2->get_token_description (), + (type == AST::PARENS ? ")" : (type == AST::SQUARE ? "]" : "}"))); + add_error (std::move (error)); + + return ExprOrStmt::create_error (); + } + } + case LEFT_CURLY: { + /* definitely not a block: + * path '{' ident ',' + * path '{' ident ':' [anything] ',' + * path '{' ident ':' [not a type] + * otherwise, assume block expr and thus path */ + bool not_a_block = lexer.peek_token (1)->get_id () == IDENTIFIER + && (lexer.peek_token (2)->get_id () == COMMA + || (lexer.peek_token (2)->get_id () == COLON + && (lexer.peek_token (4)->get_id () == COMMA + || !can_tok_start_type ( + lexer.peek_token (3)->get_id ())))); + std::unique_ptr expr = nullptr; + + if (not_a_block) + { + /* assume struct expr struct (as struct-enum disambiguation + * requires name lookup) again, make statement if final ';' */ + expr = parse_struct_expr_struct_partial (std::move (path), + std::move (outer_attrs)); + if (expr == nullptr) + { + Error error (t2->get_locus (), + "failed to parse struct expr struct"); + add_error (std::move (error)); + + return ExprOrStmt::create_error (); + } + } + else + { + // assume path - make statement if final ';' + // lexer.skip_token(); + + // HACK: add outer attrs to path + path.set_outer_attrs (std::move (outer_attrs)); + expr = std::unique_ptr ( + new AST::PathInExpression (std::move (path))); + } + + // determine if statement if ends with semicolon + if (lexer.peek_token ()->get_id () == SEMICOLON) + { + // statement + lexer.skip_token (); + std::unique_ptr stmt ( + new AST::ExprStmtWithoutBlock (std::move (expr), + stmt_or_expr_loc)); + return ExprOrStmt (std::move (stmt)); + } + + // otherwise, expression + return ExprOrStmt (std::move (expr)); + } + case LEFT_PAREN: { + /* assume struct expr tuple (as struct-enum disambiguation requires + * name lookup) again, make statement if final ';' */ + std::unique_ptr struct_expr + = parse_struct_expr_tuple_partial (std::move (path), + std::move (outer_attrs)); + if (struct_expr == nullptr) + { + Error error (t2->get_locus (), "failed to parse struct expr tuple"); + add_error (std::move (error)); + + return ExprOrStmt::create_error (); + } + + // determine if statement if ends with semicolon + if (lexer.peek_token ()->get_id () == SEMICOLON) + { + // statement + lexer.skip_token (); + std::unique_ptr stmt ( + new AST::ExprStmtWithoutBlock (std::move (struct_expr), + stmt_or_expr_loc)); + return ExprOrStmt (std::move (stmt)); + } + + // otherwise, expression + return ExprOrStmt (std::move (struct_expr)); + } + default: { + // assume path - make statement if final ';' + // lexer.skip_token(); + + // HACK: replace outer attributes in path + path.set_outer_attrs (std::move (outer_attrs)); + std::unique_ptr expr ( + new AST::PathInExpression (std::move (path))); + + if (lexer.peek_token ()->get_id () == SEMICOLON) + { + lexer.skip_token (); + + std::unique_ptr stmt ( + new AST::ExprStmtWithoutBlock (std::move (expr), + stmt_or_expr_loc)); + return ExprOrStmt (std::move (stmt)); + } + + return ExprOrStmt (std::move (expr)); + } + } +} + +// Parses a struct expression field. +template +std::unique_ptr +Parser::parse_struct_expr_field () +{ + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case IDENTIFIER: + if (lexer.peek_token (1)->get_id () == COLON) + { + // struct expr field with identifier and expr + Identifier ident = t->get_str (); + lexer.skip_token (1); + + // parse expression (required) + std::unique_ptr expr = parse_expr (); + if (expr == nullptr) + { + Error error (t->get_locus (), + "failed to parse struct expression field with " + "identifier and expression"); + add_error (std::move (error)); + + return nullptr; + } + + return std::unique_ptr ( + new AST::StructExprFieldIdentifierValue (std::move (ident), + std::move (expr), + t->get_locus ())); + } + else + { + // struct expr field with identifier only + Identifier ident = t->get_str (); + lexer.skip_token (); + + return std::unique_ptr ( + new AST::StructExprFieldIdentifier (std::move (ident), + t->get_locus ())); + } + case INT_LITERAL: { + // parse tuple index field + int index = atoi (t->get_str ().c_str ()); + lexer.skip_token (); + + if (!skip_token (COLON)) + { + // skip somewhere? + return nullptr; + } + + // parse field expression (required) + std::unique_ptr expr = parse_expr (); + if (expr == nullptr) + { + Error error (t->get_locus (), + "failed to parse expr in struct (or enum) expr " + "field with tuple index"); + add_error (std::move (error)); + + return nullptr; + } + + return std::unique_ptr ( + new AST::StructExprFieldIndexValue (index, std::move (expr), + t->get_locus ())); + } + case DOT_DOT: + /* this is a struct base and can't be parsed here, so just return + * nothing without erroring */ + + return nullptr; + default: + add_error ( + Error (t->get_locus (), + "unrecognised token %qs as first token of struct expr field - " + "expected identifier or integer literal", + t->get_token_description ())); + + return nullptr; + } +} + +// Parses a macro invocation or macro invocation semi. +template +ExprOrStmt +Parser::parse_macro_invocation_maybe_semi ( + AST::AttrVec outer_attrs) +{ + Location macro_locus = lexer.peek_token ()->get_locus (); + AST::SimplePath macro_path = parse_simple_path (); + if (macro_path.is_empty ()) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse simple path in macro invocation or semi"); + add_error (std::move (error)); + + return ExprOrStmt::create_error (); + } + + if (!skip_token (EXCLAM)) + { + return ExprOrStmt::create_error (); + } + + const_TokenPtr t3 = lexer.peek_token (); + Location tok_tree_loc = t3->get_locus (); + + AST::DelimType type = AST::PARENS; + switch (t3->get_id ()) + { + case LEFT_PAREN: + type = AST::PARENS; + break; + case LEFT_SQUARE: + type = AST::SQUARE; + break; + case LEFT_CURLY: + type = AST::CURLY; + break; + default: + add_error ( + Error (t3->get_locus (), + "unrecognised token %qs in macro invocation - (opening) " + "delimiter expected", + t3->get_token_description ())); + + return ExprOrStmt::create_error (); + } + lexer.skip_token (); + + // parse actual token trees + std::vector> token_trees; + auto delim_open + = std::unique_ptr (new AST::Token (std::move (t3))); + token_trees.push_back (std::move (delim_open)); + + t3 = lexer.peek_token (); + // parse token trees until the initial delimiter token is found again + while (!token_id_matches_delims (t3->get_id (), type)) + { + std::unique_ptr tree = parse_token_tree (); + + if (tree == nullptr) + { + Error error (t3->get_locus (), + "failed to parse token tree for macro invocation (or " + "semi) - found %qs", + t3->get_token_description ()); + add_error (std::move (error)); + + return ExprOrStmt::create_error (); + } + + token_trees.push_back (std::move (tree)); + + t3 = lexer.peek_token (); + } + auto delim_close + = std::unique_ptr (new AST::Token (std::move (t3))); + token_trees.push_back (std::move (delim_close)); + + // parse end delimiters + t3 = lexer.peek_token (); + if (token_id_matches_delims (t3->get_id (), type)) + { + // tokens match opening delimiter, so skip. + lexer.skip_token (); + + /* with curly bracketed macros, assume it is a macro invocation unless + * a semicolon is explicitly put at the end. this is not necessarily + * true (i.e. context-dependence) and so may have to be fixed up via + * HACKs in semantic analysis (by checking whether it is the last elem + * in the vector). */ + + AST::DelimTokenTree delim_tok_tree (type, std::move (token_trees), + tok_tree_loc); + AST::MacroInvocData invoc_data (std::move (macro_path), + std::move (delim_tok_tree)); + + if (lexer.peek_token ()->get_id () == SEMICOLON) + { + lexer.skip_token (); + + std::unique_ptr stmt ( + new AST::MacroInvocation (std::move (invoc_data), + std::move (outer_attrs), macro_locus, + true)); + return ExprOrStmt (std::move (stmt)); + } + + // otherwise, create macro invocation + std::unique_ptr expr ( + new AST::MacroInvocation (std::move (invoc_data), + std::move (outer_attrs), macro_locus)); + return ExprOrStmt (std::move (expr)); + } + else + { + const_TokenPtr t = lexer.peek_token (); + // tokens don't match opening delimiters, so produce error + Error error ( + t->get_locus (), + "unexpected token %qs - expecting closing delimiter %qs (for a " + "macro invocation)", + t->get_token_description (), + (type == AST::PARENS ? ")" : (type == AST::SQUARE ? "]" : "}"))); + add_error (std::move (error)); + + return ExprOrStmt::create_error (); + } +} + +// "Unexpected token" panic mode - flags gcc error at unexpected token +template +void +Parser::unexpected_token (const_TokenPtr t) +{ + Error error (t->get_locus (), "unexpected token %qs\n", + t->get_token_description ()); + add_error (std::move (error)); +} + +/* Crappy "error recovery" performed after error by skipping tokens until a + * semi-colon is found */ +template +void +Parser::skip_after_semicolon () +{ + const_TokenPtr t = lexer.peek_token (); + + while (t->get_id () != END_OF_FILE && t->get_id () != SEMICOLON) + { + lexer.skip_token (); + t = lexer.peek_token (); + } + + if (t->get_id () == SEMICOLON) + lexer.skip_token (); +} + +/* Checks if current token has inputted id - skips it and returns true if so, + * diagnoses an error and returns false otherwise. */ +template +bool +Parser::skip_token (TokenId token_id) +{ + return expect_token (token_id) != const_TokenPtr (); +} + +/* Checks if current token has inputted id - skips it and returns true if so, + * returns false otherwise without diagnosing an error */ +template +bool +Parser::maybe_skip_token (TokenId token_id) +{ + if (lexer.peek_token ()->get_id () != token_id) + return false; + else + return skip_token (token_id); +} + +/* Checks the current token - if id is same as expected, skips and returns it, + * otherwise diagnoses error and returns null. */ +template +const_TokenPtr +Parser::expect_token (TokenId token_id) +{ + const_TokenPtr t = lexer.peek_token (); + if (t->get_id () == token_id) + { + lexer.skip_token (); + return t; + } + else + { + Error error (t->get_locus (), "expecting %qs but %qs found", + get_token_description (token_id), + t->get_token_description ()); + add_error (std::move (error)); + + return const_TokenPtr (); + } +} + +// Skips all tokens until EOF or }. Don't use. +template +void +Parser::skip_after_end () +{ + const_TokenPtr t = lexer.peek_token (); + + while (t->get_id () != END_OF_FILE && t->get_id () != RIGHT_CURLY) + { + lexer.skip_token (); + t = lexer.peek_token (); + } + + if (t->get_id () == RIGHT_CURLY) + { + lexer.skip_token (); + } +} + +/* A slightly more aware error-handler that skips all tokens until it reaches + * the end of the block scope (i.e. when left curly brackets = right curly + * brackets). Note: assumes currently in the middle of a block. Use + * skip_after_next_block to skip based on the assumption that the block + * has not been entered yet. */ +template +void +Parser::skip_after_end_block () +{ + const_TokenPtr t = lexer.peek_token (); + int curly_count = 1; + + while (curly_count > 0 && t->get_id () != END_OF_FILE) + { + switch (t->get_id ()) + { + case LEFT_CURLY: + curly_count++; + break; + case RIGHT_CURLY: + curly_count--; + break; + default: + break; + } + lexer.skip_token (); + t = lexer.peek_token (); + } +} + +/* Skips tokens until the end of the next block. i.e. assumes that the block + * has not been entered yet. */ +template +void +Parser::skip_after_next_block () +{ + const_TokenPtr t = lexer.peek_token (); + + // initial loop - skip until EOF if no left curlies encountered + while (t->get_id () != END_OF_FILE && t->get_id () != LEFT_CURLY) + { + lexer.skip_token (); + + t = lexer.peek_token (); + } + + // if next token is left, skip it and then skip after the block ends + if (t->get_id () == LEFT_CURLY) + { + lexer.skip_token (); + + skip_after_end_block (); + } + // otherwise, do nothing as EOF +} + +/* Skips all tokens until ] (the end of an attribute) - does not skip the ] + * (as designed for attribute body use) */ +template +void +Parser::skip_after_end_attribute () +{ + const_TokenPtr t = lexer.peek_token (); + + while (t->get_id () != RIGHT_SQUARE) + { + lexer.skip_token (); + t = lexer.peek_token (); + } + + // Don't skip the RIGHT_SQUARE token +} + +/* Pratt parser impl of parse_expr. FIXME: this is only provisional and + * probably will be changed. + * FIXME: this may only parse expressions without blocks as they are the only + * expressions to have precedence? */ +template +std::unique_ptr +Parser::parse_expr (int right_binding_power, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions) +{ + const_TokenPtr current_token = lexer.peek_token (); + // Special hack because we are allowed to return nullptr, in that case we + // don't want to skip the token, since we don't actually parse it. But if + // null isn't allowed it indicates an error, and we want to skip past that. + // So return early if it is one of the tokens that ends an expression + // (or at least cannot start a new expression). + if (restrictions.expr_can_be_null) + { + TokenId id = current_token->get_id (); + if (id == SEMICOLON || id == RIGHT_PAREN || id == RIGHT_CURLY + || id == RIGHT_SQUARE) + return nullptr; + } + lexer.skip_token (); + + // parse null denotation (unary part of expression) + std::unique_ptr expr + = null_denotation (current_token, {}, restrictions); + + if (expr == nullptr) + { + // DEBUG + rust_debug ("null denotation is null; returning null for parse_expr"); + return nullptr; + } + + // stop parsing if find lower priority token - parse higher priority first + while (right_binding_power < left_binding_power (lexer.peek_token ())) + { + current_token = lexer.peek_token (); + lexer.skip_token (); + + expr = left_denotation (current_token, std::move (expr), + std::move (outer_attrs), restrictions); + + if (expr == nullptr) + { + // DEBUG + rust_debug ("left denotation is null; returning null for parse_expr"); + + return nullptr; + } + } + + return expr; +} + +// Parse expression with lowest left binding power. +template +std::unique_ptr +Parser::parse_expr (AST::AttrVec outer_attrs, + ParseRestrictions restrictions) +{ + return parse_expr (LBP_LOWEST, std::move (outer_attrs), restrictions); +} + +/* Determines action to take when finding token at beginning of expression. + * FIXME: this may only apply to precedence-capable expressions (which are all + * expressions without blocks), so make return type ExprWithoutBlock? It would + * simplify stuff. */ +template +std::unique_ptr +Parser::null_denotation (const_TokenPtr tok, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions) +{ + /* note: tok is previous character in input stream, not current one, as + * parse_expr skips it before passing it in */ + + /* as a Pratt parser (which works by decomposing expressions into a null + * denotation and then a left denotation), null denotations handle primaries + * and unary operands (but only prefix unary operands) */ + + switch (tok->get_id ()) + { + case IDENTIFIER: { + // DEBUG + rust_debug ("beginning null denotation identifier handling"); + + /* best option: parse as path, then extract identifier, macro, + * struct/enum, or just path info from it */ + AST::PathInExpression path = parse_path_in_expression_pratt (tok); + + // DEBUG: + rust_debug ("finished null denotation identifier path parsing - " + "next is branching"); + + // branch on next token + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case EXCLAM: + // macro + return parse_macro_invocation_partial (std::move (path), + std::move (outer_attrs), + restrictions); + case LEFT_CURLY: { + bool not_a_block + = lexer.peek_token (1)->get_id () == IDENTIFIER + && (lexer.peek_token (2)->get_id () == COMMA + || (lexer.peek_token (2)->get_id () == COLON + && (lexer.peek_token (4)->get_id () == COMMA + || !can_tok_start_type ( + lexer.peek_token (3)->get_id ())))); + + /* definitely not a block: + * path '{' ident ',' + * path '{' ident ':' [anything] ',' + * path '{' ident ':' [not a type] + * otherwise, assume block expr and thus path */ + // DEBUG + rust_debug ("values of lookahead: '%s' '%s' '%s' '%s' ", + lexer.peek_token (1)->get_token_description (), + lexer.peek_token (2)->get_token_description (), + lexer.peek_token (3)->get_token_description (), + lexer.peek_token (4)->get_token_description ()); + + rust_debug ("can be struct expr: '%s', not a block: '%s'", + restrictions.can_be_struct_expr ? "true" : "false", + not_a_block ? "true" : "false"); + + // struct/enum expr struct + if (!restrictions.can_be_struct_expr && !not_a_block) + { + // HACK: add outer attrs to path + path.set_outer_attrs (std::move (outer_attrs)); + return std::unique_ptr ( + new AST::PathInExpression (std::move (path))); + } + return parse_struct_expr_struct_partial (std::move (path), + std::move (outer_attrs)); + } + case LEFT_PAREN: + // struct/enum expr tuple + if (!restrictions.can_be_struct_expr) + { + // HACK: add outer attrs to path + path.set_outer_attrs (std::move (outer_attrs)); + return std::unique_ptr ( + new AST::PathInExpression (std::move (path))); + } + return parse_struct_expr_tuple_partial (std::move (path), + std::move (outer_attrs)); + default: + // assume path is returned if not single segment + if (path.is_single_segment ()) + { + // have to return an identifier expression or something, idk + /* HACK: may have to become permanent, but this is my current + * identifier expression */ + return std::unique_ptr ( + new AST::IdentifierExpr (tok->get_str (), {}, + tok->get_locus ())); + } + // HACK: add outer attrs to path + path.set_outer_attrs (std::move (outer_attrs)); + return std::unique_ptr ( + new AST::PathInExpression (std::move (path))); + } + gcc_unreachable (); + } + /* FIXME: delegate to parse_literal_expr instead? would have to rejig + * tokens and whatever. */ + /* FIXME: could also be path expression (and hence macro expression, + * struct/enum expr) */ + case LEFT_ANGLE: { + // qualified path + // HACK: add outer attrs to path + AST::QualifiedPathInExpression path + = parse_qualified_path_in_expression (tok->get_locus ()); + path.set_outer_attrs (std::move (outer_attrs)); + return std::unique_ptr ( + new AST::QualifiedPathInExpression (std::move (path))); + } + // FIXME: for literal exprs, should outer attrs be passed in or just + // ignored? + case INT_LITERAL: + // we should check the range, but ignore for now + // encode as int? + return std::unique_ptr ( + new AST::LiteralExpr (tok->get_str (), AST::Literal::INT, + tok->get_type_hint (), {}, tok->get_locus ())); + case FLOAT_LITERAL: + // encode as float? + return std::unique_ptr ( + new AST::LiteralExpr (tok->get_str (), AST::Literal::FLOAT, + tok->get_type_hint (), {}, tok->get_locus ())); + case STRING_LITERAL: + return std::unique_ptr ( + new AST::LiteralExpr (tok->get_str (), AST::Literal::STRING, + tok->get_type_hint (), {}, tok->get_locus ())); + case BYTE_STRING_LITERAL: + return std::unique_ptr ( + new AST::LiteralExpr (tok->get_str (), AST::Literal::BYTE_STRING, + tok->get_type_hint (), {}, tok->get_locus ())); + case CHAR_LITERAL: + return std::unique_ptr ( + new AST::LiteralExpr (tok->get_str (), AST::Literal::CHAR, + tok->get_type_hint (), {}, tok->get_locus ())); + case BYTE_CHAR_LITERAL: + return std::unique_ptr ( + new AST::LiteralExpr (tok->get_str (), AST::Literal::BYTE, + tok->get_type_hint (), {}, tok->get_locus ())); + case TRUE_LITERAL: + return std::unique_ptr ( + new AST::LiteralExpr ("true", AST::Literal::BOOL, tok->get_type_hint (), + {}, tok->get_locus ())); + case FALSE_LITERAL: + return std::unique_ptr ( + new AST::LiteralExpr ("false", AST::Literal::BOOL, + tok->get_type_hint (), {}, tok->get_locus ())); + case LEFT_PAREN: + return parse_grouped_or_tuple_expr (std::move (outer_attrs), + tok->get_locus ()); + + /*case PLUS: { // unary plus operator + // invoke parse_expr recursively with appropriate priority, etc. for + below AST::Expr* expr = parse_expr(LBP_UNARY_PLUS); + + if (expr == nullptr) + return nullptr; + // can only apply to integer and float expressions + if (expr->get_type() != integer_type_node || expr->get_type() != + float_type_node) { rust_error_at(tok->get_locus(), "operand of unary + plus must be int or float but it is %s", print_type(expr->get_type())); + return nullptr; + } + + return Tree(expr, tok->get_locus()); + }*/ + // Rust has no unary plus operator + case MINUS: { // unary minus + ParseRestrictions entered_from_unary; + entered_from_unary.entered_from_unary = true; + if (!restrictions.can_be_struct_expr) + entered_from_unary.can_be_struct_expr = false; + std::unique_ptr expr + = parse_expr (LBP_UNARY_MINUS, {}, entered_from_unary); + + if (expr == nullptr) + return nullptr; + // can only apply to integer and float expressions + /*if (expr.get_type() != integer_type_node || expr.get_type() != + float_type_node) { rust_error_at(tok->get_locus(), "operand of unary + minus must be int or float but it is %s", + print_type(expr.get_type())); return Tree::error(); + }*/ + /* FIXME: when implemented the "get type" method on expr, ensure it is + * int or float type (except unsigned int). Actually, this would + * probably have to be done in semantic analysis (as type checking). + */ + + /* FIXME: allow outer attributes on these expressions by having an + * outer attrs parameter in function*/ + return std::unique_ptr ( + new AST::NegationExpr (std::move (expr), NegationOperator::NEGATE, + std::move (outer_attrs), tok->get_locus ())); + } + case EXCLAM: { // logical or bitwise not + ParseRestrictions entered_from_unary; + entered_from_unary.entered_from_unary = true; + if (!restrictions.can_be_struct_expr) + entered_from_unary.can_be_struct_expr = false; + std::unique_ptr expr + = parse_expr (LBP_UNARY_EXCLAM, {}, entered_from_unary); + + if (expr == nullptr) + return nullptr; + // can only apply to boolean expressions + /*if (expr.get_type() != boolean_type_node) { + rust_error_at(tok->get_locus(), + "operand of logical not must be a boolean but it is %s", + print_type(expr.get_type())); + return Tree::error(); + }*/ + /* FIXME: type checking for boolean or integer expressions in semantic + * analysis */ + + // FIXME: allow outer attributes on these expressions + return std::unique_ptr ( + new AST::NegationExpr (std::move (expr), NegationOperator::NOT, + std::move (outer_attrs), tok->get_locus ())); + } + case ASTERISK: { + /* pointer dereference only - HACK: as struct expressions should + * always be value expressions, cannot be dereferenced */ + ParseRestrictions entered_from_unary; + entered_from_unary.entered_from_unary = true; + entered_from_unary.can_be_struct_expr = false; + std::unique_ptr expr + = parse_expr (LBP_UNARY_ASTERISK, {}, entered_from_unary); + // FIXME: allow outer attributes on expression + return std::unique_ptr ( + new AST::DereferenceExpr (std::move (expr), std::move (outer_attrs), + tok->get_locus ())); + } + case AMP: { + // (single) "borrow" expression - shared (mutable) or immutable + std::unique_ptr expr = nullptr; + bool is_mut_borrow = false; + + /* HACK: as struct expressions should always be value expressions, + * cannot be referenced */ + ParseRestrictions entered_from_unary; + entered_from_unary.entered_from_unary = true; + entered_from_unary.can_be_struct_expr = false; + + if (lexer.peek_token ()->get_id () == MUT) + { + lexer.skip_token (); + expr = parse_expr (LBP_UNARY_AMP_MUT, {}, entered_from_unary); + is_mut_borrow = true; + } + else + { + expr = parse_expr (LBP_UNARY_AMP, {}, entered_from_unary); + } + + // FIXME: allow outer attributes on expression + return std::unique_ptr ( + new AST::BorrowExpr (std::move (expr), is_mut_borrow, false, + std::move (outer_attrs), tok->get_locus ())); + } + case LOGICAL_AND: { + // (double) "borrow" expression - shared (mutable) or immutable + std::unique_ptr expr = nullptr; + bool is_mut_borrow = false; + + ParseRestrictions entered_from_unary; + entered_from_unary.entered_from_unary = true; + + if (lexer.peek_token ()->get_id () == MUT) + { + lexer.skip_token (); + expr = parse_expr (LBP_UNARY_AMP_MUT, {}, entered_from_unary); + is_mut_borrow = true; + } + else + { + expr = parse_expr (LBP_UNARY_AMP, {}, entered_from_unary); + } + + // FIXME: allow outer attributes on expression + return std::unique_ptr ( + new AST::BorrowExpr (std::move (expr), is_mut_borrow, true, + std::move (outer_attrs), tok->get_locus ())); + } + case SCOPE_RESOLUTION: { + // TODO: fix: this is for global paths, i.e. std::string::whatever + Error error (tok->get_locus (), + "found null denotation scope resolution operator, and " + "have not written handling for it"); + add_error (std::move (error)); + + return nullptr; + } + case SELF: + case SELF_ALIAS: + case DOLLAR_SIGN: + case CRATE: + case SUPER: { + // DEBUG + rust_debug ("beginning null denotation " + "self/self-alias/dollar/crate/super handling"); + + /* best option: parse as path, then extract identifier, macro, + * struct/enum, or just path info from it */ + AST::PathInExpression path = parse_path_in_expression_pratt (tok); + + // DEBUG + rust_debug ( + "just finished parsing path (going to disambiguate) - peeked " + "token is '%s'", + lexer.peek_token ()->get_token_description ()); + + // HACK: always make "self" by itself a path (regardless of next + // tokens) + if (tok->get_id () == SELF && path.is_single_segment ()) + { + // HACK: add outer attrs to path + path.set_outer_attrs (std::move (outer_attrs)); + return std::unique_ptr ( + new AST::PathInExpression (std::move (path))); + } + + // branch on next token + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case EXCLAM: + // macro + return parse_macro_invocation_partial (std::move (path), + std::move (outer_attrs)); + case LEFT_CURLY: { + // struct/enum expr struct + rust_debug ("can_be_struct_expr: %s", + restrictions.can_be_struct_expr ? "true" : "false"); + + bool not_a_block + = lexer.peek_token (1)->get_id () == IDENTIFIER + && (lexer.peek_token (2)->get_id () == COMMA + || (lexer.peek_token (2)->get_id () == COLON + && (lexer.peek_token (4)->get_id () == COMMA + || !can_tok_start_type ( + lexer.peek_token (3)->get_id ())))); + + if (!restrictions.can_be_struct_expr && !not_a_block) + { + // assume path is returned + // HACK: add outer attributes to path + path.set_outer_attrs (std::move (outer_attrs)); + return std::unique_ptr ( + new AST::PathInExpression (std::move (path))); + } + return parse_struct_expr_struct_partial (std::move (path), + std::move (outer_attrs)); + } + case LEFT_PAREN: + // struct/enum expr tuple + if (!restrictions.can_be_struct_expr) + { + // assume path is returned + // HACK: add outer attributes to path + path.set_outer_attrs (std::move (outer_attrs)); + return std::unique_ptr ( + new AST::PathInExpression (std::move (path))); + } + return parse_struct_expr_tuple_partial (std::move (path), + std::move (outer_attrs)); + default: + // assume path is returned + // HACK: add outer attributes to path + path.set_outer_attrs (std::move (outer_attrs)); + return std::unique_ptr ( + new AST::PathInExpression (std::move (path))); + } + gcc_unreachable (); + } + case OR: + case PIPE: + case MOVE: + // closure expression + return parse_closure_expr_pratt (tok, std::move (outer_attrs)); + case DOT_DOT: + // either "range to" or "range full" expressions + return parse_nud_range_exclusive_expr (tok, std::move (outer_attrs)); + case DOT_DOT_EQ: + // range to inclusive expr + return parse_range_to_inclusive_expr (tok, std::move (outer_attrs)); + case RETURN_TOK: + // FIXME: is this really a null denotation expression? + return parse_return_expr (std::move (outer_attrs), tok->get_locus ()); + case BREAK: + // FIXME: is this really a null denotation expression? + return parse_break_expr (std::move (outer_attrs), tok->get_locus ()); + case CONTINUE: + return parse_continue_expr (std::move (outer_attrs), tok->get_locus ()); + case LEFT_CURLY: + // ok - this is an expression with block for once. + return parse_block_expr (std::move (outer_attrs), tok->get_locus ()); + case IF: + // if or if let, so more lookahead to find out + if (lexer.peek_token (1)->get_id () == LET) + { + // if let expr + return parse_if_let_expr (std::move (outer_attrs), tok->get_locus ()); + } + else + { + // if expr + return parse_if_expr (std::move (outer_attrs), tok->get_locus ()); + } + case LOOP: + return parse_loop_expr (std::move (outer_attrs), AST::LoopLabel::error (), + tok->get_locus ()); + case WHILE: + return parse_while_loop_expr (std::move (outer_attrs), + AST::LoopLabel::error (), + tok->get_locus ()); + case MATCH_TOK: + // also an expression with block + return parse_match_expr (std::move (outer_attrs), tok->get_locus ()); + case LEFT_SQUARE: + // array definition expr (not indexing) + return parse_array_expr (std::move (outer_attrs), tok->get_locus ()); + case UNSAFE: + return parse_unsafe_block_expr (std::move (outer_attrs), + tok->get_locus ()); + default: + if (!restrictions.expr_can_be_null) + add_error (Error (tok->get_locus (), + "found unexpected token %qs in null denotation", + tok->get_token_description ())); + return nullptr; + } +} + +/* Called for each token that can appear in infix (between) position. Can be + * operators or other punctuation. Returns a function pointer to member + * function that implements the left denotation for the token given. */ +template +std::unique_ptr +Parser::left_denotation (const_TokenPtr tok, + std::unique_ptr left, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions) +{ + // Token passed in has already been skipped, so peek gives "next" token + switch (tok->get_id ()) + { + // FIXME: allow for outer attributes to be applied + case QUESTION_MARK: { + Location left_locus = left->get_locus (); + // error propagation expression - unary postfix + return std::unique_ptr ( + new AST::ErrorPropagationExpr (std::move (left), + std::move (outer_attrs), left_locus)); + } + case PLUS: + // sum expression - binary infix + /*return parse_binary_plus_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_arithmetic_or_logical_expr (tok, std::move (left), + std::move (outer_attrs), + ArithmeticOrLogicalOperator::ADD, + restrictions); + case MINUS: + // difference expression - binary infix + /*return parse_binary_minus_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions);*/ + return parse_arithmetic_or_logical_expr ( + tok, std::move (left), std::move (outer_attrs), + ArithmeticOrLogicalOperator::SUBTRACT, restrictions); + case ASTERISK: + // product expression - binary infix + /*return parse_binary_mult_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_arithmetic_or_logical_expr ( + tok, std::move (left), std::move (outer_attrs), + ArithmeticOrLogicalOperator::MULTIPLY, restrictions); + case DIV: + // quotient expression - binary infix + /*return parse_binary_div_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_arithmetic_or_logical_expr ( + tok, std::move (left), std::move (outer_attrs), + ArithmeticOrLogicalOperator::DIVIDE, restrictions); + case PERCENT: + // modulo expression - binary infix + /*return parse_binary_mod_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_arithmetic_or_logical_expr ( + tok, std::move (left), std::move (outer_attrs), + ArithmeticOrLogicalOperator::MODULUS, restrictions); + case AMP: + // logical or bitwise and expression - binary infix + /*return parse_bitwise_and_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_arithmetic_or_logical_expr ( + tok, std::move (left), std::move (outer_attrs), + ArithmeticOrLogicalOperator::BITWISE_AND, restrictions); + case PIPE: + // logical or bitwise or expression - binary infix + /*return parse_bitwise_or_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_arithmetic_or_logical_expr ( + tok, std::move (left), std::move (outer_attrs), + ArithmeticOrLogicalOperator::BITWISE_OR, restrictions); + case CARET: + // logical or bitwise xor expression - binary infix + /*return parse_bitwise_xor_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_arithmetic_or_logical_expr ( + tok, std::move (left), std::move (outer_attrs), + ArithmeticOrLogicalOperator::BITWISE_XOR, restrictions); + case LEFT_SHIFT: + // left shift expression - binary infix + /*return parse_left_shift_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_arithmetic_or_logical_expr ( + tok, std::move (left), std::move (outer_attrs), + ArithmeticOrLogicalOperator::LEFT_SHIFT, restrictions); + case RIGHT_SHIFT: + // right shift expression - binary infix + /*return parse_right_shift_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_arithmetic_or_logical_expr ( + tok, std::move (left), std::move (outer_attrs), + ArithmeticOrLogicalOperator::RIGHT_SHIFT, restrictions); + case EQUAL_EQUAL: + // equal to expression - binary infix (no associativity) + /*return parse_binary_equal_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions);*/ + return parse_comparison_expr (tok, std::move (left), + std::move (outer_attrs), + ComparisonOperator::EQUAL, restrictions); + case NOT_EQUAL: + // not equal to expression - binary infix (no associativity) + /*return parse_binary_not_equal_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions);*/ + return parse_comparison_expr (tok, std::move (left), + std::move (outer_attrs), + ComparisonOperator::NOT_EQUAL, + restrictions); + case RIGHT_ANGLE: + // greater than expression - binary infix (no associativity) + /*return parse_binary_greater_than_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions);*/ + return parse_comparison_expr (tok, std::move (left), + std::move (outer_attrs), + ComparisonOperator::GREATER_THAN, + restrictions); + case LEFT_ANGLE: + // less than expression - binary infix (no associativity) + /*return parse_binary_less_than_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions);*/ + return parse_comparison_expr (tok, std::move (left), + std::move (outer_attrs), + ComparisonOperator::LESS_THAN, + restrictions); + case GREATER_OR_EQUAL: + // greater than or equal to expression - binary infix (no associativity) + /*return parse_binary_greater_equal_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions);*/ + return parse_comparison_expr (tok, std::move (left), + std::move (outer_attrs), + ComparisonOperator::GREATER_OR_EQUAL, + restrictions); + case LESS_OR_EQUAL: + // less than or equal to expression - binary infix (no associativity) + /*return parse_binary_less_equal_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions);*/ + return parse_comparison_expr (tok, std::move (left), + std::move (outer_attrs), + ComparisonOperator::LESS_OR_EQUAL, + restrictions); + case OR: + // lazy logical or expression - binary infix + return parse_lazy_or_expr (tok, std::move (left), std::move (outer_attrs), + restrictions); + case LOGICAL_AND: + // lazy logical and expression - binary infix + return parse_lazy_and_expr (tok, std::move (left), + std::move (outer_attrs), restrictions); + case AS: + /* type cast expression - kind of binary infix (RHS is actually a + * TypeNoBounds) */ + return parse_type_cast_expr (tok, std::move (left), + std::move (outer_attrs), restrictions); + case EQUAL: + // assignment expression - binary infix (note right-to-left + // associativity) + return parse_assig_expr (tok, std::move (left), std::move (outer_attrs), + restrictions); + case PLUS_EQ: + /* plus-assignment expression - binary infix (note right-to-left + * associativity) */ + /*return parse_plus_assig_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_compound_assignment_expr (tok, std::move (left), + std::move (outer_attrs), + CompoundAssignmentOperator::ADD, + restrictions); + case MINUS_EQ: + /* minus-assignment expression - binary infix (note right-to-left + * associativity) */ + /*return parse_minus_assig_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_compound_assignment_expr ( + tok, std::move (left), std::move (outer_attrs), + CompoundAssignmentOperator::SUBTRACT, restrictions); + case ASTERISK_EQ: + /* multiply-assignment expression - binary infix (note right-to-left + * associativity) */ + /*return parse_mult_assig_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_compound_assignment_expr ( + tok, std::move (left), std::move (outer_attrs), + CompoundAssignmentOperator::MULTIPLY, restrictions); + case DIV_EQ: + /* division-assignment expression - binary infix (note right-to-left + * associativity) */ + /*return parse_div_assig_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_compound_assignment_expr (tok, std::move (left), + std::move (outer_attrs), + CompoundAssignmentOperator::DIVIDE, + restrictions); + case PERCENT_EQ: + /* modulo-assignment expression - binary infix (note right-to-left + * associativity) */ + /*return parse_mod_assig_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_compound_assignment_expr ( + tok, std::move (left), std::move (outer_attrs), + CompoundAssignmentOperator::MODULUS, restrictions); + case AMP_EQ: + /* bitwise and-assignment expression - binary infix (note right-to-left + * associativity) */ + /*return parse_and_assig_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_compound_assignment_expr ( + tok, std::move (left), std::move (outer_attrs), + CompoundAssignmentOperator::BITWISE_AND, restrictions); + case PIPE_EQ: + /* bitwise or-assignment expression - binary infix (note right-to-left + * associativity) */ + /*return parse_or_assig_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_compound_assignment_expr ( + tok, std::move (left), std::move (outer_attrs), + CompoundAssignmentOperator::BITWISE_OR, restrictions); + case CARET_EQ: + /* bitwise xor-assignment expression - binary infix (note right-to-left + * associativity) */ + /*return parse_xor_assig_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_compound_assignment_expr ( + tok, std::move (left), std::move (outer_attrs), + CompoundAssignmentOperator::BITWISE_XOR, restrictions); + case LEFT_SHIFT_EQ: + /* left shift-assignment expression - binary infix (note right-to-left + * associativity) */ + /*return parse_left_shift_assig_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions);*/ + return parse_compound_assignment_expr ( + tok, std::move (left), std::move (outer_attrs), + CompoundAssignmentOperator::LEFT_SHIFT, restrictions); + case RIGHT_SHIFT_EQ: + /* right shift-assignment expression - binary infix (note right-to-left + * associativity) */ + /*return parse_right_shift_assig_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions);*/ + return parse_compound_assignment_expr ( + tok, std::move (left), std::move (outer_attrs), + CompoundAssignmentOperator::RIGHT_SHIFT, restrictions); + case DOT_DOT: + /* range exclusive expression - binary infix (no associativity) + * either "range" or "range from" */ + return parse_led_range_exclusive_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions); + case DOT_DOT_EQ: + /* range inclusive expression - binary infix (no associativity) + * unambiguously RangeInclusiveExpr */ + return parse_range_inclusive_expr (tok, std::move (left), + std::move (outer_attrs), restrictions); + case SCOPE_RESOLUTION: + // path expression - binary infix? FIXME should this even be parsed + // here? + add_error ( + Error (tok->get_locus (), + "found scope resolution operator in left denotation " + "function - this should probably be handled elsewhere")); + + return nullptr; + case DOT: { + /* field expression or method call - relies on parentheses after next + * identifier or await if token after is "await" (unary postfix) or + * tuple index if token after is a decimal int literal */ + + const_TokenPtr next_tok = lexer.peek_token (); + if (next_tok->get_id () == IDENTIFIER + && next_tok->get_str () == "await") + { + // await expression + return parse_await_expr (tok, std::move (left), + std::move (outer_attrs)); + } + else if (next_tok->get_id () == INT_LITERAL) + { + // tuple index expression - TODO check for decimal int literal + return parse_tuple_index_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions); + } + else if (next_tok->get_id () == IDENTIFIER + && lexer.peek_token (1)->get_id () != LEFT_PAREN + && lexer.peek_token (1)->get_id () != SCOPE_RESOLUTION) + { + /* field expression (or should be) - FIXME: scope resolution right + * after identifier should always be method, I'm pretty sure */ + return parse_field_access_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions); + } + else + { + // method call (probably) + return parse_method_call_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions); + } + } + case LEFT_PAREN: + // function call - method call is based on dot notation first + return parse_function_call_expr (tok, std::move (left), + std::move (outer_attrs), restrictions); + case LEFT_SQUARE: + // array or slice index expression (pseudo binary infix) + return parse_index_expr (tok, std::move (left), std::move (outer_attrs), + restrictions); + case FLOAT_LITERAL: + /* HACK: get around lexer mis-identifying '.0' or '.1' or whatever as a + * float literal - TODO does this happen anymore? It shouldn't. */ + return parse_tuple_index_expr_float (tok, std::move (left), + std::move (outer_attrs), + restrictions); + default: + add_error (Error (tok->get_locus (), + "found unexpected token %qs in left denotation", + tok->get_token_description ())); + + return nullptr; + } +} + +/* Returns the left binding power for the given ArithmeticOrLogicalExpr type. + * TODO make constexpr? Would that even do anything useful? */ +inline binding_powers +get_lbp_for_arithmetic_or_logical_expr ( + AST::ArithmeticOrLogicalExpr::ExprType expr_type) +{ + switch (expr_type) + { + case ArithmeticOrLogicalOperator::ADD: + return LBP_PLUS; + case ArithmeticOrLogicalOperator::SUBTRACT: + return LBP_MINUS; + case ArithmeticOrLogicalOperator::MULTIPLY: + return LBP_MUL; + case ArithmeticOrLogicalOperator::DIVIDE: + return LBP_DIV; + case ArithmeticOrLogicalOperator::MODULUS: + return LBP_MOD; + case ArithmeticOrLogicalOperator::BITWISE_AND: + return LBP_AMP; + case ArithmeticOrLogicalOperator::BITWISE_OR: + return LBP_PIPE; + case ArithmeticOrLogicalOperator::BITWISE_XOR: + return LBP_CARET; + case ArithmeticOrLogicalOperator::LEFT_SHIFT: + return LBP_L_SHIFT; + case ArithmeticOrLogicalOperator::RIGHT_SHIFT: + return LBP_R_SHIFT; + default: + // WTF? should not happen, this is an error + gcc_unreachable (); + + return LBP_PLUS; + } +} + +// Parses an arithmetic or logical expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_arithmetic_or_logical_expr ( + const_TokenPtr, std::unique_ptr left, AST::AttrVec, + AST::ArithmeticOrLogicalExpr::ExprType expr_type, + ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (get_lbp_for_arithmetic_or_logical_expr (expr_type), + AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), + expr_type, locus)); +} + +// Parses a binary addition expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_binary_plus_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_PLUS, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), + ArithmeticOrLogicalOperator::ADD, locus)); +} + +// Parses a binary subtraction expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_binary_minus_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_MINUS, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), + ArithmeticOrLogicalOperator::SUBTRACT, + locus)); +} + +// Parses a binary multiplication expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_binary_mult_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_MUL, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), + ArithmeticOrLogicalOperator::MULTIPLY, + locus)); +} + +// Parses a binary division expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_binary_div_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_DIV, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), + ArithmeticOrLogicalOperator::DIVIDE, + locus)); +} + +// Parses a binary modulo expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_binary_mod_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_MOD, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), + ArithmeticOrLogicalOperator::MODULUS, + locus)); +} + +/* Parses a binary bitwise (or eager logical) and expression (with Pratt + * parsing). */ +template +std::unique_ptr +Parser::parse_bitwise_and_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_AMP, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), + ArithmeticOrLogicalOperator::BITWISE_AND, + locus)); +} + +/* Parses a binary bitwise (or eager logical) or expression (with Pratt + * parsing). */ +template +std::unique_ptr +Parser::parse_bitwise_or_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_PIPE, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), + ArithmeticOrLogicalOperator::BITWISE_OR, + locus)); +} + +/* Parses a binary bitwise (or eager logical) xor expression (with Pratt + * parsing). */ +template +std::unique_ptr +Parser::parse_bitwise_xor_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_CARET, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), + ArithmeticOrLogicalOperator::BITWISE_XOR, + locus)); +} + +// Parses a binary left shift expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_left_shift_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_L_SHIFT, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), + ArithmeticOrLogicalOperator::LEFT_SHIFT, + locus)); +} + +// Parses a binary right shift expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_right_shift_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_R_SHIFT, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), + ArithmeticOrLogicalOperator::RIGHT_SHIFT, + locus)); +} + +/* Returns the left binding power for the given ComparisonExpr type. + * TODO make constexpr? Would that even do anything useful? */ +inline binding_powers +get_lbp_for_comparison_expr (AST::ComparisonExpr::ExprType expr_type) +{ + switch (expr_type) + { + case ComparisonOperator::EQUAL: + return LBP_EQUAL; + case ComparisonOperator::NOT_EQUAL: + return LBP_NOT_EQUAL; + case ComparisonOperator::GREATER_THAN: + return LBP_GREATER_THAN; + case ComparisonOperator::LESS_THAN: + return LBP_SMALLER_THAN; + case ComparisonOperator::GREATER_OR_EQUAL: + return LBP_GREATER_EQUAL; + case ComparisonOperator::LESS_OR_EQUAL: + return LBP_SMALLER_EQUAL; + default: + // WTF? should not happen, this is an error + gcc_unreachable (); + + return LBP_EQUAL; + } +} + +/* Parses a ComparisonExpr of given type and LBP. TODO find a way to only + * specify one and have the other looked up - e.g. specify ExprType and + * binding power is looked up? */ +template +std::unique_ptr +Parser::parse_comparison_expr ( + const_TokenPtr, std::unique_ptr left, AST::AttrVec, + AST::ComparisonExpr::ExprType expr_type, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (get_lbp_for_comparison_expr (expr_type), AST::AttrVec (), + restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ComparisonExpr (std::move (left), std::move (right), expr_type, + locus)); +} + +// Parses a binary equal to expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_binary_equal_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_EQUAL, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ComparisonExpr (std::move (left), std::move (right), + ComparisonOperator::EQUAL, locus)); +} + +// Parses a binary not equal to expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_binary_not_equal_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_NOT_EQUAL, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ComparisonExpr (std::move (left), std::move (right), + ComparisonOperator::NOT_EQUAL, locus)); +} + +// Parses a binary greater than expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_binary_greater_than_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_GREATER_THAN, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ComparisonExpr (std::move (left), std::move (right), + ComparisonOperator::GREATER_THAN, locus)); +} + +// Parses a binary less than expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_binary_less_than_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_SMALLER_THAN, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ComparisonExpr (std::move (left), std::move (right), + ComparisonOperator::LESS_THAN, locus)); +} + +// Parses a binary greater than or equal to expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_binary_greater_equal_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_GREATER_EQUAL, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ComparisonExpr (std::move (left), std::move (right), + ComparisonOperator::GREATER_OR_EQUAL, locus)); +} + +// Parses a binary less than or equal to expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_binary_less_equal_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_SMALLER_EQUAL, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ComparisonExpr (std::move (left), std::move (right), + ComparisonOperator::LESS_OR_EQUAL, locus)); +} + +// Parses a binary lazy boolean or expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_lazy_or_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_LOGICAL_OR, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::LazyBooleanExpr (std::move (left), std::move (right), + LazyBooleanOperator::LOGICAL_OR, locus)); +} + +// Parses a binary lazy boolean and expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_lazy_and_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_LOGICAL_AND, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::LazyBooleanExpr (std::move (left), std::move (right), + LazyBooleanOperator::LOGICAL_AND, locus)); +} + +// Parses a pseudo-binary infix type cast expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_type_cast_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr expr_to_cast, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, + ParseRestrictions restrictions ATTRIBUTE_UNUSED) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr type = parse_type_no_bounds (); + if (type == nullptr) + return nullptr; + // FIXME: how do I get precedence put in here? + + // TODO: check types. actually, do so during semantic analysis + Location locus = expr_to_cast->get_locus (); + + return std::unique_ptr ( + new AST::TypeCastExpr (std::move (expr_to_cast), std::move (type), locus)); +} + +// Parses a binary assignment expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_assig_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_ASSIG - 1, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::AssignmentExpr (std::move (left), std::move (right), + std::move (outer_attrs), locus)); +} + +/* Returns the left binding power for the given CompoundAssignmentExpr type. + * TODO make constexpr? Would that even do anything useful? */ +inline binding_powers +get_lbp_for_compound_assignment_expr ( + AST::CompoundAssignmentExpr::ExprType expr_type) +{ + switch (expr_type) + { + case CompoundAssignmentOperator::ADD: + return LBP_PLUS; + case CompoundAssignmentOperator::SUBTRACT: + return LBP_MINUS; + case CompoundAssignmentOperator::MULTIPLY: + return LBP_MUL; + case CompoundAssignmentOperator::DIVIDE: + return LBP_DIV; + case CompoundAssignmentOperator::MODULUS: + return LBP_MOD; + case CompoundAssignmentOperator::BITWISE_AND: + return LBP_AMP; + case CompoundAssignmentOperator::BITWISE_OR: + return LBP_PIPE; + case CompoundAssignmentOperator::BITWISE_XOR: + return LBP_CARET; + case CompoundAssignmentOperator::LEFT_SHIFT: + return LBP_L_SHIFT; + case CompoundAssignmentOperator::RIGHT_SHIFT: + return LBP_R_SHIFT; + default: + // WTF? should not happen, this is an error + gcc_unreachable (); + + return LBP_PLUS; + } +} + +// Parses a compound assignment expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_compound_assignment_expr ( + const_TokenPtr, std::unique_ptr left, AST::AttrVec, + AST::CompoundAssignmentExpr::ExprType expr_type, + ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (get_lbp_for_compound_assignment_expr (expr_type) - 1, + AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::CompoundAssignmentExpr (std::move (left), std::move (right), + expr_type, locus)); +} + +// Parses a binary add-assignment expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_plus_assig_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_PLUS_ASSIG - 1, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::CompoundAssignmentExpr (std::move (left), std::move (right), + CompoundAssignmentOperator::ADD, locus)); +} + +// Parses a binary minus-assignment expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_minus_assig_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_MINUS_ASSIG - 1, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::CompoundAssignmentExpr (std::move (left), std::move (right), + CompoundAssignmentOperator::SUBTRACT, + locus)); +} + +// Parses a binary multiplication-assignment expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_mult_assig_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_MULT_ASSIG - 1, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::CompoundAssignmentExpr (std::move (left), std::move (right), + CompoundAssignmentOperator::MULTIPLY, + locus)); +} + +// Parses a binary division-assignment expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_div_assig_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_DIV_ASSIG - 1, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::CompoundAssignmentExpr (std::move (left), std::move (right), + CompoundAssignmentOperator::DIVIDE, + locus)); +} + +// Parses a binary modulo-assignment expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_mod_assig_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_MOD_ASSIG - 1, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::CompoundAssignmentExpr (std::move (left), std::move (right), + CompoundAssignmentOperator::MODULUS, + locus)); +} + +// Parses a binary and-assignment expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_and_assig_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_AMP_ASSIG - 1, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::CompoundAssignmentExpr (std::move (left), std::move (right), + CompoundAssignmentOperator::BITWISE_AND, + locus)); +} + +// Parses a binary or-assignment expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_or_assig_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_PIPE_ASSIG - 1, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::CompoundAssignmentExpr (std::move (left), std::move (right), + CompoundAssignmentOperator::BITWISE_OR, + locus)); +} + +// Parses a binary xor-assignment expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_xor_assig_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_CARET_ASSIG - 1, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::CompoundAssignmentExpr (std::move (left), std::move (right), + CompoundAssignmentOperator::BITWISE_XOR, + locus)); +} + +// Parses a binary left shift-assignment expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_left_shift_assig_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_L_SHIFT_ASSIG - 1, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::CompoundAssignmentExpr (std::move (left), std::move (right), + CompoundAssignmentOperator::LEFT_SHIFT, + locus)); +} + +// Parses a binary right shift-assignment expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_right_shift_assig_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_R_SHIFT_ASSIG - 1, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::CompoundAssignmentExpr (std::move (left), std::move (right), + CompoundAssignmentOperator::RIGHT_SHIFT, + locus)); +} + +// Parses a postfix unary await expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_await_expr ( + const_TokenPtr tok, std::unique_ptr expr_to_await, + AST::AttrVec outer_attrs) +{ + /* skip "await" identifier (as "." has already been consumed in + * parse_expression) this assumes that the identifier was already identified + * as await */ + if (!skip_token (IDENTIFIER)) + { + Error error (tok->get_locus (), "failed to skip % in await expr " + "- this is probably a deep issue"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + // TODO: check inside async block in semantic analysis + Location locus = expr_to_await->get_locus (); + + return std::unique_ptr ( + new AST::AwaitExpr (std::move (expr_to_await), std::move (outer_attrs), + locus)); +} + +/* Parses an exclusive range ('..') in left denotation position (i.e. + * RangeFromExpr or RangeFromToExpr). */ +template +std::unique_ptr +Parser::parse_led_range_exclusive_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // FIXME: this probably parses expressions accidently or whatever + // try parsing RHS (as tok has already been consumed in parse_expression) + // Can be nullptr, in which case it is a RangeFromExpr, otherwise a + // RangeFromToExpr. + restrictions.expr_can_be_null = true; + std::unique_ptr right + = parse_expr (LBP_DOT_DOT, AST::AttrVec (), restrictions); + + Location locus = left->get_locus (); + + if (right == nullptr) + { + // range from expr + return std::unique_ptr ( + new AST::RangeFromExpr (std::move (left), locus)); + } + else + { + return std::unique_ptr ( + new AST::RangeFromToExpr (std::move (left), std::move (right), locus)); + } + // FIXME: make non-associative +} + +/* Parses an exclusive range ('..') in null denotation position (i.e. + * RangeToExpr or RangeFullExpr). */ +template +std::unique_ptr +Parser::parse_nud_range_exclusive_expr ( + const_TokenPtr tok, AST::AttrVec outer_attrs ATTRIBUTE_UNUSED) +{ + // FIXME: this probably parses expressions accidently or whatever + // try parsing RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right = parse_expr (LBP_DOT_DOT, AST::AttrVec ()); + + Location locus = tok->get_locus (); + + if (right == nullptr) + { + // range from expr + return std::unique_ptr ( + new AST::RangeFullExpr (locus)); + } + else + { + return std::unique_ptr ( + new AST::RangeToExpr (std::move (right), locus)); + } + // FIXME: make non-associative +} + +// Parses a full binary range inclusive expression. +template +std::unique_ptr +Parser::parse_range_inclusive_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_DOT_DOT_EQ, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: make non-associative + + // TODO: check types. actually, do so during semantic analysis + Location locus = left->get_locus (); + + return std::unique_ptr ( + new AST::RangeFromToInclExpr (std::move (left), std::move (right), locus)); +} + +// Parses an inclusive range-to prefix unary expression. +template +std::unique_ptr +Parser::parse_range_to_inclusive_expr ( + const_TokenPtr tok, AST::AttrVec outer_attrs ATTRIBUTE_UNUSED) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right = parse_expr (LBP_DOT_DOT_EQ); + if (right == nullptr) + return nullptr; + // FIXME: make non-associative + + // TODO: check types. actually, do so during semantic analysis + + return std::unique_ptr ( + new AST::RangeToInclExpr (std::move (right), tok->get_locus ())); +} + +// Parses a pseudo-binary infix tuple index expression. +template +std::unique_ptr +Parser::parse_tuple_index_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr tuple_expr, + AST::AttrVec outer_attrs, ParseRestrictions restrictions ATTRIBUTE_UNUSED) +{ + // parse int literal (as token already skipped) + const_TokenPtr index_tok = expect_token (INT_LITERAL); + if (index_tok == nullptr) + { + return nullptr; + } + std::string index = index_tok->get_str (); + + // convert to integer + if (!index_tok->is_pure_decimal ()) + { + Error error (index_tok->get_locus (), + "tuple index should be a pure decimal literal"); + add_error (std::move (error)); + } + int index_int = atoi (index.c_str ()); + + Location locus = tuple_expr->get_locus (); + + return std::unique_ptr ( + new AST::TupleIndexExpr (std::move (tuple_expr), index_int, + std::move (outer_attrs), locus)); +} + +// Parses a pseudo-binary infix array (or slice) index expression. +template +std::unique_ptr +Parser::parse_index_expr ( + const_TokenPtr, std::unique_ptr array_expr, + AST::AttrVec outer_attrs, ParseRestrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + /*std::unique_ptr index_expr + = parse_expr (LBP_ARRAY_REF, AST::AttrVec (), + restrictions);*/ + // TODO: conceptually, should treat [] as brackets, so just parse all expr + std::unique_ptr index_expr = parse_expr (); + if (index_expr == nullptr) + return nullptr; + + // skip ']' at end of array + if (!skip_token (RIGHT_SQUARE)) + { + // skip somewhere? + return nullptr; + } + + // TODO: check types. actually, do so during semantic analysis + Location locus = array_expr->get_locus (); + + return std::unique_ptr ( + new AST::ArrayIndexExpr (std::move (array_expr), std::move (index_expr), + std::move (outer_attrs), locus)); +} + +// Parses a pseudo-binary infix struct field access expression. +template +std::unique_ptr +Parser::parse_field_access_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr struct_expr, + AST::AttrVec outer_attrs, ParseRestrictions restrictions ATTRIBUTE_UNUSED) +{ + /* get field name identifier (assume that this is a field access expr and + * not await, for instance) */ + const_TokenPtr ident_tok = expect_token (IDENTIFIER); + if (ident_tok == nullptr) + return nullptr; + + Identifier ident = ident_tok->get_str (); + + Location locus = struct_expr->get_locus (); + + // TODO: check types. actually, do so during semantic analysis + return std::unique_ptr ( + new AST::FieldAccessExpr (std::move (struct_expr), std::move (ident), + std::move (outer_attrs), locus)); +} + +// Parses a pseudo-binary infix method call expression. +template +std::unique_ptr +Parser::parse_method_call_expr ( + const_TokenPtr tok, std::unique_ptr receiver_expr, + AST::AttrVec outer_attrs, ParseRestrictions) +{ + // parse path expr segment + AST::PathExprSegment segment = parse_path_expr_segment (); + if (segment.is_error ()) + { + Error error (tok->get_locus (), + "failed to parse path expr segment of method call expr"); + add_error (std::move (error)); + + return nullptr; + } + + // skip left parentheses + if (!skip_token (LEFT_PAREN)) + { + return nullptr; + } + + // parse method params (if they exist) + std::vector> params; + + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () != RIGHT_PAREN) + { + std::unique_ptr param = parse_expr (); + if (param == nullptr) + { + Error error (t->get_locus (), + "failed to parse method param in method call"); + add_error (std::move (error)); + + return nullptr; + } + params.push_back (std::move (param)); + + if (lexer.peek_token ()->get_id () != COMMA) + break; + + lexer.skip_token (); + t = lexer.peek_token (); + } + + // skip right paren + if (!skip_token (RIGHT_PAREN)) + { + return nullptr; + } + + // TODO: check types. actually do so in semantic analysis pass. + Location locus = receiver_expr->get_locus (); + + return std::unique_ptr ( + new AST::MethodCallExpr (std::move (receiver_expr), std::move (segment), + std::move (params), std::move (outer_attrs), + locus)); +} + +// Parses a pseudo-binary infix function call expression. +template +std::unique_ptr +Parser::parse_function_call_expr ( + const_TokenPtr, std::unique_ptr function_expr, + AST::AttrVec outer_attrs, ParseRestrictions) +{ + // parse function params (if they exist) + std::vector> params; + + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () != RIGHT_PAREN) + { + std::unique_ptr param = parse_expr (); + if (param == nullptr) + { + Error error (t->get_locus (), + "failed to parse function param in function call"); + add_error (std::move (error)); + + return nullptr; + } + params.push_back (std::move (param)); + + if (lexer.peek_token ()->get_id () != COMMA) + break; + + lexer.skip_token (); + t = lexer.peek_token (); + } + + // skip ')' at end of param list + if (!skip_token (RIGHT_PAREN)) + { + // skip somewhere? + return nullptr; + } + + // TODO: check types. actually, do so during semantic analysis + Location locus = function_expr->get_locus (); + + return std::unique_ptr ( + new AST::CallExpr (std::move (function_expr), std::move (params), + std::move (outer_attrs), locus)); +} + +/* Parses a macro invocation with a path in expression already parsed (but not + * '!' token). */ +template +std::unique_ptr +Parser::parse_macro_invocation_partial ( + AST::PathInExpression path, AST::AttrVec outer_attrs, + ParseRestrictions restrictions) +{ + // macro invocation + if (!skip_token (EXCLAM)) + { + return nullptr; + } + + // convert PathInExpression to SimplePath - if this isn't possible, error + AST::SimplePath converted_path = path.as_simple_path (); + if (converted_path.is_empty ()) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse simple path in macro invocation"); + add_error (std::move (error)); + + return nullptr; + } + + AST::DelimTokenTree tok_tree = parse_delim_token_tree (); + + rust_debug ("successfully parsed macro invocation (via partial)"); + + Location macro_locus = converted_path.get_locus (); + + return std::unique_ptr (new AST::MacroInvocation ( + AST::MacroInvocData (std::move (converted_path), std::move (tok_tree)), + std::move (outer_attrs), macro_locus, restrictions.expr_can_be_stmt)); +} + +/* Parses a struct expr struct with a path in expression already parsed (but + * not + * '{' token). */ +template +std::unique_ptr +Parser::parse_struct_expr_struct_partial ( + AST::PathInExpression path, AST::AttrVec outer_attrs) +{ + // assume struct expr struct (as struct-enum disambiguation requires name + // lookup) again, make statement if final ';' + if (!skip_token (LEFT_CURLY)) + { + return nullptr; + } + + // parse inner attributes + AST::AttrVec inner_attrs = parse_inner_attributes (); + + // branch based on next token + const_TokenPtr t = lexer.peek_token (); + Location path_locus = path.get_locus (); + switch (t->get_id ()) + { + case RIGHT_CURLY: + // struct with no body + lexer.skip_token (); + + return std::unique_ptr ( + new AST::StructExprStruct (std::move (path), std::move (inner_attrs), + std::move (outer_attrs), path_locus)); + case DOT_DOT: + /* technically this would give a struct base-only struct, but this + * algorithm should work too. As such, AST type not happening. */ + case IDENTIFIER: + case INT_LITERAL: { + // struct with struct expr fields + + // parse struct expr fields + std::vector> fields; + + while (t->get_id () != RIGHT_CURLY && t->get_id () != DOT_DOT) + { + std::unique_ptr field + = parse_struct_expr_field (); + if (field == nullptr) + { + Error error (t->get_locus (), + "failed to parse struct (or enum) expr field"); + add_error (std::move (error)); + + return nullptr; + } + + // DEBUG: + rust_debug ("struct/enum expr field validated to not be null"); + + fields.push_back (std::move (field)); + + // DEBUG: + rust_debug ("struct/enum expr field pushed back"); + + if (lexer.peek_token ()->get_id () != COMMA) + { + // DEBUG: + rust_debug ("lack of comma detected in struct/enum expr " + "fields - break"); + break; + } + lexer.skip_token (); + + // DEBUG: + rust_debug ("struct/enum expr fields comma skipped "); + + t = lexer.peek_token (); + } + + // DEBUG: + rust_debug ("struct/enum expr about to parse struct base "); + + // parse struct base if it exists + AST::StructBase struct_base = AST::StructBase::error (); + if (lexer.peek_token ()->get_id () == DOT_DOT) + { + Location dot_dot_location = lexer.peek_token ()->get_locus (); + lexer.skip_token (); + + // parse required struct base expr + std::unique_ptr base_expr = parse_expr (); + if (base_expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse struct base expression in struct " + "expression"); + add_error (std::move (error)); + + return nullptr; + } + + // DEBUG: + rust_debug ("struct/enum expr - parsed and validated base expr"); + + struct_base + = AST::StructBase (std::move (base_expr), dot_dot_location); + + // DEBUG: + rust_debug ("assigned struct base to new struct base "); + } + + if (!skip_token (RIGHT_CURLY)) + { + return nullptr; + } + + // DEBUG: + rust_debug ( + "struct/enum expr skipped right curly - done and ready to return"); + + return std::unique_ptr ( + new AST::StructExprStructFields (std::move (path), std::move (fields), + path_locus, std::move (struct_base), + std::move (inner_attrs), + std::move (outer_attrs))); + } + default: + add_error ( + Error (t->get_locus (), + "unrecognised token %qs in struct (or enum) expression - " + "expected %<}%>, identifier, integer literal, or %<..%>", + t->get_token_description ())); + + return nullptr; + } +} + +/* Parses a struct expr tuple with a path in expression already parsed (but + * not + * '(' token). + * FIXME: this currently outputs a call expr, as they cannot be disambiguated. + * A better solution would be to just get this to call that function directly. + * */ +template +std::unique_ptr +Parser::parse_struct_expr_tuple_partial ( + AST::PathInExpression path, AST::AttrVec outer_attrs) +{ + if (!skip_token (LEFT_PAREN)) + { + return nullptr; + } + + AST::AttrVec inner_attrs = parse_inner_attributes (); + + std::vector> exprs; + + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () != RIGHT_PAREN) + { + // parse expression (required) + std::unique_ptr expr = parse_expr (); + if (expr == nullptr) + { + Error error (t->get_locus (), "failed to parse expression in " + "struct (or enum) expression tuple"); + add_error (std::move (error)); + + return nullptr; + } + exprs.push_back (std::move (expr)); + + if (lexer.peek_token ()->get_id () != COMMA) + break; + + lexer.skip_token (); + + t = lexer.peek_token (); + } + + if (!skip_token (RIGHT_PAREN)) + { + return nullptr; + } + + Location path_locus = path.get_locus (); + + auto pathExpr = std::unique_ptr ( + new AST::PathInExpression (std::move (path))); + + return std::unique_ptr ( + new AST::CallExpr (std::move (pathExpr), std::move (exprs), + std::move (outer_attrs), path_locus)); +} + +/* Parses a path in expression with the first token passed as a parameter (as + * it is skipped in token stream). Note that this only parses segment-first + * paths, not global ones. */ +template +AST::PathInExpression +Parser::parse_path_in_expression_pratt (const_TokenPtr tok) +{ + // HACK-y way of making up for pratt-parsing consuming first token + + // DEBUG + rust_debug ("current peek token when starting path pratt parse: '%s'", + lexer.peek_token ()->get_token_description ()); + + // create segment vector + std::vector segments; + + std::string initial_str; + + switch (tok->get_id ()) + { + case IDENTIFIER: + initial_str = tok->get_str (); + break; + case SUPER: + initial_str = "super"; + break; + case SELF: + initial_str = "self"; + break; + case SELF_ALIAS: + initial_str = "Self"; + break; + case CRATE: + initial_str = "crate"; + break; + case DOLLAR_SIGN: + if (lexer.peek_token ()->get_id () == CRATE) + { + initial_str = "$crate"; + break; + } + gcc_fallthrough (); + default: + add_error (Error (tok->get_locus (), + "unrecognised token %qs in path in expression", + tok->get_token_description ())); + + return AST::PathInExpression::create_error (); + } + + // parse required initial segment + AST::PathExprSegment initial_segment (initial_str, tok->get_locus ()); + // parse generic args (and turbofish), if they exist + /* use lookahead to determine if they actually exist (don't want to + * accidently parse over next ident segment) */ + if (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION + && lexer.peek_token (1)->get_id () == LEFT_ANGLE) + { + // skip scope resolution + lexer.skip_token (); + + AST::GenericArgs generic_args = parse_path_generic_args (); + + initial_segment + = AST::PathExprSegment (AST::PathIdentSegment (initial_str, + tok->get_locus ()), + tok->get_locus (), std::move (generic_args)); + } + if (initial_segment.is_error ()) + { + // skip after somewhere? + // don't necessarily throw error but yeah + + // DEBUG + rust_debug ("initial segment is error - returning null"); + + return AST::PathInExpression::create_error (); + } + segments.push_back (std::move (initial_segment)); + + // parse optional segments (as long as scope resolution operator exists) + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () == SCOPE_RESOLUTION) + { + // skip scope resolution operator + lexer.skip_token (); + + // parse the actual segment - it is an error if it doesn't exist now + AST::PathExprSegment segment = parse_path_expr_segment (); + if (segment.is_error ()) + { + // skip after somewhere? + Error error (t->get_locus (), + "could not parse path expression segment"); + add_error (std::move (error)); + + return AST::PathInExpression::create_error (); + } + + segments.push_back (std::move (segment)); + + t = lexer.peek_token (); + } + + // DEBUG: + rust_debug ( + "current token (just about to return path to null denotation): '%s'", + lexer.peek_token ()->get_token_description ()); + + return AST::PathInExpression (std::move (segments), {}, tok->get_locus (), + false); +} + +// Parses a closure expression with pratt parsing (from null denotation). +template +std::unique_ptr +Parser::parse_closure_expr_pratt (const_TokenPtr tok, + AST::AttrVec outer_attrs) +{ + // TODO: does this need pratt parsing (for precedence)? probably not, but + // idk + Location locus = tok->get_locus (); + bool has_move = false; + if (tok->get_id () == MOVE) + { + has_move = true; + tok = lexer.peek_token (); + lexer.skip_token (); + // skip token and reassign + } + + // handle parameter list + std::vector params; + + switch (tok->get_id ()) + { + case OR: + // no parameters, don't skip token + break; + case PIPE: { + // actually may have parameters + // don't skip token + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () != PIPE) + { + AST::ClosureParam param = parse_closure_param (); + if (param.is_error ()) + { + // TODO is this really an error? + Error error (t->get_locus (), "could not parse closure param"); + add_error (std::move (error)); + + return nullptr; + } + params.push_back (std::move (param)); + + if (lexer.peek_token ()->get_id () != COMMA) + { + // not an error but means param list is done + break; + } + // skip comma + lexer.skip_token (); + + t = lexer.peek_token (); + } + + if (!skip_token (PIPE)) + { + return nullptr; + } + break; + } + default: + add_error (Error (tok->get_locus (), + "unexpected token %qs in closure expression - expected " + "%<|%> or %<||%>", + tok->get_token_description ())); + + // skip somewhere? + return nullptr; + } + + // again branch based on next token + tok = lexer.peek_token (); + if (tok->get_id () == RETURN_TYPE) + { + // must be return type closure with block expr + + // skip "return type" token + lexer.skip_token (); + + // parse actual type, which is required + std::unique_ptr type = parse_type_no_bounds (); + if (type == nullptr) + { + // error + Error error (tok->get_locus (), "failed to parse type for closure"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + // parse block expr, which is required + std::unique_ptr block = parse_block_expr (); + if (block == nullptr) + { + // error + Error error (lexer.peek_token ()->get_locus (), + "failed to parse block expr in closure"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::ClosureExprInnerTyped (std::move (type), std::move (block), + std::move (params), locus, has_move, + std::move (outer_attrs))); + } + else + { + // must be expr-only closure + + // parse expr, which is required + std::unique_ptr expr = parse_expr (); + if (expr == nullptr) + { + Error error (tok->get_locus (), + "failed to parse expression in closure"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::ClosureExprInner (std::move (expr), std::move (params), locus, + has_move, std::move (outer_attrs))); + } +} + +/* Parses a tuple index expression (pratt-parsed) from a 'float' token as a + * result of lexer misidentification. */ +template +std::unique_ptr +Parser::parse_tuple_index_expr_float ( + const_TokenPtr tok, std::unique_ptr tuple_expr, + AST::AttrVec outer_attrs, ParseRestrictions restrictions ATTRIBUTE_UNUSED) +{ + // only works on float literals + if (tok->get_id () != FLOAT_LITERAL) + return nullptr; + + // DEBUG: + rust_debug ("exact string form of float: '%s'", tok->get_str ().c_str ()); + + // get float string and remove dot and initial 0 + std::string index_str = tok->get_str (); + index_str.erase (index_str.begin ()); + + // get int from string + int index = atoi (index_str.c_str ()); + + Location locus = tuple_expr->get_locus (); + + return std::unique_ptr ( + new AST::TupleIndexExpr (std::move (tuple_expr), index, + std::move (outer_attrs), locus)); +} + +// Returns true if the next token is END, ELSE, or EOF; +template +bool +Parser::done_end_or_else () +{ + const_TokenPtr t = lexer.peek_token (); + return (t->get_id () == RIGHT_CURLY || t->get_id () == ELSE + || t->get_id () == END_OF_FILE); +} + +// Returns true if the next token is END or EOF. +template +bool +Parser::done_end () +{ + const_TokenPtr t = lexer.peek_token (); + return (t->get_id () == RIGHT_CURLY || t->get_id () == END_OF_FILE); +} + +// Dumps lexer output to stderr. +template +void +Parser::debug_dump_lex_output (std::ostream &out) +{ + /* TODO: a better implementation of "lexer dump" (as in dump what was + * actually tokenised) would actually be to "write" a token to a file every + * time skip_token() here was called. This would reflect the parser + * modifications to the token stream, such as fixing the template angle + * brackets. */ + + const_TokenPtr tok = lexer.peek_token (); + + while (true) + { + if (tok->get_id () == Rust::END_OF_FILE) + break; + + bool has_text = tok->get_id () == Rust::IDENTIFIER + || tok->get_id () == Rust::INT_LITERAL + || tok->get_id () == Rust::FLOAT_LITERAL + || tok->get_id () == Rust::STRING_LITERAL + || tok->get_id () == Rust::CHAR_LITERAL + || tok->get_id () == Rust::BYTE_STRING_LITERAL + || tok->get_id () == Rust::BYTE_CHAR_LITERAL; + + Location loc = tok->get_locus (); + + out << "token_id_to_str (); + out << has_text ? (std::string (", text=") + tok->get_str () + + std::string (", typehint=") + + std::string (tok->get_type_hint_str ())) + : ""; + out << lexer.get_line_map ()->to_string (loc); + + lexer.skip_token (); + tok = lexer.peek_token (); + } +} + +// Parses crate and dumps AST to stderr, recursively. +template +void +Parser::debug_dump_ast_output (AST::Crate &crate, + std::ostream &out) +{ + out << crate.as_string (); +} +} // namespace Rust