Skip to content

Refactor token collection to capture trailing token immediately #81017

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 23, 2021
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 0 additions & 11 deletions compiler/rustc_ast/src/tokenstream.rs
Original file line number Diff line number Diff line change
@@ -127,14 +127,10 @@ where
}

pub trait CreateTokenStream: sync::Send + sync::Sync {
fn add_trailing_semi(&self) -> Box<dyn CreateTokenStream>;
fn create_token_stream(&self) -> TokenStream;
}

impl CreateTokenStream for TokenStream {
fn add_trailing_semi(&self) -> Box<dyn CreateTokenStream> {
panic!("Cannot call `add_trailing_semi` on a `TokenStream`!");
}
fn create_token_stream(&self) -> TokenStream {
self.clone()
}
@@ -151,13 +147,6 @@ impl LazyTokenStream {
LazyTokenStream(Lrc::new(Box::new(inner)))
}

/// Extends the captured stream by one token,
/// which must be a trailing semicolon. This
/// affects the `TokenStream` created by `make_tokenstream`.
pub fn add_trailing_semi(&self) -> LazyTokenStream {
LazyTokenStream(Lrc::new(self.0.add_trailing_semi()))
}

pub fn create_token_stream(&self) -> TokenStream {
self.0.create_token_stream()
}
2 changes: 1 addition & 1 deletion compiler/rustc_parse/src/lib.rs
Original file line number Diff line number Diff line change
@@ -292,7 +292,7 @@ pub fn nt_to_tokenstream(
} else if matches!(synthesize_tokens, CanSynthesizeMissingTokens::Yes) {
return fake_token_stream(sess, nt);
} else {
panic!("Missing tokens for nt {:?}", pprust::nonterminal_to_string(nt));
panic!("Missing tokens for nt at {:?}: {:?}", nt.span(), pprust::nonterminal_to_string(nt));
}
}

4 changes: 2 additions & 2 deletions compiler/rustc_parse/src/parser/item.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use super::diagnostics::{dummy_arg, ConsumeClosingDelim, Error};
use super::ty::{AllowPlus, RecoverQPath, RecoverReturnSign};
use super::{FollowedByType, ForceCollect, Parser, PathStyle};
use super::{FollowedByType, ForceCollect, Parser, PathStyle, TrailingToken};

use crate::{maybe_collect_tokens, maybe_whole};

@@ -125,7 +125,7 @@ impl<'a> Parser<'a> {
let item = maybe_collect_tokens!(self, force_collect, &attrs, |this: &mut Self| {
let item = this.parse_item_common_(attrs, mac_allowed, attrs_allowed, req_name);
unclosed_delims.append(&mut this.unclosed_delims);
item
Ok((item?, TrailingToken::None))
})?;

self.unclosed_delims.append(&mut unclosed_delims);
55 changes: 28 additions & 27 deletions compiler/rustc_parse/src/parser/mod.rs
Original file line number Diff line number Diff line change
@@ -61,6 +61,11 @@ pub enum ForceCollect {
No,
}

pub enum TrailingToken {
None,
Semi,
}

/// Like `maybe_whole_expr`, but for things other than expressions.
#[macro_export]
macro_rules! maybe_whole {
@@ -1225,6 +1230,13 @@ impl<'a> Parser<'a> {
}
}

pub fn collect_tokens<R: HasTokens>(
&mut self,
f: impl FnOnce(&mut Self) -> PResult<'a, R>,
) -> PResult<'a, R> {
self.collect_tokens_trailing_token(|this| Ok((f(this)?, TrailingToken::None)))
}

/// Records all tokens consumed by the provided callback,
/// including the current token. These tokens are collected
/// into a `LazyTokenStream`, and returned along with the result
@@ -1241,9 +1253,9 @@ impl<'a> Parser<'a> {
/// This restriction shouldn't be an issue in practice,
/// since this function is used to record the tokens for
/// a parsed AST item, which always has matching delimiters.
pub fn collect_tokens<R: HasTokens>(
pub fn collect_tokens_trailing_token<R: HasTokens>(
&mut self,
f: impl FnOnce(&mut Self) -> PResult<'a, R>,
f: impl FnOnce(&mut Self) -> PResult<'a, (R, TrailingToken)>,
) -> PResult<'a, R> {
let start_token = (self.token.clone(), self.token_spacing);
let cursor_snapshot = TokenCursor {
@@ -1256,7 +1268,7 @@ impl<'a> Parser<'a> {
append_unglued_token: self.token_cursor.append_unglued_token.clone(),
};

let mut ret = f(self)?;
let (mut ret, trailing_token) = f(self)?;

// Produces a `TokenStream` on-demand. Using `cursor_snapshot`
// and `num_calls`, we can reconstruct the `TokenStream` seen
@@ -1275,55 +1287,44 @@ impl<'a> Parser<'a> {
cursor_snapshot: TokenCursor,
num_calls: usize,
desugar_doc_comments: bool,
trailing_semi: bool,
append_unglued_token: Option<TreeAndSpacing>,
}
impl CreateTokenStream for LazyTokenStreamImpl {
fn create_token_stream(&self) -> TokenStream {
let mut num_calls = self.num_calls;
if self.trailing_semi {
num_calls += 1;
}
// The token produced by the final call to `next` or `next_desugared`
// was not actually consumed by the callback. The combination
// of chaining the initial token and using `take` produces the desired
// result - we produce an empty `TokenStream` if no calls were made,
// and omit the final token otherwise.
let mut cursor_snapshot = self.cursor_snapshot.clone();
let tokens = std::iter::once(self.start_token.clone())
.chain((0..num_calls).map(|_| {
.chain((0..self.num_calls).map(|_| {
if self.desugar_doc_comments {
cursor_snapshot.next_desugared()
} else {
cursor_snapshot.next()
}
}))
.take(num_calls);
.take(self.num_calls);

make_token_stream(tokens, self.append_unglued_token.clone())
}
fn add_trailing_semi(&self) -> Box<dyn CreateTokenStream> {
if self.trailing_semi {
panic!("Called `add_trailing_semi` twice!");
}
if self.append_unglued_token.is_some() {
panic!(
"Cannot call `add_trailing_semi` when we have an unglued token {:?}",
self.append_unglued_token
);
}
let mut new = self.clone();
new.trailing_semi = true;
Box::new(new)
}

let mut num_calls = self.token_cursor.num_next_calls - cursor_snapshot.num_next_calls;
match trailing_token {
TrailingToken::None => {}
TrailingToken::Semi => {
assert_eq!(self.token.kind, token::Semi);
num_calls += 1;
}
}

let lazy_impl = LazyTokenStreamImpl {
start_token,
num_calls: self.token_cursor.num_next_calls - cursor_snapshot.num_next_calls,
num_calls,
cursor_snapshot,
desugar_doc_comments: self.desugar_doc_comments,
trailing_semi: false,
append_unglued_token: self.token_cursor.append_unglued_token.clone(),
};
ret.finalize_tokens(LazyTokenStream::new(lazy_impl));
@@ -1427,9 +1428,9 @@ macro_rules! maybe_collect_tokens {
if matches!($force_collect, ForceCollect::Yes)
|| $crate::parser::attr::maybe_needs_tokens($attrs)
{
$self.collect_tokens($f)
$self.collect_tokens_trailing_token($f)
} else {
$f($self)
Ok($f($self)?.0)
}
};
}
170 changes: 93 additions & 77 deletions compiler/rustc_parse/src/parser/stmt.rs
Original file line number Diff line number Diff line change
@@ -3,14 +3,13 @@ use super::diagnostics::{AttemptLocalParseRecovery, Error};
use super::expr::LhsExpr;
use super::pat::{GateOr, RecoverComma};
use super::path::PathStyle;
use super::{BlockMode, ForceCollect, Parser, Restrictions, SemiColonMode};
use super::{BlockMode, ForceCollect, Parser, Restrictions, SemiColonMode, TrailingToken};
use crate::{maybe_collect_tokens, maybe_whole};

use rustc_ast as ast;
use rustc_ast::attr::HasAttrs;
use rustc_ast::ptr::P;
use rustc_ast::token::{self, TokenKind};
use rustc_ast::tokenstream::LazyTokenStream;
use rustc_ast::util::classify;
use rustc_ast::{AttrStyle, AttrVec, Attribute, MacCall, MacCallStmt, MacStmtStyle};
use rustc_ast::{Block, BlockCheckMode, Expr, ExprKind, Local, Stmt, StmtKind, DUMMY_NODE_ID};
@@ -25,7 +24,7 @@ impl<'a> Parser<'a> {
/// e.g., a `StmtKind::Semi` parses to a `StmtKind::Expr`, leaving the trailing `;` unconsumed.
// Public for rustfmt usage.
pub fn parse_stmt(&mut self, force_collect: ForceCollect) -> PResult<'a, Option<Stmt>> {
Ok(self.parse_stmt_without_recovery(force_collect).unwrap_or_else(|mut e| {
Ok(self.parse_stmt_without_recovery(false, force_collect).unwrap_or_else(|mut e| {
e.emit();
self.recover_stmt_(SemiColonMode::Break, BlockMode::Ignore);
None
@@ -36,6 +35,7 @@ impl<'a> Parser<'a> {
/// or not we have attributes
fn parse_stmt_without_recovery(
&mut self,
capture_semi: bool,
force_collect: ForceCollect,
) -> PResult<'a, Option<Stmt>> {
let mut attrs = self.parse_outer_attributes()?;
@@ -50,68 +50,77 @@ impl<'a> Parser<'a> {
Some(stmt)
});

maybe_collect_tokens!(self, force_collect, &attrs, |this: &mut Self| {
let stmt = if this.eat_keyword(kw::Let) {
this.parse_local_mk(lo, attrs.into())?
} else if this.is_kw_followed_by_ident(kw::Mut) {
this.recover_stmt_local(lo, attrs.into(), "missing keyword", "let mut")?
} else if this.is_kw_followed_by_ident(kw::Auto) {
this.bump(); // `auto`
let msg = "write `let` instead of `auto` to introduce a new variable";
this.recover_stmt_local(lo, attrs.into(), msg, "let")?
} else if this.is_kw_followed_by_ident(sym::var) {
this.bump(); // `var`
let msg = "write `let` instead of `var` to introduce a new variable";
this.recover_stmt_local(lo, attrs.into(), msg, "let")?
} else if this.check_path()
&& !this.token.is_qpath_start()
&& !this.is_path_start_item()
{
// We have avoided contextual keywords like `union`, items with `crate` visibility,
// or `auto trait` items. We aim to parse an arbitrary path `a::b` but not something
// that starts like a path (1 token), but it fact not a path.
// Also, we avoid stealing syntax from `parse_item_`.
this.parse_stmt_path_start(lo, attrs)?
} else if let Some(item) =
this.parse_item_common(attrs.clone(), false, true, |_| true, force_collect)?
{
// FIXME: Bad copy of attrs
this.mk_stmt(lo.to(item.span), StmtKind::Item(P(item)))
} else if this.eat(&token::Semi) {
// Do not attempt to parse an expression if we're done here.
this.error_outer_attrs(&attrs);
this.mk_stmt(lo, StmtKind::Empty)
} else if this.token != token::CloseDelim(token::Brace) {
// Remainder are line-expr stmts.
let e = this.parse_expr_res(Restrictions::STMT_EXPR, Some(attrs.into()))?;
this.mk_stmt(lo.to(e.span), StmtKind::Expr(e))
} else {
this.error_outer_attrs(&attrs);
return Ok(None);
};
Ok(Some(stmt))
})
Ok(Some(if self.token.is_keyword(kw::Let) {
self.parse_local_mk(lo, attrs.into(), capture_semi, force_collect)?
} else if self.is_kw_followed_by_ident(kw::Mut) {
self.recover_stmt_local(lo, attrs.into(), "missing keyword", "let mut")?
} else if self.is_kw_followed_by_ident(kw::Auto) {
self.bump(); // `auto`
let msg = "write `let` instead of `auto` to introduce a new variable";
self.recover_stmt_local(lo, attrs.into(), msg, "let")?
} else if self.is_kw_followed_by_ident(sym::var) {
self.bump(); // `var`
let msg = "write `let` instead of `var` to introduce a new variable";
self.recover_stmt_local(lo, attrs.into(), msg, "let")?
} else if self.check_path() && !self.token.is_qpath_start() && !self.is_path_start_item() {
// We have avoided contextual keywords like `union`, items with `crate` visibility,
// or `auto trait` items. We aim to parse an arbitrary path `a::b` but not something
// that starts like a path (1 token), but it fact not a path.
// Also, we avoid stealing syntax from `parse_item_`.
self.parse_stmt_path_start(lo, attrs, force_collect)?
} else if let Some(item) =
self.parse_item_common(attrs.clone(), false, true, |_| true, force_collect)?
{
// FIXME: Bad copy of attrs
self.mk_stmt(lo.to(item.span), StmtKind::Item(P(item)))
} else if self.eat(&token::Semi) {
// Do not attempt to parse an expression if we're done here.
self.error_outer_attrs(&attrs);
self.mk_stmt(lo, StmtKind::Empty)
} else if self.token != token::CloseDelim(token::Brace) {
// Remainder are line-expr stmts.
let e = self.parse_expr_res(Restrictions::STMT_EXPR, Some(attrs.into()))?;
self.mk_stmt(lo.to(e.span), StmtKind::Expr(e))
} else {
self.error_outer_attrs(&attrs);
return Ok(None);
}))
}

fn parse_stmt_path_start(&mut self, lo: Span, attrs: Vec<Attribute>) -> PResult<'a, Stmt> {
let path = self.parse_path(PathStyle::Expr)?;
fn parse_stmt_path_start(
&mut self,
lo: Span,
attrs: Vec<Attribute>,
force_collect: ForceCollect,
) -> PResult<'a, Stmt> {
maybe_collect_tokens!(self, force_collect, &attrs, |this: &mut Parser<'a>| {
let path = this.parse_path(PathStyle::Expr)?;

if self.eat(&token::Not) {
return self.parse_stmt_mac(lo, attrs.into(), path);
}
if this.eat(&token::Not) {
let stmt_mac = this.parse_stmt_mac(lo, attrs.into(), path)?;
if this.token == token::Semi {
return Ok((stmt_mac, TrailingToken::Semi));
} else {
return Ok((stmt_mac, TrailingToken::None));
}
}

let expr = if self.eat(&token::OpenDelim(token::Brace)) {
self.parse_struct_expr(path, AttrVec::new(), true)?
} else {
let hi = self.prev_token.span;
self.mk_expr(lo.to(hi), ExprKind::Path(None, path), AttrVec::new())
};
let expr = if this.eat(&token::OpenDelim(token::Brace)) {
this.parse_struct_expr(path, AttrVec::new(), true)?
} else {
let hi = this.prev_token.span;
this.mk_expr(lo.to(hi), ExprKind::Path(None, path), AttrVec::new())
};

let expr = self.with_res(Restrictions::STMT_EXPR, |this| {
let expr = this.parse_dot_or_call_expr_with(expr, lo, attrs.into())?;
this.parse_assoc_expr_with(0, LhsExpr::AlreadyParsed(expr))
})?;
Ok(self.mk_stmt(lo.to(self.prev_token.span), StmtKind::Expr(expr)))
let expr = this.with_res(Restrictions::STMT_EXPR, |this| {
let expr = this.parse_dot_or_call_expr_with(expr, lo, attrs.into())?;
this.parse_assoc_expr_with(0, LhsExpr::AlreadyParsed(expr))
})?;
Ok((
this.mk_stmt(lo.to(this.prev_token.span), StmtKind::Expr(expr)),
TrailingToken::None,
))
})
}

/// Parses a statement macro `mac!(args)` provided a `path` representing `mac`.
@@ -159,15 +168,34 @@ impl<'a> Parser<'a> {
msg: &str,
sugg: &str,
) -> PResult<'a, Stmt> {
let stmt = self.parse_local_mk(lo, attrs)?;
let stmt = self.recover_local_after_let(lo, attrs)?;
self.struct_span_err(lo, "invalid variable declaration")
.span_suggestion(lo, msg, sugg.to_string(), Applicability::MachineApplicable)
.emit();
Ok(stmt)
}

fn parse_local_mk(&mut self, lo: Span, attrs: AttrVec) -> PResult<'a, Stmt> {
let local = self.parse_local(attrs)?;
fn parse_local_mk(
&mut self,
lo: Span,
attrs: AttrVec,
capture_semi: bool,
force_collect: ForceCollect,
) -> PResult<'a, Stmt> {
maybe_collect_tokens!(self, force_collect, &attrs, |this: &mut Parser<'a>| {
this.expect_keyword(kw::Let)?;
let local = this.parse_local(attrs.into())?;
let trailing = if capture_semi && this.token.kind == token::Semi {
TrailingToken::Semi
} else {
TrailingToken::None
};
Ok((this.mk_stmt(lo.to(this.prev_token.span), StmtKind::Local(local)), trailing))
})
}

fn recover_local_after_let(&mut self, lo: Span, attrs: AttrVec) -> PResult<'a, Stmt> {
let local = self.parse_local(attrs.into())?;
Ok(self.mk_stmt(lo.to(self.prev_token.span), StmtKind::Local(local)))
}

@@ -289,7 +317,7 @@ impl<'a> Parser<'a> {
// bar;
//
// which is valid in other languages, but not Rust.
match self.parse_stmt_without_recovery(ForceCollect::No) {
match self.parse_stmt_without_recovery(false, ForceCollect::No) {
// If the next token is an open brace (e.g., `if a b {`), the place-
// inside-a-block suggestion would be more likely wrong than right.
Ok(Some(_))
@@ -392,17 +420,11 @@ impl<'a> Parser<'a> {
// Skip looking for a trailing semicolon when we have an interpolated statement.
maybe_whole!(self, NtStmt, |x| Some(x));

let mut stmt = match self.parse_stmt_without_recovery(ForceCollect::No)? {
let mut stmt = match self.parse_stmt_without_recovery(true, ForceCollect::No)? {
Some(stmt) => stmt,
None => return Ok(None),
};

let add_semi_token = |tokens: Option<&mut LazyTokenStream>| {
if let Some(tokens) = tokens {
*tokens = tokens.add_trailing_semi();
}
};

let mut eat_semi = true;
match stmt.kind {
// Expression without semicolon.
@@ -458,18 +480,12 @@ impl<'a> Parser<'a> {
}
}
eat_semi = false;
// We just checked that there's a semicolon in the tokenstream,
// so capture it
add_semi_token(local.tokens.as_mut());
}
StmtKind::Empty | StmtKind::Item(_) | StmtKind::Semi(_) => eat_semi = false,
}

if eat_semi && self.eat(&token::Semi) {
stmt = stmt.add_trailing_semicolon();
// We just checked that we have a semicolon in the tokenstream,
// so capture it
add_semi_token(stmt.tokens_mut());
}
stmt.span = stmt.span.to(self.prev_token.span);
Ok(Some(stmt))