Skip to content

libsyntax: Refactor parser.rs into reasonably sized logical units #63469

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Aug 12, 2019
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6,781 changes: 308 additions & 6,473 deletions src/libsyntax/parse/parser.rs

Large diffs are not rendered by default.

1,748 changes: 1,748 additions & 0 deletions src/libsyntax/parse/parser/expr.rs

Large diffs are not rendered by default.

276 changes: 276 additions & 0 deletions src/libsyntax/parse/parser/generics.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,276 @@
use super::{Parser, PResult};

use crate::ast::{self, WhereClause, GenericParam, GenericParamKind, GenericBounds, Attribute};
use crate::parse::token;
use crate::source_map::DUMMY_SP;
use crate::symbol::kw;

impl<'a> Parser<'a> {
/// Parses bounds of a lifetime parameter `BOUND + BOUND + BOUND`, possibly with trailing `+`.
///
/// ```
/// BOUND = LT_BOUND (e.g., `'a`)
/// ```
fn parse_lt_param_bounds(&mut self) -> GenericBounds {
let mut lifetimes = Vec::new();
while self.check_lifetime() {
lifetimes.push(ast::GenericBound::Outlives(self.expect_lifetime()));

if !self.eat_plus() {
break
}
}
lifetimes
}

/// Matches `typaram = IDENT (`?` unbound)? optbounds ( EQ ty )?`.
fn parse_ty_param(&mut self,
preceding_attrs: Vec<Attribute>)
-> PResult<'a, GenericParam> {
let ident = self.parse_ident()?;

// Parse optional colon and param bounds.
let bounds = if self.eat(&token::Colon) {
self.parse_generic_bounds(Some(self.prev_span))?
} else {
Vec::new()
};

let default = if self.eat(&token::Eq) {
Some(self.parse_ty()?)
} else {
None
};

Ok(GenericParam {
ident,
id: ast::DUMMY_NODE_ID,
attrs: preceding_attrs.into(),
bounds,
kind: GenericParamKind::Type {
default,
}
})
}

fn parse_const_param(&mut self, preceding_attrs: Vec<Attribute>) -> PResult<'a, GenericParam> {
self.expect_keyword(kw::Const)?;
let ident = self.parse_ident()?;
self.expect(&token::Colon)?;
let ty = self.parse_ty()?;

Ok(GenericParam {
ident,
id: ast::DUMMY_NODE_ID,
attrs: preceding_attrs.into(),
bounds: Vec::new(),
kind: GenericParamKind::Const {
ty,
}
})
}

/// Parses a (possibly empty) list of lifetime and type parameters, possibly including
/// a trailing comma and erroneous trailing attributes.
crate fn parse_generic_params(&mut self) -> PResult<'a, Vec<ast::GenericParam>> {
let mut params = Vec::new();
loop {
let attrs = self.parse_outer_attributes()?;
if self.check_lifetime() {
let lifetime = self.expect_lifetime();
// Parse lifetime parameter.
let bounds = if self.eat(&token::Colon) {
self.parse_lt_param_bounds()
} else {
Vec::new()
};
params.push(ast::GenericParam {
ident: lifetime.ident,
id: lifetime.id,
attrs: attrs.into(),
bounds,
kind: ast::GenericParamKind::Lifetime,
});
} else if self.check_keyword(kw::Const) {
// Parse const parameter.
params.push(self.parse_const_param(attrs)?);
} else if self.check_ident() {
// Parse type parameter.
params.push(self.parse_ty_param(attrs)?);
} else {
// Check for trailing attributes and stop parsing.
if !attrs.is_empty() {
if !params.is_empty() {
self.struct_span_err(
attrs[0].span,
&format!("trailing attribute after generic parameter"),
)
.span_label(attrs[0].span, "attributes must go before parameters")
.emit();
} else {
self.struct_span_err(
attrs[0].span,
&format!("attribute without generic parameters"),
)
.span_label(
attrs[0].span,
"attributes are only permitted when preceding parameters",
)
.emit();
}
}
break
}

if !self.eat(&token::Comma) {
break
}
}
Ok(params)
}

/// Parses a set of optional generic type parameter declarations. Where
/// clauses are not parsed here, and must be added later via
/// `parse_where_clause()`.
///
/// matches generics = ( ) | ( < > ) | ( < typaramseq ( , )? > ) | ( < lifetimes ( , )? > )
/// | ( < lifetimes , typaramseq ( , )? > )
/// where typaramseq = ( typaram ) | ( typaram , typaramseq )
pub(super) fn parse_generics(&mut self) -> PResult<'a, ast::Generics> {
let span_lo = self.token.span;
let (params, span) = if self.eat_lt() {
let params = self.parse_generic_params()?;
self.expect_gt()?;
(params, span_lo.to(self.prev_span))
} else {
(vec![], self.prev_span.between(self.token.span))
};
Ok(ast::Generics {
params,
where_clause: WhereClause {
predicates: Vec::new(),
span: DUMMY_SP,
},
span,
})
}

/// Parses an optional where-clause and places it in `generics`.
///
/// ```ignore (only-for-syntax-highlight)
/// where T : Trait<U, V> + 'b, 'a : 'b
/// ```
pub(super) fn parse_where_clause(&mut self) -> PResult<'a, WhereClause> {
let mut where_clause = WhereClause {
predicates: Vec::new(),
span: self.prev_span.to(self.prev_span),
};

if !self.eat_keyword(kw::Where) {
return Ok(where_clause);
}
let lo = self.prev_span;

// We are considering adding generics to the `where` keyword as an alternative higher-rank
// parameter syntax (as in `where<'a>` or `where<T>`. To avoid that being a breaking
// change we parse those generics now, but report an error.
if self.choose_generics_over_qpath() {
let generics = self.parse_generics()?;
self.struct_span_err(
generics.span,
"generic parameters on `where` clauses are reserved for future use",
)
.span_label(generics.span, "currently unsupported")
.emit();
}

loop {
let lo = self.token.span;
if self.check_lifetime() && self.look_ahead(1, |t| !t.is_like_plus()) {
let lifetime = self.expect_lifetime();
// Bounds starting with a colon are mandatory, but possibly empty.
self.expect(&token::Colon)?;
let bounds = self.parse_lt_param_bounds();
where_clause.predicates.push(ast::WherePredicate::RegionPredicate(
ast::WhereRegionPredicate {
span: lo.to(self.prev_span),
lifetime,
bounds,
}
));
} else if self.check_type() {
// Parse optional `for<'a, 'b>`.
// This `for` is parsed greedily and applies to the whole predicate,
// the bounded type can have its own `for` applying only to it.
// Examples:
// * `for<'a> Trait1<'a>: Trait2<'a /* ok */>`
// * `(for<'a> Trait1<'a>): Trait2<'a /* not ok */>`
// * `for<'a> for<'b> Trait1<'a, 'b>: Trait2<'a /* ok */, 'b /* not ok */>`
let lifetime_defs = self.parse_late_bound_lifetime_defs()?;

// Parse type with mandatory colon and (possibly empty) bounds,
// or with mandatory equality sign and the second type.
let ty = self.parse_ty()?;
if self.eat(&token::Colon) {
let bounds = self.parse_generic_bounds(Some(self.prev_span))?;
where_clause.predicates.push(ast::WherePredicate::BoundPredicate(
ast::WhereBoundPredicate {
span: lo.to(self.prev_span),
bound_generic_params: lifetime_defs,
bounded_ty: ty,
bounds,
}
));
// FIXME: Decide what should be used here, `=` or `==`.
// FIXME: We are just dropping the binders in lifetime_defs on the floor here.
} else if self.eat(&token::Eq) || self.eat(&token::EqEq) {
let rhs_ty = self.parse_ty()?;
where_clause.predicates.push(ast::WherePredicate::EqPredicate(
ast::WhereEqPredicate {
span: lo.to(self.prev_span),
lhs_ty: ty,
rhs_ty,
id: ast::DUMMY_NODE_ID,
}
));
} else {
return self.unexpected();
}
} else {
break
}

if !self.eat(&token::Comma) {
break
}
}

where_clause.span = lo.to(self.prev_span);
Ok(where_clause)
}

pub(super) fn choose_generics_over_qpath(&self) -> bool {
// There's an ambiguity between generic parameters and qualified paths in impls.
// If we see `<` it may start both, so we have to inspect some following tokens.
// The following combinations can only start generics,
// but not qualified paths (with one exception):
// `<` `>` - empty generic parameters
// `<` `#` - generic parameters with attributes
// `<` (LIFETIME|IDENT) `>` - single generic parameter
// `<` (LIFETIME|IDENT) `,` - first generic parameter in a list
// `<` (LIFETIME|IDENT) `:` - generic parameter with bounds
// `<` (LIFETIME|IDENT) `=` - generic parameter with a default
// `<` const - generic const parameter
// The only truly ambiguous case is
// `<` IDENT `>` `::` IDENT ...
// we disambiguate it in favor of generics (`impl<T> ::absolute::Path<T> { ... }`)
// because this is what almost always expected in practice, qualified paths in impls
// (`impl <Type>::AssocTy { ... }`) aren't even allowed by type checker at the moment.
self.token == token::Lt &&
(self.look_ahead(1, |t| t == &token::Pound || t == &token::Gt) ||
self.look_ahead(1, |t| t.is_lifetime() || t.is_ident()) &&
self.look_ahead(2, |t| t == &token::Gt || t == &token::Comma ||
t == &token::Colon || t == &token::Eq) ||
self.is_keyword_ahead(1, &[kw::Const]))
}
}
1,915 changes: 1,915 additions & 0 deletions src/libsyntax/parse/parser/item.rs

Large diffs are not rendered by default.

332 changes: 332 additions & 0 deletions src/libsyntax/parse/parser/module.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,332 @@
use super::{Parser, PResult};
use super::item::ItemInfo;

use crate::attr;
use crate::ast::{self, Ident, Attribute, ItemKind, Mod, Crate};
use crate::parse::{new_sub_parser_from_file, DirectoryOwnership};
use crate::parse::token::{self, TokenKind};
use crate::parse::diagnostics::{Error};
use crate::source_map::{SourceMap, Span, DUMMY_SP, FileName};
use crate::symbol::sym;

use std::path::{self, Path, PathBuf};

/// Information about the path to a module.
pub struct ModulePath {
name: String,
path_exists: bool,
pub result: Result<ModulePathSuccess, Error>,
}

pub struct ModulePathSuccess {
pub path: PathBuf,
pub directory_ownership: DirectoryOwnership,
warn: bool,
}

impl<'a> Parser<'a> {
/// Parses a source module as a crate. This is the main entry point for the parser.
pub fn parse_crate_mod(&mut self) -> PResult<'a, Crate> {
let lo = self.token.span;
let krate = Ok(ast::Crate {
attrs: self.parse_inner_attributes()?,
module: self.parse_mod_items(&token::Eof, lo)?,
span: lo.to(self.token.span),
});
krate
}

/// Parse a `mod <foo> { ... }` or `mod <foo>;` item
pub(super) fn parse_item_mod(&mut self, outer_attrs: &[Attribute]) -> PResult<'a, ItemInfo> {
let (in_cfg, outer_attrs) = {
let mut strip_unconfigured = crate::config::StripUnconfigured {
sess: self.sess,
features: None, // don't perform gated feature checking
};
let mut outer_attrs = outer_attrs.to_owned();
strip_unconfigured.process_cfg_attrs(&mut outer_attrs);
(!self.cfg_mods || strip_unconfigured.in_cfg(&outer_attrs), outer_attrs)
};

let id_span = self.token.span;
let id = self.parse_ident()?;
if self.eat(&token::Semi) {
if in_cfg && self.recurse_into_file_modules {
// This mod is in an external file. Let's go get it!
let ModulePathSuccess { path, directory_ownership, warn } =
self.submod_path(id, &outer_attrs, id_span)?;
let (module, mut attrs) =
self.eval_src_mod(path, directory_ownership, id.to_string(), id_span)?;
// Record that we fetched the mod from an external file
if warn {
let attr = attr::mk_attr_outer(
attr::mk_word_item(Ident::with_empty_ctxt(sym::warn_directory_ownership)));
attr::mark_known(&attr);
attrs.push(attr);
}
Ok((id, ItemKind::Mod(module), Some(attrs)))
} else {
let placeholder = ast::Mod {
inner: DUMMY_SP,
items: Vec::new(),
inline: false
};
Ok((id, ItemKind::Mod(placeholder), None))
}
} else {
let old_directory = self.directory.clone();
self.push_directory(id, &outer_attrs);

self.expect(&token::OpenDelim(token::Brace))?;
let mod_inner_lo = self.token.span;
let attrs = self.parse_inner_attributes()?;
let module = self.parse_mod_items(&token::CloseDelim(token::Brace), mod_inner_lo)?;

self.directory = old_directory;
Ok((id, ItemKind::Mod(module), Some(attrs)))
}
}

/// Given a termination token, parses all of the items in a module.
fn parse_mod_items(&mut self, term: &TokenKind, inner_lo: Span) -> PResult<'a, Mod> {
let mut items = vec![];
while let Some(item) = self.parse_item()? {
items.push(item);
self.maybe_consume_incorrect_semicolon(&items);
}

if !self.eat(term) {
let token_str = self.this_token_descr();
if !self.maybe_consume_incorrect_semicolon(&items) {
let mut err = self.fatal(&format!("expected item, found {}", token_str));
err.span_label(self.token.span, "expected item");
return Err(err);
}
}

let hi = if self.token.span.is_dummy() {
inner_lo
} else {
self.prev_span
};

Ok(Mod {
inner: inner_lo.to(hi),
items,
inline: true
})
}

fn submod_path(
&mut self,
id: ast::Ident,
outer_attrs: &[Attribute],
id_sp: Span
) -> PResult<'a, ModulePathSuccess> {
if let Some(path) = Parser::submod_path_from_attr(outer_attrs, &self.directory.path) {
return Ok(ModulePathSuccess {
directory_ownership: match path.file_name().and_then(|s| s.to_str()) {
// All `#[path]` files are treated as though they are a `mod.rs` file.
// This means that `mod foo;` declarations inside `#[path]`-included
// files are siblings,
//
// Note that this will produce weirdness when a file named `foo.rs` is
// `#[path]` included and contains a `mod foo;` declaration.
// If you encounter this, it's your own darn fault :P
Some(_) => DirectoryOwnership::Owned { relative: None },
_ => DirectoryOwnership::UnownedViaMod(true),
},
path,
warn: false,
});
}

let relative = match self.directory.ownership {
DirectoryOwnership::Owned { relative } => relative,
DirectoryOwnership::UnownedViaBlock |
DirectoryOwnership::UnownedViaMod(_) => None,
};
let paths = Parser::default_submod_path(
id, relative, &self.directory.path, self.sess.source_map());

match self.directory.ownership {
DirectoryOwnership::Owned { .. } => {
paths.result.map_err(|err| self.span_fatal_err(id_sp, err))
},
DirectoryOwnership::UnownedViaBlock => {
let msg =
"Cannot declare a non-inline module inside a block \
unless it has a path attribute";
let mut err = self.diagnostic().struct_span_err(id_sp, msg);
if paths.path_exists {
let msg = format!("Maybe `use` the module `{}` instead of redeclaring it",
paths.name);
err.span_note(id_sp, &msg);
}
Err(err)
}
DirectoryOwnership::UnownedViaMod(warn) => {
if warn {
if let Ok(result) = paths.result {
return Ok(ModulePathSuccess { warn: true, ..result });
}
}
let mut err = self.diagnostic().struct_span_err(id_sp,
"cannot declare a new module at this location");
if !id_sp.is_dummy() {
let src_path = self.sess.source_map().span_to_filename(id_sp);
if let FileName::Real(src_path) = src_path {
if let Some(stem) = src_path.file_stem() {
let mut dest_path = src_path.clone();
dest_path.set_file_name(stem);
dest_path.push("mod.rs");
err.span_note(id_sp,
&format!("maybe move this module `{}` to its own \
directory via `{}`", src_path.display(),
dest_path.display()));
}
}
}
if paths.path_exists {
err.span_note(id_sp,
&format!("... or maybe `use` the module `{}` instead \
of possibly redeclaring it",
paths.name));
}
Err(err)
}
}
}

pub fn submod_path_from_attr(attrs: &[Attribute], dir_path: &Path) -> Option<PathBuf> {
if let Some(s) = attr::first_attr_value_str_by_name(attrs, sym::path) {
let s = s.as_str();

// On windows, the base path might have the form
// `\\?\foo\bar` in which case it does not tolerate
// mixed `/` and `\` separators, so canonicalize
// `/` to `\`.
#[cfg(windows)]
let s = s.replace("/", "\\");
Some(dir_path.join(s))
} else {
None
}
}

/// Returns a path to a module.
pub fn default_submod_path(
id: ast::Ident,
relative: Option<ast::Ident>,
dir_path: &Path,
source_map: &SourceMap) -> ModulePath
{
// If we're in a foo.rs file instead of a mod.rs file,
// we need to look for submodules in
// `./foo/<id>.rs` and `./foo/<id>/mod.rs` rather than
// `./<id>.rs` and `./<id>/mod.rs`.
let relative_prefix_string;
let relative_prefix = if let Some(ident) = relative {
relative_prefix_string = format!("{}{}", ident.as_str(), path::MAIN_SEPARATOR);
&relative_prefix_string
} else {
""
};

let mod_name = id.to_string();
let default_path_str = format!("{}{}.rs", relative_prefix, mod_name);
let secondary_path_str = format!("{}{}{}mod.rs",
relative_prefix, mod_name, path::MAIN_SEPARATOR);
let default_path = dir_path.join(&default_path_str);
let secondary_path = dir_path.join(&secondary_path_str);
let default_exists = source_map.file_exists(&default_path);
let secondary_exists = source_map.file_exists(&secondary_path);

let result = match (default_exists, secondary_exists) {
(true, false) => Ok(ModulePathSuccess {
path: default_path,
directory_ownership: DirectoryOwnership::Owned {
relative: Some(id),
},
warn: false,
}),
(false, true) => Ok(ModulePathSuccess {
path: secondary_path,
directory_ownership: DirectoryOwnership::Owned {
relative: None,
},
warn: false,
}),
(false, false) => Err(Error::FileNotFoundForModule {
mod_name: mod_name.clone(),
default_path: default_path_str,
secondary_path: secondary_path_str,
dir_path: dir_path.display().to_string(),
}),
(true, true) => Err(Error::DuplicatePaths {
mod_name: mod_name.clone(),
default_path: default_path_str,
secondary_path: secondary_path_str,
}),
};

ModulePath {
name: mod_name,
path_exists: default_exists || secondary_exists,
result,
}
}

/// Reads a module from a source file.
fn eval_src_mod(
&mut self,
path: PathBuf,
directory_ownership: DirectoryOwnership,
name: String,
id_sp: Span,
) -> PResult<'a, (Mod, Vec<Attribute>)> {
let mut included_mod_stack = self.sess.included_mod_stack.borrow_mut();
if let Some(i) = included_mod_stack.iter().position(|p| *p == path) {
let mut err = String::from("circular modules: ");
let len = included_mod_stack.len();
for p in &included_mod_stack[i.. len] {
err.push_str(&p.to_string_lossy());
err.push_str(" -> ");
}
err.push_str(&path.to_string_lossy());
return Err(self.span_fatal(id_sp, &err[..]));
}
included_mod_stack.push(path.clone());
drop(included_mod_stack);

let mut p0 =
new_sub_parser_from_file(self.sess, &path, directory_ownership, Some(name), id_sp);
p0.cfg_mods = self.cfg_mods;
let mod_inner_lo = p0.token.span;
let mod_attrs = p0.parse_inner_attributes()?;
let mut m0 = p0.parse_mod_items(&token::Eof, mod_inner_lo)?;
m0.inline = false;
self.sess.included_mod_stack.borrow_mut().pop();
Ok((m0, mod_attrs))
}

fn push_directory(&mut self, id: Ident, attrs: &[Attribute]) {
if let Some(path) = attr::first_attr_value_str_by_name(attrs, sym::path) {
self.directory.path.to_mut().push(&path.as_str());
self.directory.ownership = DirectoryOwnership::Owned { relative: None };
} else {
// We have to push on the current module name in the case of relative
// paths in order to ensure that any additional module paths from inline
// `mod x { ... }` come after the relative extension.
//
// For example, a `mod z { ... }` inside `x/y.rs` should set the current
// directory path to `/x/y/z`, not `/x/z` with a relative offset of `y`.
if let DirectoryOwnership::Owned { relative } = &mut self.directory.ownership {
if let Some(ident) = relative.take() { // remove the relative offset
self.directory.path.to_mut().push(ident.as_str());
}
}
self.directory.path.to_mut().push(&id.as_str());
}
}
}
634 changes: 634 additions & 0 deletions src/libsyntax/parse/parser/pat.rs

Large diffs are not rendered by default.

474 changes: 474 additions & 0 deletions src/libsyntax/parse/parser/path.rs

Large diffs are not rendered by default.

458 changes: 458 additions & 0 deletions src/libsyntax/parse/parser/stmt.rs

Large diffs are not rendered by default.

461 changes: 461 additions & 0 deletions src/libsyntax/parse/parser/ty.rs

Large diffs are not rendered by default.