//! Resolution of the entire dependency graph for a project.
//!
//! This module implements the core logic for taking all contracts of a project and creating a
//! resolved graph with applied remappings for all source contracts.
//!
//! Some constraints we're working with when resolving contracts:
//!
//!   1. Each file can contain several source units and can have any number of
//!      imports/dependencies (using the terms interchangeably). Each dependency can declare a
//!      version range that it is compatible with, via the solidity version pragma.
//!   2. A dependency can be imported from any directory, see `Remappings`.
//!
//! Finding all dependencies is fairly simple: we're simply doing a DFS, starting from the source
//! contracts.
//!
//! ## Performance
//!
//! Note that this is a relatively performance-critical portion of the ethers-solc preprocessing.
//! The data that needs to be processed is proportional to the size of the dependency
//! graph, which can, depending on the project, often be quite large.
//!
//! Note that, unlike the solidity compiler, we work with the filesystem, where we have to resolve
//! remappings and follow relative paths. We're also limiting the nodes in the graph to solidity
//! files, since we're only interested in their
//! [version pragma](https://docs.soliditylang.org/en/develop/layout-of-source-files.html#version-pragma),
//! which is defined on a per source file basis.

use std::{
    collections::{HashMap, HashSet, VecDeque},
    path::{Path, PathBuf},
};

use rayon::prelude::*;
use regex::Match;
use semver::VersionReq;
use solang_parser::pt::{Import, Loc, SourceUnitPart};

use crate::{error::Result, utils, ProjectPathsConfig, Solc, Source, Sources};

/// The underlying edges of the graph which only contains the raw relationship data.
///
/// This is kept separate from the `Graph` as the `Node`s get consumed when the `Solc` to `Sources`
/// set is determined.
#[derive(Debug, Clone)] pub struct GraphEdges { /// The indices of `edges` correspond to the `nodes`. That is, `edges[0]` /// is the set of outgoing edges for `nodes[0]`. edges: Vec>, /// index maps for a solidity file to an index, for fast lookup. indices: HashMap, /// reverse of `indices` for reverse lookup rev_indices: HashMap, /// the identified version requirement of a file versions: HashMap>, /// with how many input files we started with, corresponds to `let input_files = /// nodes[..num_input_files]`. /// /// Combined with the `indices` this way we can determine if a file was original added to the /// graph as input or was added as resolved import, see [`Self::is_input_file()`] num_input_files: usize, } impl GraphEdges { /// Returns a list of nodes the given node index points to for the given kind. pub fn imported_nodes(&self, from: usize) -> &[usize] { &self.edges[from] } /// Returns all files imported by the given file pub fn imports(&self, file: impl AsRef) -> HashSet<&PathBuf> { if let Some(start) = self.indices.get(file.as_ref()).copied() { NodesIter::new(start, self).skip(1).map(move |idx| &self.rev_indices[&idx]).collect() } else { HashSet::new() } } /// Returns true if the `file` was originally included when the graph was first created and not /// added when all `imports` were resolved pub fn is_input_file(&self, file: impl AsRef) -> bool { if let Some(idx) = self.indices.get(file.as_ref()).copied() { idx < self.num_input_files } else { false } } /// Returns the `VersionReq` for the given file pub fn version_requirement(&self, file: impl AsRef) -> Option<&VersionReq> { self.indices .get(file.as_ref()) .and_then(|idx| self.versions.get(idx)) .and_then(|v| v.as_ref()) } } /// Represents a fully-resolved solidity dependency graph. Each node in the graph /// is a file and edges represent dependencies between them. 
/// See also https://docs.soliditylang.org/en/latest/layout-of-source-files.html?highlight=import#importing-other-source-files #[derive(Debug)] pub struct Graph { nodes: Vec, /// relationship of the nodes edges: GraphEdges, /// the root of the project this graph represents #[allow(unused)] root: PathBuf, } impl Graph { /// Returns a list of nodes the given node index points to for the given kind. pub fn imported_nodes(&self, from: usize) -> &[usize] { self.edges.imported_nodes(from) } /// Returns all the resolved files and their index in the graph pub fn files(&self) -> &HashMap { &self.edges.indices } /// Gets a node by index. /// /// # Panics /// /// if the `index` node id is not included in the graph pub fn node(&self, index: usize) -> &Node { &self.nodes[index] } /// Returns an iterator that yields all nodes of the dependency tree that the given node id /// spans, starting with the node itself. /// /// # Panics /// /// if the `start` node id is not included in the graph pub fn node_ids(&self, start: usize) -> impl Iterator + '_ { NodesIter::new(start, &self.edges) } /// Same as `Self::node_ids` but returns the actual `Node` pub fn nodes(&self, start: usize) -> impl Iterator + '_ { self.node_ids(start).map(move |idx| self.node(idx)) } /// Consumes the `Graph`, effectively splitting the `nodes` and the `GraphEdges` off and /// returning the `nodes` converted to `Sources` pub fn into_sources(self) -> (Sources, GraphEdges) { let Graph { nodes, edges, .. } = self; (nodes.into_iter().map(|node| (node.path, node.source)).collect(), edges) } /// Returns an iterator that yields only those nodes that represent input files. 
/// See `Self::resolve_sources` /// This won't yield any resolved library nodes pub fn input_nodes(&self) -> impl Iterator { self.nodes.iter().take(self.edges.num_input_files) } pub fn imports(&self, path: impl AsRef) -> HashSet<&PathBuf> { self.edges.imports(path) } /// Resolves a number of sources within the given config pub fn resolve_sources(paths: &ProjectPathsConfig, sources: Sources) -> Result { /// checks if the given target path was already resolved, if so it adds its id to the list /// of resolved imports. If it hasn't been resolved yet, it queues in the file for /// processing fn add_node( unresolved: &mut VecDeque<(PathBuf, Node)>, index: &mut HashMap, resolved_imports: &mut Vec, target: PathBuf, ) -> Result<()> { if let Some(idx) = index.get(&target).copied() { resolved_imports.push(idx); } else { // imported file is not part of the input files let node = read_node(&target)?; unresolved.push_back((target.clone(), node)); let idx = index.len(); index.insert(target, idx); resolved_imports.push(idx); } Ok(()) } // we start off by reading all input files, which includes all solidity files from the // source and test folder let mut unresolved: VecDeque<(PathBuf, Node)> = sources .into_par_iter() .map(|(path, source)| { let data = parse_data(source.as_ref()); (path.clone(), Node { path, source, data }) }) .collect(); // identifiers of all resolved files let mut index: HashMap<_, _> = unresolved.iter().enumerate().map(|(idx, (p, _))| (p.clone(), idx)).collect(); let num_input_files = unresolved.len(); // contains the files and their dependencies let mut nodes = Vec::with_capacity(unresolved.len()); let mut edges = Vec::with_capacity(unresolved.len()); // now we need to resolve all imports for the source file and those imported from other // locations while let Some((path, node)) = unresolved.pop_front() { let mut resolved_imports = Vec::with_capacity(node.data.imports.len()); // parent directory of the current file let cwd = match path.parent() { Some(inner) 
=> inner, None => continue, }; for import in node.data.imports.iter() { match paths.resolve_import(cwd, import.data()) { Ok(import) => { add_node(&mut unresolved, &mut index, &mut resolved_imports, import)?; } Err(err) => tracing::trace!("failed to resolve import component \"{:?}\"", err), }; } nodes.push(node); edges.push(resolved_imports); } let edges = GraphEdges { edges, rev_indices: index.iter().map(|(k, v)| (*v, k.clone())).collect(), indices: index, num_input_files, versions: nodes .iter() .enumerate() .map(|(idx, node)| (idx, node.data.version_req.clone())) .collect(), }; Ok(Graph { nodes, edges, root: paths.root.clone() }) } /// Resolves the dependencies of a project's source contracts pub fn resolve(paths: &ProjectPathsConfig) -> Result { Self::resolve_sources(paths, paths.read_input_files()?) } } #[cfg(all(feature = "svm", feature = "async"))] impl Graph { /// Consumes the nodes of the graph and returns all input files together with their appropriate /// version and the edges of the graph /// /// First we determine the compatible version for each input file (from sources and test folder, /// see `Self::resolve`) and then we add all resolved library imports. pub fn into_sources_by_version(self, offline: bool) -> Result<(VersionedSources, GraphEdges)> { /// insert the imports of the given node into the sources map /// There can be following graph: /// `A(<=0.8.10) imports C(>0.4.0)` and `B(0.8.11) imports C(>0.4.0)` /// where `C` is a library import, in which case we assign `C` only to the first input file. /// However, it's not required to include them in the solc `CompilerInput` as they would get /// picked up by solc otherwise, but we add them, so we can create a corresponding /// cache entry for them as well. 
This can be optimized however fn insert_imports( idx: usize, all_nodes: &mut HashMap, sources: &mut Sources, edges: &[Vec], num_input_files: usize, ) { for dep in edges[idx].iter().copied() { // we only process nodes that were added as part of the resolve step because input // nodes are handled separately if dep >= num_input_files { // library import if let Some(node) = all_nodes.remove(&dep) { sources.insert(node.path, node.source); insert_imports(dep, all_nodes, sources, edges, num_input_files); } } } } let versioned_nodes = self.get_input_node_versions(offline)?; let Self { nodes, edges, .. } = self; let mut versioned_sources = HashMap::with_capacity(versioned_nodes.len()); let mut all_nodes = nodes.into_iter().enumerate().collect::>(); // determine the `Sources` set for each solc version for (version, input_node_indices) in versioned_nodes { let mut sources = Sources::new(); // we only process input nodes (from sources, tests for example) for idx in input_node_indices { // insert the input node in the sources set and remove it from the available set let node = all_nodes.remove(&idx).expect("node is preset. 
qed"); sources.insert(node.path, node.source); insert_imports( idx, &mut all_nodes, &mut sources, &edges.edges, edges.num_input_files, ); } versioned_sources.insert(version, sources); } Ok((VersionedSources { inner: versioned_sources, offline }, edges)) } /// Writes the list of imported files into the given formatter: /// `A (version) imports B (version)` fn format_imports_list( &self, idx: usize, f: &mut W, ) -> std::result::Result<(), std::fmt::Error> { let node = self.node(idx); for dep in self.imported_nodes(idx) { let dep = self.node(*dep); writeln!( f, " {} ({:?}) imports {} ({:?})", utils::source_name(&node.path, &self.root).display(), node.data.version, utils::source_name(&dep.path, &self.root).display(), dep.data.version )?; } for dep in self.imported_nodes(idx) { self.format_imports_list(*dep, f)?; } Ok(()) } /// Filters incompatible versions from the `candidates`. fn retain_compatible_versions(&self, idx: usize, candidates: &mut Vec<&crate::SolcVersion>) { let nodes: HashSet<_> = self.node_ids(idx).collect(); for node in nodes { let node = self.node(node); if let Some(ref req) = node.data.version_req { candidates.retain(|v| req.matches(v.as_ref())); } if candidates.is_empty() { // nothing to filter anymore return } } } /// Ensures that all files are compatible with all of their imports. pub fn ensure_compatible_imports(&self, offline: bool) -> Result<()> { self.get_input_node_versions(offline)?; Ok(()) } /// Returns a map of versions together with the input nodes that are compatible with that /// version. /// /// This will essentially do a DFS on all input sources and their transitive imports and /// checking that all can compiled with the version stated in the input file. /// /// Returns an error message with __all__ input files that don't have compatible imports. /// /// This also attempts to prefer local installations over remote available. /// If `offline` is set to `true` then only already installed. 
fn get_input_node_versions( &self, offline: bool, ) -> Result>> { tracing::trace!("resolving input node versions"); // this is likely called by an application and will be eventually printed so we don't exit // on first error, instead gather all the errors and return a bundled error message instead let mut errors = Vec::new(); // we also don't want duplicate error diagnostic let mut erroneous_nodes = std::collections::HashSet::with_capacity(self.edges.num_input_files); let all_versions = if offline { Solc::installed_versions() } else { Solc::all_versions() }; // stores all versions and their nodes let mut versioned_nodes = HashMap::new(); // walking through the node's dep tree and filtering the versions along the way for idx in 0..self.edges.num_input_files { let mut candidates = all_versions.iter().collect::>(); self.retain_compatible_versions(idx, &mut candidates); if candidates.is_empty() && !erroneous_nodes.contains(&idx) { let mut msg = String::new(); self.format_imports_list(idx, &mut msg).unwrap(); errors.push(format!( "Discovered incompatible solidity versions in following\n: {}", msg )); erroneous_nodes.insert(idx); } else { let candidate = (*candidates .iter() .rev() .find(|v| v.is_installed()) .or_else(|| candidates.iter().last()) .unwrap()) .clone(); versioned_nodes.entry(candidate).or_insert_with(|| Vec::with_capacity(1)).push(idx); } } if errors.is_empty() { tracing::trace!( "resolved {} versions {:?}", versioned_nodes.len(), versioned_nodes.keys() ); Ok(versioned_nodes) } else { tracing::error!("failed to resolve versions"); Err(crate::error::SolcError::msg(errors.join("\n"))) } } } /// An iterator over a node and its dependencies #[derive(Debug)] pub struct NodesIter<'a> { /// stack of nodes stack: VecDeque, visited: HashSet, graph: &'a GraphEdges, } impl<'a> NodesIter<'a> { fn new(start: usize, graph: &'a GraphEdges) -> Self { Self { stack: VecDeque::from([start]), visited: HashSet::new(), graph } } } impl<'a> Iterator for NodesIter<'a> { type Item 
= usize; fn next(&mut self) -> Option { let node = self.stack.pop_front()?; if self.visited.insert(node) { // push the node's direct dependencies to the stack if we haven't visited it already self.stack.extend(self.graph.imported_nodes(node).iter().copied()); } Some(node) } } /// Container type for solc versions and their compatible sources #[cfg(all(feature = "svm", feature = "async"))] #[derive(Debug)] pub struct VersionedSources { inner: HashMap, offline: bool, } #[cfg(all(feature = "svm", feature = "async"))] impl VersionedSources { /// Resolves or installs the corresponding `Solc` installation. pub fn get( self, allowed_lib_paths: &crate::AllowedLibPaths, ) -> Result> { use crate::SolcError; // we take the installer lock here to ensure installation checking is done in sync #[cfg(any(test, feature = "tests"))] let _lock = crate::compile::take_solc_installer_lock(); let mut sources_by_version = std::collections::BTreeMap::new(); for (version, sources) in self.inner { if !version.is_installed() { if self.offline { return Err(SolcError::msg(format!( "missing solc \"{}\" installation in offline mode", version ))) } else { Solc::blocking_install(version.as_ref())?; } } let solc = Solc::find_svm_installed_version(version.to_string())?.ok_or_else(|| { SolcError::msg(format!("solc \"{}\" should have been installed", version)) })?; tracing::trace!("verifying solc checksum for {}", solc.solc.display()); if solc.verify_checksum().is_err() { tracing::trace!("corrupted solc version, redownloading \"{}\"", version); Solc::blocking_install(version.as_ref())?; tracing::trace!("reinstalled solc: \"{}\"", version); } let solc = solc.arg("--allow-paths").arg(allowed_lib_paths.to_string()); let version = solc.version()?; sources_by_version.insert(solc, (version, sources)); } Ok(sources_by_version) } } #[derive(Debug)] pub struct Node { path: PathBuf, source: Source, data: SolData, } impl Node { pub fn content(&self) -> &str { &self.source.content } pub fn imports(&self) -> &Vec> { 
&self.data.imports } pub fn version(&self) -> &Option> { &self.data.version } pub fn license(&self) -> &Option> { &self.data.license } } #[derive(Debug, Clone)] #[allow(unused)] struct SolData { license: Option>, version: Option>, imports: Vec>, version_req: Option, } #[derive(Debug, Clone)] pub struct SolDataUnit { loc: Location, data: T, } #[derive(Debug, Clone)] pub struct Location { pub start: usize, pub end: usize, } /// Solidity Data Unit decorated with its location within the file impl SolDataUnit { pub fn new(data: T, loc: Location) -> Self { Self { data, loc } } /// Returns the underlying data for the unit pub fn data(&self) -> &T { &self.data } /// Returns the location of the given data unit pub fn loc(&self) -> (usize, usize) { (self.loc.start, self.loc.end) } /// Returns the location of the given data unit adjusted by an offset. /// Used to determine new position of the unit within the file after /// content manipulation. pub fn loc_by_offset(&self, offset: isize) -> (usize, usize) { ( offset.saturating_add(self.loc.start as isize) as usize, // make the end location exclusive offset.saturating_add(self.loc.end as isize + 1) as usize, ) } } impl From> for Location { fn from(src: Match) -> Self { Location { start: src.start(), end: src.end() } } } impl From for Location { fn from(src: Loc) -> Self { Location { start: src.1, end: src.2 } } } fn read_node(file: impl AsRef) -> Result { let file = file.as_ref(); let source = Source::read(file)?; let data = parse_data(source.as_ref()); Ok(Node { path: file.to_path_buf(), source, data }) } /// Extracts the useful data from a solidity source /// /// This will attempt to parse the solidity AST and extract the imports and version pragma. 
If /// parsing fails, we'll fall back to extract that info via regex fn parse_data(content: &str) -> SolData { let mut version = None; let mut imports = Vec::>::new(); match solang_parser::parse(content, 0) { Ok((units, _)) => { for unit in units.0 { match unit { SourceUnitPart::PragmaDirective(loc, _, pragma, value) => { if pragma.name == "solidity" { // we're only interested in the solidity version pragma version = Some(SolDataUnit::new(value.string, loc.into())); } } SourceUnitPart::ImportDirective(_, import) => { let (import, loc) = match import { Import::Plain(s, l) => (s, l), Import::GlobalSymbol(s, _, l) => (s, l), Import::Rename(s, _, l) => (s, l), }; imports.push(SolDataUnit::new(PathBuf::from(import.string), loc.into())); } _ => {} } } } Err(err) => { tracing::trace!( "failed to parse solidity ast: \"{:?}\". Falling back to regex to extract data", err ); version = capture_outer_and_inner(content, &utils::RE_SOL_PRAGMA_VERSION, &["version"]) .first() .map(|(cap, name)| { SolDataUnit::new(name.as_str().to_owned(), cap.to_owned().into()) }); imports = capture_outer_and_inner(content, &utils::RE_SOL_IMPORT, &["p1", "p2", "p3"]) .iter() .map(|(cap, m)| SolDataUnit::new(PathBuf::from(m.as_str()), cap.to_owned().into())) .collect(); } }; let license = content.lines().next().and_then(|line| { capture_outer_and_inner(line, &utils::RE_SOL_SDPX_LICENSE_IDENTIFIER, &["license"]) .first() .map(|(cap, l)| SolDataUnit::new(l.as_str().to_owned(), cap.to_owned().into())) }); let version_req = version.as_ref().and_then(|v| Solc::version_req(v.data()).ok()); SolData { version_req, version, imports, license } } /// Given the regex and the target string, find all occurrences /// of named groups within the string. This method returns /// the tuple of matches `(a, b)` where `a` is the match for the /// entire regex and `b` is the match for the first named group. 
/// /// NOTE: This method will return the match for the first named /// group, so the order of passed named groups matters. fn capture_outer_and_inner<'a>( content: &'a str, regex: ®ex::Regex, names: &[&str], ) -> Vec<(regex::Match<'a>, regex::Match<'a>)> { regex .captures_iter(content) .filter_map(|cap| { let cap_match = names.iter().find_map(|name| cap.name(name)); cap_match.and_then(|m| cap.get(0).map(|outer| (outer.to_owned(), m))) }) .collect() } #[cfg(test)] mod tests { use super::*; #[test] fn can_resolve_hardhat_dependency_graph() { let root = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("test-data/hardhat-sample"); let paths = ProjectPathsConfig::hardhat(root).unwrap(); let graph = Graph::resolve(&paths).unwrap(); assert_eq!(graph.edges.num_input_files, 1); assert_eq!(graph.files().len(), 2); assert_eq!( graph.files().clone(), HashMap::from([ (paths.sources.join("Greeter.sol"), 0), (paths.root.join("node_modules/hardhat/console.sol"), 1), ]) ); } #[test] fn can_resolve_dapp_dependency_graph() { let root = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("test-data/dapp-sample"); let paths = ProjectPathsConfig::dapptools(root).unwrap(); let graph = Graph::resolve(&paths).unwrap(); assert_eq!(graph.edges.num_input_files, 2); assert_eq!(graph.files().len(), 3); assert_eq!( graph.files().clone(), HashMap::from([ (paths.sources.join("Dapp.sol"), 0), (paths.sources.join("Dapp.t.sol"), 1), (paths.root.join("lib/ds-test/src/test.sol"), 2), ]) ); let dapp_test = graph.node(1); assert_eq!(dapp_test.path, paths.sources.join("Dapp.t.sol")); assert_eq!( dapp_test.data.imports.iter().map(|i| i.data()).collect::>(), vec![&PathBuf::from("ds-test/test.sol"), &PathBuf::from("./Dapp.sol")] ); assert_eq!(graph.imported_nodes(1).to_vec(), vec![2, 0]); } }