From 7463e2da11cab1a2b41ddb31a49f6f6efb98651c Mon Sep 17 00:00:00 2001 From: Matthias Seitz Date: Sat, 5 Feb 2022 15:07:37 +0100 Subject: [PATCH] feat(solc): compiler pipeline improvements (#866) * feat(solc): try to unify solc versions * chore: some tracing * feat: add compile exact --- ethers-solc/src/artifacts.rs | 18 +++- ethers-solc/src/cache.rs | 46 +++++---- ethers-solc/src/compile/contracts.rs | 11 ++ ethers-solc/src/compile/mod.rs | 40 +++++++- ethers-solc/src/compile/project.rs | 36 ++++++- ethers-solc/src/resolver.rs | 145 +++++++++++++++++++++++++-- 6 files changed, 262 insertions(+), 34 deletions(-) diff --git a/ethers-solc/src/artifacts.rs b/ethers-solc/src/artifacts.rs index e0281d74..643f5370 100644 --- a/ethers-solc/src/artifacts.rs +++ b/ethers-solc/src/artifacts.rs @@ -5,7 +5,7 @@ use colored::Colorize; use md5::Digest; use semver::Version; use std::{ - collections::BTreeMap, + collections::{BTreeMap, HashSet}, convert::TryFrom, fmt, fs, path::{Path, PathBuf}, @@ -595,6 +595,22 @@ impl CompilerOutput { pub fn split(self) -> (SourceFiles, OutputContracts) { (SourceFiles(self.sources), OutputContracts(self.contracts)) } + + /// Retains only those files the given iterator yields + /// + /// In other words, removes all contracts for files not included in the iterator + pub fn retain_files<'a, I>(&mut self, files: I) + where + I: IntoIterator, + { + let files: HashSet<_> = files.into_iter().collect(); + + self.contracts.retain(|f, _| files.contains(f.as_str())); + self.sources.retain(|f, _| files.contains(f.as_str())); + self.errors.retain(|err| { + err.source_location.as_ref().map(|s| files.contains(s.file.as_str())).unwrap_or(true) + }); + } } /// A wrapper helper type for the `Contracts` type alias diff --git a/ethers-solc/src/cache.rs b/ethers-solc/src/cache.rs index bf1ec42b..c894b517 100644 --- a/ethers-solc/src/cache.rs +++ b/ethers-solc/src/cache.rs @@ -47,10 +47,21 @@ impl SolFilesCache { self.files.is_empty() } + /// How many entries the 
cache contains where each entry represents a source file pub fn len(&self) -> usize { self.files.len() } + /// How many `Artifacts` this cache references, where a source file can have multiple artifacts + pub fn artifacts_len(&self) -> usize { + self.entries().map(|entry| entry.artifacts().count()).sum() + } + + /// Returns an iterator over all `CacheEntry` this cache contains + pub fn entries(&self) -> impl Iterator { + self.files.values() + } + /// Returns the corresponding `CacheEntry` for the file if it exists pub fn entry(&self, file: impl AsRef) -> Option<&CacheEntry> { self.files.get(file.as_ref()) @@ -117,7 +128,7 @@ impl SolFilesCache { let file = fs::File::create(path).map_err(|err| SolcError::io(err, path))?; tracing::trace!( "writing cache with {} entries to json file: \"{}\"", - self.files.len(), + self.len(), path.display() ); serde_json::to_writer_pretty(file, self)?; @@ -528,7 +539,7 @@ pub(crate) struct ArtifactsCacheInner<'a, T: ArtifactOutput> { /// [`crate::ArtifactOutput::on_output()`] all artifacts, their disk paths, are determined and /// can be populated before the updated [`crate::SolFilesCache`] is finally written to disk, /// see [`Cache::finish()`] - pub dirty_entries: HashMap)>, + pub dirty_source_files: HashMap)>, /// the file hashes pub content_hashes: HashMap, } @@ -562,11 +573,11 @@ impl<'a, T: ArtifactOutput> ArtifactsCacheInner<'a, T> { /// /// If there is already an entry available for the file the given version is added to the set fn insert_new_cache_entry(&mut self, file: &Path, source: &Source, version: Version) { - if let Some((_, versions)) = self.dirty_entries.get_mut(file) { + if let Some((_, versions)) = self.dirty_source_files.get_mut(file) { versions.insert(version); } else { let entry = self.create_cache_entry(file, source); - self.dirty_entries.insert(file.to_path_buf(), (entry, HashSet::from([version]))); + self.dirty_source_files.insert(file.to_path_buf(), (entry, HashSet::from([version]))); } } @@ -619,22 +630,20 @@ 
impl<'a, T: ArtifactOutput> ArtifactsCacheInner<'a, T> { if let Some(hash) = self.content_hashes.get(file) { if let Some(entry) = self.cache.entry(&file) { if entry.content_hash.as_bytes() != hash.as_bytes() { - tracing::trace!( - "changed content hash for cached artifact \"{}\"", - file.display() - ); + tracing::trace!("changed content hash for source file \"{}\"", file.display()); return true } if self.project.solc_config != entry.solc_config { - tracing::trace!( - "changed solc config for cached artifact \"{}\"", - file.display() - ); + tracing::trace!("changed solc config for source file \"{}\"", file.display()); return true } if !entry.contains_version(version) { - tracing::trace!("missing linked artifacts for version \"{}\"", version); + tracing::trace!( + "missing linked artifacts for source file `{}` for version \"{}\"", + file.display(), + version + ); return true } @@ -689,7 +698,7 @@ impl<'a, T: ArtifactOutput> ArtifactsCache<'a, T> { // read all artifacts let cached_artifacts = if project.paths.artifacts.exists() { - tracing::trace!("reading artifacts from cache.."); + tracing::trace!("reading artifacts from cache..."); // if we failed to read the whole set of artifacts we use an empty set let artifacts = cache.read_artifacts::().unwrap_or_default(); tracing::trace!("read {} artifacts from cache", artifacts.artifact_files().count()); @@ -704,7 +713,7 @@ impl<'a, T: ArtifactOutput> ArtifactsCache<'a, T> { edges, project, filtered: Default::default(), - dirty_entries: Default::default(), + dirty_source_files: Default::default(), content_hashes: Default::default(), }; @@ -755,7 +764,7 @@ impl<'a, T: ArtifactOutput> ArtifactsCache<'a, T> { let ArtifactsCacheInner { mut cache, mut cached_artifacts, - mut dirty_entries, + mut dirty_source_files, filtered, project, .. 
@@ -771,7 +780,7 @@ impl<'a, T: ArtifactOutput> ArtifactsCache<'a, T> { // the versions, so we add the artifacts on a file by file basis for (file, artifacts) in written_artifacts.as_ref() { let file_path = Path::new(&file); - if let Some((entry, versions)) = dirty_entries.get_mut(file_path) { + if let Some((entry, versions)) = dirty_source_files.get_mut(file_path) { entry.insert_artifacts(artifacts.iter().map(|(name, artifacts)| { let artifacts = artifacts .iter() @@ -800,7 +809,8 @@ impl<'a, T: ArtifactOutput> ArtifactsCache<'a, T> { } // add the new cache entries to the cache file - cache.extend(dirty_entries.into_iter().map(|(file, (entry, _))| (file, entry))); + cache + .extend(dirty_source_files.into_iter().map(|(file, (entry, _))| (file, entry))); cache.strip_artifact_files_prefixes(project.artifacts_path()); // write to disk diff --git a/ethers-solc/src/compile/contracts.rs b/ethers-solc/src/compile/contracts.rs index 47e6db53..78655cc9 100644 --- a/ethers-solc/src/compile/contracts.rs +++ b/ethers-solc/src/compile/contracts.rs @@ -96,6 +96,17 @@ impl VersionedContracts { }) } + /// Returns an iterator over (`file`, `name`, `Contract`, `Version`) + pub fn contracts_with_files_and_version( + &self, + ) -> impl Iterator { + self.0.iter().flat_map(|(file, contracts)| { + contracts.iter().flat_map(move |(name, c)| { + c.iter().map(move |c| (file, name, &c.contract, &c.version)) + }) + }) + } + /// Returns an iterator over all contracts and their source names. 
/// /// ``` diff --git a/ethers-solc/src/compile/mod.rs b/ethers-solc/src/compile/mod.rs index a26f0fa7..9b7fd5e5 100644 --- a/ethers-solc/src/compile/mod.rs +++ b/ethers-solc/src/compile/mod.rs @@ -4,11 +4,10 @@ use crate::{ utils, CompilerInput, CompilerOutput, }; use semver::{Version, VersionReq}; -use serde::{de::DeserializeOwned, Serialize}; +use serde::{de::DeserializeOwned, Deserialize, Serialize}; use std::{ fmt, - fmt::Formatter, io::BufRead, path::{Path, PathBuf}, process::{Command, Output, Stdio}, @@ -173,7 +172,7 @@ impl From for Version { } impl fmt::Display for SolcVersion { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}", self.as_ref()) } } @@ -186,7 +185,7 @@ impl fmt::Display for SolcVersion { /// 1. `SOLC_PATH` environment variable /// 2. [svm](https://github.com/roynalnaruto/svm-rs)'s `global_version` (set via `svm use `), stored at `/.global_version` /// 3. `solc` otherwise -#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Ord)] +#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Ord, Serialize, Deserialize)] pub struct Solc { /// Path to the `solc` executable pub solc: PathBuf, @@ -213,6 +212,16 @@ impl Default for Solc { } } +impl fmt::Display for Solc { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.solc.display())?; + if !self.args.is_empty() { + write!(f, " {}", self.args.join(" "))?; + } + Ok(()) + } +} + impl Solc { /// A new instance which points to `solc` pub fn new(path: impl Into) -> Self { @@ -466,6 +475,29 @@ impl Solc { self.compile(&CompilerInput::new(path)?) } + /// Same as [`Self::compile()`], but only returns those files which are included in the + /// `CompilerInput`. + /// + /// In other words, this removes those files from the `CompilerOutput` that are __not__ included + /// in the provided `CompilerInput`. 
+ /// + /// # Example + /// + /// ```no_run + /// # fn main() -> Result<(), Box> { + /// use ethers_solc::{CompilerInput, Solc}; + /// let solc = Solc::default(); + /// let input = CompilerInput::new("./contracts")?; + /// let output = solc.compile_exact(&input)?; + /// # Ok(()) + /// # } + /// ``` + pub fn compile_exact(&self, input: &CompilerInput) -> Result { + let mut out = self.compile(input)?; + out.retain_files(input.sources.keys().filter_map(|p| p.to_str())); + Ok(out) + } + /// Run `solc --stand-json` and return the `solc`'s output as /// `CompilerOutput` /// diff --git a/ethers-solc/src/compile/project.rs b/ethers-solc/src/compile/project.rs index 5708494d..cfdb5b82 100644 --- a/ethers-solc/src/compile/project.rs +++ b/ethers-solc/src/compile/project.rs @@ -301,6 +301,15 @@ impl CompilerSources { CompilerSources::Parallel(input, j) => compile_parallel(input, j, settings, paths), } } + + #[cfg(test)] + #[allow(unused)] + fn sources(&self) -> &VersionedSources { + match self { + CompilerSources::Sequential(v) => v, + CompilerSources::Parallel(v, _) => v, + } + } } /// Compiles the input set sequentially and returns an aggregated set of the solc `CompilerOutput`s @@ -350,7 +359,11 @@ fn compile_parallel( paths: &ProjectPathsConfig, ) -> Result { debug_assert!(num_jobs > 1); - tracing::trace!("compile sources in parallel using up to {} solc jobs", num_jobs); + tracing::trace!( + "compile {} sources in parallel using up to {} solc jobs", + input.len(), + num_jobs + ); let mut jobs = Vec::with_capacity(input.len()); for (solc, (version, sources)) in input { @@ -384,6 +397,8 @@ fn compile_parallel( .collect::>>() })?; + // TODO need to do post filtering as the output can contain more files than provided in the + // input let mut aggregated = AggregatedCompilerOutput::default(); aggregated.extend_all(outputs); @@ -395,6 +410,7 @@ fn compile_parallel( mod tests { use super::*; use crate::{project_util::TempProject, MinimalCombinedArtifacts}; + use 
std::path::PathBuf; #[allow(unused)] @@ -415,7 +431,7 @@ mod tests { let prep = compiler.preprocess().unwrap(); let cache = prep.cache.as_cached().unwrap(); // 3 contracts - assert_eq!(cache.dirty_entries.len(), 3); + assert_eq!(cache.dirty_source_files.len(), 3); assert!(cache.filtered.is_empty()); assert!(cache.cache.is_empty()); @@ -435,6 +451,20 @@ mod tests { let inner = project.project(); let compiler = ProjectCompiler::new(inner).unwrap(); let prep = compiler.preprocess().unwrap(); - assert!(prep.cache.as_cached().unwrap().dirty_entries.is_empty()) + assert!(prep.cache.as_cached().unwrap().dirty_source_files.is_empty()) + } + + #[test] + #[ignore] + fn can_compile_real_project() { + init_tracing(); + let paths = ProjectPathsConfig::builder() + .root("../../foundry-integration-tests/testdata/solmate") + .build() + .unwrap(); + let project = Project::builder().paths(paths).build().unwrap(); + let compiler = ProjectCompiler::new(&project).unwrap(); + let out = compiler.compile().unwrap(); + println!("{}", out); } } diff --git a/ethers-solc/src/resolver.rs b/ethers-solc/src/resolver.rs index df46fcb4..4b5f51bd 100644 --- a/ethers-solc/src/resolver.rs +++ b/ethers-solc/src/resolver.rs @@ -14,6 +14,26 @@ //! Finding all dependencies is fairly simple, we're simply doing a DFS, starting the source //! contracts //! +//! ## Solc version auto-detection +//! +//! Solving a constraint graph is an NP-hard problem. The algorithm for finding the "best" solution +//! makes several assumptions and tries to find a version of "Solc" that is compatible with all +//! source files. +//! +//! The algorithm employed here is fairly simple, we simply do a DFS over all the source files and +//! find the set of Solc versions that the file and all its imports are compatible with, and then we +//! try to find a single Solc version that is compatible with all the files. This is effectively the +//! intersection of all version sets. +//! +//! 
We always try to activate the highest (installed) solc version first. Uninstalled solc is only +//! used if this version is the only compatible version for a single file or in the intersection of +//! all version sets. +//! +//! This leads to finding the optimal version, if there is one. If there is no single Solc version +//! that is compatible with all sources and their imports, then suddenly this becomes a very +//! difficult problem, because it is unclear what the "best" solution would be. In this case, just choose the +//! latest (installed) Solc version and try to minimize the number of Solc versions used. +//! //! ## Performance //! //! Note that this is a relatively performance-critical portion of the ethers-solc preprocessing. @@ -375,14 +395,21 @@ impl Graph { let mut erroneous_nodes = std::collections::HashSet::with_capacity(self.edges.num_input_files); + // the sorted list of all versions let all_versions = if offline { Solc::installed_versions() } else { Solc::all_versions() }; - // stores all versions and their nodes + // stores all versions and their nodes that can be compiled let mut versioned_nodes = HashMap::new(); + // stores all files and the versions they're compatible with + let mut all_candidates = Vec::with_capacity(self.edges.num_input_files); + // walking through the node's dep tree and filtering the versions along the way for idx in 0..self.edges.num_input_files { let mut candidates = all_versions.iter().collect::>(); + // dbg!(candidates.len()); + // remove all incompatible versions from the candidates list by checking the node and + // all its imports self.retain_compatible_versions(idx, &mut candidates); if candidates.is_empty() && !erroneous_nodes.contains(&idx) { @@ -394,17 +421,35 @@ impl Graph { )); erroneous_nodes.insert(idx); } else { - let candidate = (*candidates - .iter() - .rev() - .find(|v| v.is_installed()) - .or_else(|| candidates.iter().last()) - .unwrap()) - .clone(); + // found viable candidates, pick the most recent version that's 
already installed + let candidate = + if let Some(pos) = candidates.iter().rposition(|v| v.is_installed()) { + candidates[pos] + } else { + candidates.last().expect("not empty; qed.") + } + .clone(); + + // also store all possible candidates to optimize the set + all_candidates.push((idx, candidates.into_iter().collect::>())); + versioned_nodes.entry(candidate).or_insert_with(|| Vec::with_capacity(1)).push(idx); } } + // detected multiple versions but there might still exist a single version that satisfies + // all sources + if versioned_nodes.len() > 1 { + versioned_nodes = Self::resolve_multiple_versions(all_candidates); + } + + if versioned_nodes.len() == 1 { + tracing::trace!( + "found exact solc version for all sources \"{}\"", + versioned_nodes.keys().next().unwrap() + ); + } + if errors.is_empty() { tracing::trace!( "resolved {} versions {:?}", @@ -417,6 +462,90 @@ impl Graph { Err(crate::error::SolcError::msg(errors.join("\n"))) } } + + /// Tries to find the "best" set of versions to nodes, See [Solc version + /// auto-detection](#solc-version-auto-detection) + /// + /// This is a bit inefficient but is fine, the max. number of versions is ~80 and there's + /// a high chance that the number of source files is <50, even for larger projects. 
+ fn resolve_multiple_versions( + all_candidates: Vec<(usize, HashSet<&crate::SolcVersion>)>, + ) -> HashMap> { + // returns the versions that are contained in every given set, as a sorted list + fn intersection<'a>( + mut sets: Vec<&HashSet<&'a crate::SolcVersion>>, + ) -> Vec<&'a crate::SolcVersion> { + if sets.is_empty() { + return Vec::new() + } + + let mut result = sets.pop().cloned().expect("not empty; qed."); + if !sets.is_empty() { + result.retain(|item| sets.iter().all(|set| set.contains(item))); + } + + let mut v = result.into_iter().collect::>(); + v.sort_unstable(); + v + } + + /// removes and returns the highest version that is installed + /// if the candidates set only contains uninstalled versions then this returns the highest + /// uninstalled version + fn remove_candidate(candidates: &mut Vec<&crate::SolcVersion>) -> crate::SolcVersion { + debug_assert!(!candidates.is_empty()); + + if let Some(pos) = candidates.iter().rposition(|v| v.is_installed()) { + candidates.remove(pos) + } else { + candidates.pop().expect("not empty; qed.") + } + .clone() + } + + let all_sets = all_candidates.iter().map(|(_, versions)| versions).collect(); + + // find all versions that satisfy all nodes + let mut intersection = intersection(all_sets); + if !intersection.is_empty() { + let exact_version = remove_candidate(&mut intersection); + let all_nodes = all_candidates.into_iter().map(|(node, _)| node).collect(); + tracing::trace!( + "resolved solc version compatible with all sources \"{}\"", + exact_version + ); + return HashMap::from([(exact_version, all_nodes)]) + } + + // no version satisfies all nodes + let mut versioned_nodes: HashMap> = HashMap::new(); + + // try to minimize the set of versions, this is guaranteed to lead to `versioned_nodes.len() + // > 1` as no solc version exists that can satisfy all sources + for (node, versions) in all_candidates { + // need to sort them again + let mut versions = versions.into_iter().collect::>(); + versions.sort_unstable(); + + let candidate = + if let 
Some(idx) = versions.iter().rposition(|v| versioned_nodes.contains_key(v)) { + // use a version that's already in the set + versions.remove(idx).clone() + } else { + // use the highest version otherwise + remove_candidate(&mut versions) + }; + + versioned_nodes.entry(candidate).or_insert_with(|| Vec::with_capacity(1)).push(node); + } + + tracing::trace!( + "no solc version can satisfy all source files, resolved multiple versions \"{:?}\"", + versioned_nodes.keys() + ); + + versioned_nodes + } } /// An iterator over a node and its dependencies