feat(solc): compiler pipeline improvements (#866)

* feat(solc): try to unify solc versions

* chore: some tracing

* feat: add compile exact
Matthias Seitz 2022-02-05 15:07:37 +01:00 committed by GitHub
parent b2b891dcf2
commit 7463e2da11
6 changed files with 262 additions and 34 deletions


@ -5,7 +5,7 @@ use colored::Colorize;
use md5::Digest;
use semver::Version;
use std::{
collections::BTreeMap,
collections::{BTreeMap, HashSet},
convert::TryFrom,
fmt, fs,
path::{Path, PathBuf},
@ -595,6 +595,22 @@ impl CompilerOutput {
pub fn split(self) -> (SourceFiles, OutputContracts) {
(SourceFiles(self.sources), OutputContracts(self.contracts))
}
/// Retains only those files the given iterator yields
///
/// In other words, removes all contracts for files not included in the iterator
pub fn retain_files<'a, I>(&mut self, files: I)
where
I: IntoIterator<Item = &'a str>,
{
let files: HashSet<_> = files.into_iter().collect();
self.contracts.retain(|f, _| files.contains(f.as_str()));
self.sources.retain(|f, _| files.contains(f.as_str()));
self.errors.retain(|err| {
err.source_location.as_ref().map(|s| files.contains(s.file.as_str())).unwrap_or(true)
});
}
}
/// A wrapper helper type for the `Contracts` type alias
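
For illustration, a minimal sketch of how the new `retain_files` helper might be used to trim a `CompilerOutput` down to a known set of files; the file name here is hypothetical:

```rust
use ethers_solc::CompilerOutput;

// Keep only the contracts, sources, and errors that belong to the listed
// files; everything else is dropped from the output.
fn trim_output(mut output: CompilerOutput) -> CompilerOutput {
    output.retain_files(["contracts/Greeter.sol"]);
    output
}
```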


@ -47,10 +47,21 @@ impl SolFilesCache {
self.files.is_empty()
}
/// How many entries the cache contains, where each entry represents a source file
pub fn len(&self) -> usize {
self.files.len()
}
/// How many `Artifacts` this cache references, where a source file can have multiple artifacts
pub fn artifacts_len(&self) -> usize {
self.entries().map(|entry| entry.artifacts().count()).sum()
}
/// Returns an iterator over all `CacheEntry` this cache contains
pub fn entries(&self) -> impl Iterator<Item = &CacheEntry> {
self.files.values()
}
/// Returns the corresponding `CacheEntry` for the file if it exists
pub fn entry(&self, file: impl AsRef<Path>) -> Option<&CacheEntry> {
self.files.get(file.as_ref())
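
A short sketch of the new cache introspection helpers; the cache file location is an assumption and may differ per project configuration:

```rust
use ethers_solc::cache::SolFilesCache;

fn inspect_cache() -> Result<(), Box<dyn std::error::Error>> {
    // Hypothetical cache location; `read` deserializes the cache JSON from disk.
    let cache = SolFilesCache::read("./cache/solidity-files-cache.json")?;
    println!(
        "cache tracks {} source files with {} artifacts",
        cache.len(),
        cache.artifacts_len()
    );
    Ok(())
}
```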
@ -117,7 +128,7 @@ impl SolFilesCache {
let file = fs::File::create(path).map_err(|err| SolcError::io(err, path))?;
tracing::trace!(
"writing cache with {} entries to json file: \"{}\"",
self.files.len(),
self.len(),
path.display()
);
serde_json::to_writer_pretty(file, self)?;
@ -528,7 +539,7 @@ pub(crate) struct ArtifactsCacheInner<'a, T: ArtifactOutput> {
/// [`crate::ArtifactOutput::on_output()`] all artifacts, their disk paths, are determined and
/// can be populated before the updated [`crate::SolFilesCache`] is finally written to disk,
/// see [`Cache::finish()`]
pub dirty_entries: HashMap<PathBuf, (CacheEntry, HashSet<Version>)>,
pub dirty_source_files: HashMap<PathBuf, (CacheEntry, HashSet<Version>)>,
/// the file hashes
pub content_hashes: HashMap<PathBuf, String>,
}
@ -562,11 +573,11 @@ impl<'a, T: ArtifactOutput> ArtifactsCacheInner<'a, T> {
///
/// If there is already an entry available for the file, the given version is added to the set
fn insert_new_cache_entry(&mut self, file: &Path, source: &Source, version: Version) {
if let Some((_, versions)) = self.dirty_entries.get_mut(file) {
if let Some((_, versions)) = self.dirty_source_files.get_mut(file) {
versions.insert(version);
} else {
let entry = self.create_cache_entry(file, source);
self.dirty_entries.insert(file.to_path_buf(), (entry, HashSet::from([version])));
self.dirty_source_files.insert(file.to_path_buf(), (entry, HashSet::from([version])));
}
}
@ -619,22 +630,20 @@ impl<'a, T: ArtifactOutput> ArtifactsCacheInner<'a, T> {
if let Some(hash) = self.content_hashes.get(file) {
if let Some(entry) = self.cache.entry(&file) {
if entry.content_hash.as_bytes() != hash.as_bytes() {
tracing::trace!(
"changed content hash for cached artifact \"{}\"",
file.display()
);
tracing::trace!("changed content hash for source file \"{}\"", file.display());
return true
}
if self.project.solc_config != entry.solc_config {
tracing::trace!(
"changed solc config for cached artifact \"{}\"",
file.display()
);
tracing::trace!("changed solc config for source file \"{}\"", file.display());
return true
}
if !entry.contains_version(version) {
tracing::trace!("missing linked artifacts for version \"{}\"", version);
tracing::trace!(
"missing linked artifacts for source file `{}` for version \"{}\"",
file.display(),
version
);
return true
}
@ -689,7 +698,7 @@ impl<'a, T: ArtifactOutput> ArtifactsCache<'a, T> {
// read all artifacts
let cached_artifacts = if project.paths.artifacts.exists() {
tracing::trace!("reading artifacts from cache..");
tracing::trace!("reading artifacts from cache...");
// if we failed to read the whole set of artifacts we use an empty set
let artifacts = cache.read_artifacts::<T::Artifact>().unwrap_or_default();
tracing::trace!("read {} artifacts from cache", artifacts.artifact_files().count());
@ -704,7 +713,7 @@ impl<'a, T: ArtifactOutput> ArtifactsCache<'a, T> {
edges,
project,
filtered: Default::default(),
dirty_entries: Default::default(),
dirty_source_files: Default::default(),
content_hashes: Default::default(),
};
@ -755,7 +764,7 @@ impl<'a, T: ArtifactOutput> ArtifactsCache<'a, T> {
let ArtifactsCacheInner {
mut cache,
mut cached_artifacts,
mut dirty_entries,
mut dirty_source_files,
filtered,
project,
..
@ -771,7 +780,7 @@ impl<'a, T: ArtifactOutput> ArtifactsCache<'a, T> {
// the versions, so we add the artifacts on a file by file basis
for (file, artifacts) in written_artifacts.as_ref() {
let file_path = Path::new(&file);
if let Some((entry, versions)) = dirty_entries.get_mut(file_path) {
if let Some((entry, versions)) = dirty_source_files.get_mut(file_path) {
entry.insert_artifacts(artifacts.iter().map(|(name, artifacts)| {
let artifacts = artifacts
.iter()
@ -800,7 +809,8 @@ impl<'a, T: ArtifactOutput> ArtifactsCache<'a, T> {
}
// add the new cache entries to the cache file
cache.extend(dirty_entries.into_iter().map(|(file, (entry, _))| (file, entry)));
cache
.extend(dirty_source_files.into_iter().map(|(file, (entry, _))| (file, entry)));
cache.strip_artifact_files_prefixes(project.artifacts_path());
// write to disk


@ -96,6 +96,17 @@ impl VersionedContracts {
})
}
/// Returns an iterator over (`file`, `name`, `Contract`, `Version`)
pub fn contracts_with_files_and_version(
&self,
) -> impl Iterator<Item = (&String, &String, &Contract, &Version)> {
self.0.iter().flat_map(|(file, contracts)| {
contracts.iter().flat_map(move |(name, c)| {
c.iter().map(move |c| (file, name, &c.contract, &c.version))
})
})
}
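
A sketch of consuming the new `(file, name, Contract, Version)` iterator; the import path and the way the `VersionedContracts` value is obtained are assumptions:

```rust
use ethers_solc::contracts::VersionedContracts;

// List every compiled contract together with the solc version that built it.
fn report_versions(contracts: &VersionedContracts) {
    for (file, name, _contract, version) in contracts.contracts_with_files_and_version() {
        println!("{}:{} (solc {})", file, name, version);
    }
}
```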
/// Returns an iterator over all contracts and their source names.
///
/// ```


@ -4,11 +4,10 @@ use crate::{
utils, CompilerInput, CompilerOutput,
};
use semver::{Version, VersionReq};
use serde::{de::DeserializeOwned, Serialize};
use serde::{de::DeserializeOwned, Deserialize, Serialize};
use std::{
fmt,
fmt::Formatter,
io::BufRead,
path::{Path, PathBuf},
process::{Command, Output, Stdio},
@ -173,7 +172,7 @@ impl From<SolcVersion> for Version {
}
impl fmt::Display for SolcVersion {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.as_ref())
}
}
@ -186,7 +185,7 @@ impl fmt::Display for SolcVersion {
/// 1. `SOLC_PATH` environment variable
/// 2. [svm](https://github.com/roynalnaruto/svm-rs)'s `global_version` (set via `svm use <version>`), stored at `<svm_home>/.global_version`
/// 3. `solc` otherwise
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Ord)]
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Ord, Serialize, Deserialize)]
pub struct Solc {
/// Path to the `solc` executable
pub solc: PathBuf,
@ -213,6 +212,16 @@ impl Default for Solc {
}
}
impl fmt::Display for Solc {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.solc.display())?;
if !self.args.is_empty() {
write!(f, " {}", self.args.join(" "))?;
}
Ok(())
}
}
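
The new `Display` impl makes a `Solc` instance loggable as the command line it represents; a minimal sketch:

```rust
use ethers_solc::Solc;

fn main() {
    // Prints the executable path, followed by any configured extra
    // arguments joined with spaces, e.g. "/usr/bin/solc".
    let solc = Solc::new("/usr/bin/solc");
    println!("{}", solc);
}
```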
impl Solc {
/// A new instance which points to `solc`
pub fn new(path: impl Into<PathBuf>) -> Self {
@ -466,6 +475,29 @@ impl Solc {
self.compile(&CompilerInput::new(path)?)
}
/// Same as [`Self::compile()`], but only returns those files which are included in the
/// `CompilerInput`.
///
/// In other words, this removes those files from the `CompilerOutput` that are __not__ included
/// in the provided `CompilerInput`.
///
/// # Example
///
/// ```no_run
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// use ethers_solc::{CompilerInput, Solc};
/// let solc = Solc::default();
/// let input = CompilerInput::new("./contracts")?;
/// let output = solc.compile_exact(&input)?;
/// # Ok(())
/// # }
/// ```
pub fn compile_exact(&self, input: &CompilerInput) -> Result<CompilerOutput> {
let mut out = self.compile(input)?;
out.retain_files(input.sources.keys().filter_map(|p| p.to_str()));
Ok(out)
}
/// Run `solc --standard-json` and return solc's output as
/// `CompilerOutput`
///


@ -301,6 +301,15 @@ impl CompilerSources {
CompilerSources::Parallel(input, j) => compile_parallel(input, j, settings, paths),
}
}
#[cfg(test)]
#[allow(unused)]
fn sources(&self) -> &VersionedSources {
match self {
CompilerSources::Sequential(v) => v,
CompilerSources::Parallel(v, _) => v,
}
}
}
/// Compiles the input set sequentially and returns an aggregated set of the solc `CompilerOutput`s
@ -350,7 +359,11 @@ fn compile_parallel(
paths: &ProjectPathsConfig,
) -> Result<AggregatedCompilerOutput> {
debug_assert!(num_jobs > 1);
tracing::trace!("compile sources in parallel using up to {} solc jobs", num_jobs);
tracing::trace!(
"compile {} sources in parallel using up to {} solc jobs",
input.len(),
num_jobs
);
let mut jobs = Vec::with_capacity(input.len());
for (solc, (version, sources)) in input {
@ -384,6 +397,8 @@ fn compile_parallel(
.collect::<Result<Vec<_>>>()
})?;
// TODO need to do post filtering as the output can contain more files than provided in the
// input
let mut aggregated = AggregatedCompilerOutput::default();
aggregated.extend_all(outputs);
@ -395,6 +410,7 @@ fn compile_parallel(
mod tests {
use super::*;
use crate::{project_util::TempProject, MinimalCombinedArtifacts};
use std::path::PathBuf;
#[allow(unused)]
@ -415,7 +431,7 @@ mod tests {
let prep = compiler.preprocess().unwrap();
let cache = prep.cache.as_cached().unwrap();
// 3 contracts
assert_eq!(cache.dirty_entries.len(), 3);
assert_eq!(cache.dirty_source_files.len(), 3);
assert!(cache.filtered.is_empty());
assert!(cache.cache.is_empty());
@ -435,6 +451,20 @@ mod tests {
let inner = project.project();
let compiler = ProjectCompiler::new(inner).unwrap();
let prep = compiler.preprocess().unwrap();
assert!(prep.cache.as_cached().unwrap().dirty_entries.is_empty())
assert!(prep.cache.as_cached().unwrap().dirty_source_files.is_empty())
}
#[test]
#[ignore]
fn can_compile_real_project() {
init_tracing();
let paths = ProjectPathsConfig::builder()
.root("../../foundry-integration-tests/testdata/solmate")
.build()
.unwrap();
let project = Project::builder().paths(paths).build().unwrap();
let compiler = ProjectCompiler::new(&project).unwrap();
let out = compiler.compile().unwrap();
println!("{}", out);
}
}


@ -14,6 +14,26 @@
//! Finding all dependencies is fairly simple, we're simply doing a DFS, starting the source
//! contracts
//!
//! ## Solc version auto-detection
//!
//! Solving a constraint graph is an NP-hard problem. The algorithm for finding the "best" solution
//! therefore makes several simplifying assumptions and tries to find a version of Solc that is
//! compatible with all source files.
//!
//! The algorithm employed here is fairly simple: we do a DFS over all the source files, find the
//! set of Solc versions that each file and all of its imports are compatible with, and then try
//! to find a single Solc version that is compatible with all the files. This is effectively the
//! intersection of all version sets.
//!
//! We always try to activate the highest (installed) Solc version first. An uninstalled Solc
//! version is only used if it is the only compatible version for a single file or for the
//! intersection of all version sets.
//!
//! This finds the optimal version, if one exists. If there is no single Solc version that is
//! compatible with all sources and their imports, the problem becomes much harder, because it is
//! no longer clear what the "best" solution would be. In that case, we choose the latest
//! (installed) Solc version and try to minimize the number of distinct Solc versions used.
//!
//! ## Performance
//!
//! Note that this is a relatively performance-critical portion of the ethers-solc preprocessing.
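
To make the intersection step described above concrete, here is a self-contained sketch using plain `semver` versions instead of the crate's internal `SolcVersion` type; the version numbers are illustrative:

```rust
use semver::Version;
use std::collections::HashSet;

// Intersect per-file candidate sets: the result is the set of versions that
// can compile every file, sorted ascending so the last entry is the best pick.
fn compatible_with_all(per_file: &[HashSet<Version>]) -> Vec<Version> {
    let mut iter = per_file.iter();
    let mut result = match iter.next() {
        Some(first) => first.clone(),
        None => return Vec::new(),
    };
    for set in iter {
        result.retain(|v| set.contains(v));
    }
    let mut versions: Vec<_> = result.into_iter().collect();
    versions.sort_unstable();
    versions
}

fn main() {
    let v = |s: &str| Version::parse(s).unwrap();
    let file_a: HashSet<_> = [v("0.8.10"), v("0.8.11")].into_iter().collect();
    let file_b: HashSet<_> = [v("0.8.11"), v("0.8.12")].into_iter().collect();
    // Both files are compatible with 0.8.11, so that version would be selected.
    println!("{:?}", compatible_with_all(&[file_a, file_b]));
}
```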
@ -375,14 +395,21 @@ impl Graph {
let mut erroneous_nodes =
std::collections::HashSet::with_capacity(self.edges.num_input_files);
// the sorted list of all versions
let all_versions = if offline { Solc::installed_versions() } else { Solc::all_versions() };
// stores all versions and their nodes
// stores all versions and their nodes that can be compiled
let mut versioned_nodes = HashMap::new();
// stores all files and the versions they're compatible with
let mut all_candidates = Vec::with_capacity(self.edges.num_input_files);
// walking through the node's dep tree and filtering the versions along the way
for idx in 0..self.edges.num_input_files {
let mut candidates = all_versions.iter().collect::<Vec<_>>();
// remove all incompatible versions from the candidates list by checking the node and
// all its imports
self.retain_compatible_versions(idx, &mut candidates);
if candidates.is_empty() && !erroneous_nodes.contains(&idx) {
@ -394,17 +421,35 @@ impl Graph {
));
erroneous_nodes.insert(idx);
} else {
let candidate = (*candidates
.iter()
.rev()
.find(|v| v.is_installed())
.or_else(|| candidates.iter().last())
.unwrap())
.clone();
// found viable candidates, pick the most recent version that's already installed
let candidate =
if let Some(pos) = candidates.iter().rposition(|v| v.is_installed()) {
candidates[pos]
} else {
candidates.last().expect("not empty; qed.")
}
.clone();
// also store all possible candidates to optimize the set
all_candidates.push((idx, candidates.into_iter().collect::<HashSet<_>>()));
versioned_nodes.entry(candidate).or_insert_with(|| Vec::with_capacity(1)).push(idx);
}
}
// detected multiple versions but there might still exist a single version that satisfies
// all sources
if versioned_nodes.len() > 1 {
versioned_nodes = Self::resolve_multiple_versions(all_candidates);
}
if versioned_nodes.len() == 1 {
tracing::trace!(
"found exact solc version for all sources \"{}\"",
versioned_nodes.keys().next().unwrap()
);
}
if errors.is_empty() {
tracing::trace!(
"resolved {} versions {:?}",
@ -417,6 +462,90 @@ impl Graph {
Err(crate::error::SolcError::msg(errors.join("\n")))
}
}
/// Tries to find the "best" mapping of versions to nodes, see [Solc version
/// auto-detection](#solc-version-auto-detection).
///
/// This is a bit inefficient, but that's fine: the maximum number of versions is ~80, and there's
/// a high chance that the number of source files is <50, even for larger projects.
fn resolve_multiple_versions(
all_candidates: Vec<(usize, HashSet<&crate::SolcVersion>)>,
) -> HashMap<crate::SolcVersion, Vec<usize>> {
// returns the intersection of the given version sets as a sorted vec of versions
fn intersection<'a>(
mut sets: Vec<&HashSet<&'a crate::SolcVersion>>,
) -> Vec<&'a crate::SolcVersion> {
if sets.is_empty() {
return Vec::new()
}
let mut result = sets.pop().expect("not empty; qed.").clone();
if !sets.is_empty() {
result.retain(|item| sets.iter().all(|set| set.contains(item)));
}
let mut v = result.into_iter().collect::<Vec<_>>();
v.sort_unstable();
v
}
/// Removes and returns the highest version that is installed.
/// If the candidates set contains only uninstalled versions, then this removes and returns
/// the highest uninstalled version.
fn remove_candidate(candidates: &mut Vec<&crate::SolcVersion>) -> crate::SolcVersion {
debug_assert!(!candidates.is_empty());
if let Some(pos) = candidates.iter().rposition(|v| v.is_installed()) {
candidates.remove(pos)
} else {
candidates.pop().expect("not empty; qed.")
}
.clone()
}
let all_sets = all_candidates.iter().map(|(_, versions)| versions).collect();
// find all versions that satisfy all nodes
let mut intersection = intersection(all_sets);
if !intersection.is_empty() {
let exact_version = remove_candidate(&mut intersection);
let all_nodes = all_candidates.into_iter().map(|(node, _)| node).collect();
tracing::trace!(
"resolved solc version compatible with all sources \"{}\"",
exact_version
);
return HashMap::from([(exact_version, all_nodes)])
}
// no version satisfies all nodes
let mut versioned_nodes: HashMap<crate::SolcVersion, Vec<usize>> = HashMap::new();
// try to minimize the set of versions, this is guaranteed to lead to `versioned_nodes.len()
// > 1` as no solc version exists that can satisfy all sources
for (node, versions) in all_candidates {
// need to sort them again
let mut versions = versions.into_iter().collect::<Vec<_>>();
versions.sort_unstable();
let candidate =
if let Some(idx) = versions.iter().rposition(|v| versioned_nodes.contains_key(v)) {
// use a version that's already in the set
versions.remove(idx).clone()
} else {
// use the highest version otherwise
remove_candidate(&mut versions)
};
versioned_nodes.entry(candidate).or_insert_with(|| Vec::with_capacity(1)).push(node);
}
tracing::trace!(
"no solc version can satisfy all source files, resolved multiple versions \"{:?}\"",
versioned_nodes.keys()
);
versioned_nodes
}
}
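
And a toy model of the fallback minimization in `resolve_multiple_versions`, with version strings standing in for `SolcVersion` (string ordering happens to match semver ordering for this data):

```rust
use std::collections::HashMap;

// Greedy pass: prefer a version that is already assigned to an earlier node,
// otherwise fall back to the node's highest candidate, mirroring the logic above.
fn minimize(all_candidates: Vec<(usize, Vec<&str>)>) -> HashMap<&str, Vec<usize>> {
    let mut versioned_nodes: HashMap<&str, Vec<usize>> = HashMap::new();
    for (node, mut versions) in all_candidates {
        versions.sort_unstable();
        let candidate = versions
            .iter()
            .rev()
            .find(|v| versioned_nodes.contains_key(*v))
            .copied()
            .unwrap_or_else(|| versions.last().copied().unwrap());
        versioned_nodes.entry(candidate).or_default().push(node);
    }
    versioned_nodes
}

fn main() {
    let candidates = vec![
        (0, vec!["0.6.12", "0.7.6"]),
        (1, vec!["0.7.6", "0.8.11"]),
        (2, vec!["0.8.11"]),
    ];
    // No single version satisfies all three nodes; nodes 0 and 1 end up
    // sharing 0.7.6 and node 2 gets 0.8.11, i.e. two versions instead of three.
    println!("{:?}", minimize(candidates));
}
```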
/// An iterator over a node and its dependencies