From 38b984c35361c205ae9694136870c24f4c31e541 Mon Sep 17 00:00:00 2001 From: Matthias Seitz Date: Tue, 5 Apr 2022 00:50:10 +0200 Subject: [PATCH] fix(solc): extend sparse mode to linked references (#1107) * refactor: move sol parsing to separate mod * refactor: make parse a struct function * feat: add inline function * chore: rename sparse filter * feat: move data to edges type * refactor: supply graph in compile function * feat: resolve link references * test: add sparse mode test --- ethers-solc/src/cache.rs | 8 + ethers-solc/src/compile/project.rs | 26 +-- ethers-solc/src/filter.rs | 148 ++++++++++----- ethers-solc/src/project_util/mod.rs | 10 +- ethers-solc/src/resolver/mod.rs | 264 +++++++-------------------- ethers-solc/src/resolver/parse.rs | 271 ++++++++++++++++++++++++++++ ethers-solc/tests/project.rs | 42 ++++- 7 files changed, 505 insertions(+), 264 deletions(-) create mode 100644 ethers-solc/src/resolver/parse.rs diff --git a/ethers-solc/src/cache.rs b/ethers-solc/src/cache.rs index e1f2ed58..f87c4bd4 100644 --- a/ethers-solc/src/cache.rs +++ b/ethers-solc/src/cache.rs @@ -807,6 +807,14 @@ impl<'a, T: ArtifactOutput> ArtifactsCache<'a, T> { Ok(cache) } + /// Returns the graph data for this project + pub fn graph(&self) -> &GraphEdges { + match self { + ArtifactsCache::Ephemeral(graph, _) => graph, + ArtifactsCache::Cached(inner) => &inner.edges, + } + } + #[cfg(test)] #[allow(unused)] #[doc(hidden)] diff --git a/ethers-solc/src/compile/project.rs b/ethers-solc/src/compile/project.rs index 7169582c..b3716b6d 100644 --- a/ethers-solc/src/compile/project.rs +++ b/ethers-solc/src/compile/project.rs @@ -114,7 +114,7 @@ use crate::{ }; use rayon::prelude::*; -use crate::filter::SparseOutputFileFilter; +use crate::filter::SparseOutputFilter; use std::{collections::btree_map::BTreeMap, path::PathBuf, time::Instant}; #[derive(Debug)] @@ -125,7 +125,7 @@ pub struct ProjectCompiler<'a, T: ArtifactOutput> { /// how to compile all the sources sources: 
CompilerSources, /// How to select solc [`crate::artifacts::CompilerOutput`] for files - sparse_output: SparseOutputFileFilter, + sparse_output: SparseOutputFilter, } impl<'a, T: ArtifactOutput> ProjectCompiler<'a, T> { @@ -184,7 +184,7 @@ impl<'a, T: ArtifactOutput> ProjectCompiler<'a, T> { /// Applies the specified filter to be applied when selecting solc output for /// specific files to be compiled - pub fn with_sparse_output(mut self, sparse_output: impl Into) -> Self { + pub fn with_sparse_output(mut self, sparse_output: impl Into) -> Self { self.sparse_output = sparse_output.into(); self } @@ -232,7 +232,7 @@ struct PreprocessedState<'a, T: ArtifactOutput> { sources: FilteredCompilerSources, /// cache that holds [CacheEntry] object if caching is enabled and the project is recompiled cache: ArtifactsCache<'a, T>, - sparse_output: SparseOutputFileFilter, + sparse_output: SparseOutputFilter, } impl<'a, T: ArtifactOutput> PreprocessedState<'a, T> { @@ -243,6 +243,7 @@ impl<'a, T: ArtifactOutput> PreprocessedState<'a, T> { &cache.project().solc_config.settings, &cache.project().paths, sparse_output, + cache.graph(), )?; Ok(CompiledState { output, cache }) @@ -372,14 +373,15 @@ impl FilteredCompilerSources { self, settings: &Settings, paths: &ProjectPathsConfig, - sparse_output: SparseOutputFileFilter, + sparse_output: SparseOutputFilter, + graph: &GraphEdges, ) -> Result { match self { FilteredCompilerSources::Sequential(input) => { - compile_sequential(input, settings, paths, sparse_output) + compile_sequential(input, settings, paths, sparse_output, graph) } FilteredCompilerSources::Parallel(input, j) => { - compile_parallel(input, j, settings, paths, sparse_output) + compile_parallel(input, j, settings, paths, sparse_output, graph) } } } @@ -399,7 +401,8 @@ fn compile_sequential( input: VersionedFilteredSources, settings: &Settings, paths: &ProjectPathsConfig, - sparse_output: SparseOutputFileFilter, + sparse_output: SparseOutputFilter, + graph: &GraphEdges, ) 
-> Result { let mut aggregated = AggregatedCompilerOutput::default(); tracing::trace!("compiling {} jobs sequentially", input.len()); @@ -425,7 +428,7 @@ fn compile_sequential( // depending on the composition of the filtered sources, the output selection can be // optimized let mut opt_settings = settings.clone(); - let sources = sparse_output.sparse_sources(filtered_sources, &mut opt_settings); + let sources = sparse_output.sparse_sources(filtered_sources, &mut opt_settings, graph); for input in CompilerInput::with_sources(sources) { let actually_dirty = input @@ -475,7 +478,8 @@ fn compile_parallel( num_jobs: usize, settings: &Settings, paths: &ProjectPathsConfig, - sparse_output: SparseOutputFileFilter, + sparse_output: SparseOutputFilter, + graph: &GraphEdges, ) -> Result { debug_assert!(num_jobs > 1); tracing::trace!( @@ -501,7 +505,7 @@ fn compile_parallel( // depending on the composition of the filtered sources, the output selection can be // optimized let mut opt_settings = settings.clone(); - let sources = sparse_output.sparse_sources(filtered_sources, &mut opt_settings); + let sources = sparse_output.sparse_sources(filtered_sources, &mut opt_settings, graph); for input in CompilerInput::with_sources(sources) { let actually_dirty = input diff --git a/ethers-solc/src/filter.rs b/ethers-solc/src/filter.rs index ad6ee238..0e1d2056 100644 --- a/ethers-solc/src/filter.rs +++ b/ethers-solc/src/filter.rs @@ -2,6 +2,7 @@ use crate::{ artifacts::{output_selection::OutputSelection, Settings}, + resolver::GraphEdges, Source, Sources, }; use std::{ @@ -52,7 +53,7 @@ impl FileFilter for TestFileFilter { /// A type that can apply a filter to a set of preprocessed [FilteredSources] in order to set sparse /// output for specific files -pub enum SparseOutputFileFilter { +pub enum SparseOutputFilter { /// Sets the configured [OutputSelection] for dirty files only. 
/// /// In other words, we request the output of solc only for files that have been detected as @@ -62,79 +63,130 @@ pub enum SparseOutputFileFilter { Custom(Box), } -impl SparseOutputFileFilter { +impl SparseOutputFilter { /// While solc needs all the files to compile the actual _dirty_ files, we can tell solc to /// output everything for those dirty files as currently configured in the settings, but output /// nothing for the other files that are _not_ dirty. /// /// This will modify the [OutputSelection] of the [Settings] so that we explicitly select the /// files' output based on their state. - pub fn sparse_sources(&self, sources: FilteredSources, settings: &mut Settings) -> Sources { - fn apply( - sources: &FilteredSources, - settings: &mut Settings, - f: impl Fn(&PathBuf, &FilteredSource) -> bool, - ) { - let selection = settings - .output_selection - .as_mut() - .remove("*") - .unwrap_or_else(OutputSelection::default_file_output_selection); - - for (file, source) in sources.0.iter() { - if f(file, source) { - settings - .output_selection - .as_mut() - .insert(format!("{}", file.display()), selection.clone()); - } else { - tracing::trace!("using pruned output selection for {}", file.display()); - settings.output_selection.as_mut().insert( - format!("{}", file.display()), - OutputSelection::empty_file_output_select(), - ); - } - } - } - + /// + /// This also takes the project's graph as input, this allows us to check if the files the + /// filter matches depend on libraries that need to be linked + pub fn sparse_sources( + &self, + sources: FilteredSources, + settings: &mut Settings, + graph: &GraphEdges, + ) -> Sources { match self { - SparseOutputFileFilter::AllDirty => { + SparseOutputFilter::AllDirty => { if !sources.all_dirty() { - // settings can be optimized - tracing::trace!( - "optimizing output selection for {}/{} sources", - sources.clean().count(), - sources.len() - ); - apply(&sources, settings, |_, source| source.is_dirty()) + 
Self::all_dirty(&sources, settings) } } - SparseOutputFileFilter::Custom(f) => { - tracing::trace!("optimizing output selection with custom filter",); - apply(&sources, settings, |p, source| source.is_dirty() && f.is_match(p)); + SparseOutputFilter::Custom(f) => { + Self::apply_custom_filter(&sources, settings, graph, f) } }; sources.into() } + + /// applies a custom filter and prunes the output of those source files for which the filter + /// returns `false`. + /// + /// However, this could result in accidentally pruning required link references (imported + /// libraries) that will be required at runtime. For example, if the filter only matches test + /// files (`*.t.sol`) and a test file makes use of a library that won't be inlined, then the + /// library's bytecode will be missing. Therefore, we detect all linkReferences of a file + /// and treat them as if the filter would also apply to those. + #[allow(clippy::borrowed_box)] + fn apply_custom_filter( + sources: &FilteredSources, + settings: &mut Settings, + graph: &GraphEdges, + f: &Box, + ) { + tracing::trace!("optimizing output selection with custom filter",); + let selection = settings + .output_selection + .as_mut() + .remove("*") + .unwrap_or_else(OutputSelection::default_file_output_selection); + + for (file, source) in sources.0.iter() { + let key = format!("{}", file.display()); + if source.is_dirty() && f.is_match(file) { + settings.output_selection.as_mut().insert(key, selection.clone()); + + // the filter might not cover link references that will be required by the file, so + // we check if the file has any libraries that won't be inlined and include them as + // well + for link in graph.get_link_references(file) { + settings + .output_selection + .as_mut() + .insert(format!("{}", link.display()), selection.clone()); + } + } else if !settings.output_selection.as_ref().contains_key(&key) { + tracing::trace!("using pruned output selection for {}", file.display()); + settings + .output_selection + 
.as_mut() + .insert(key, OutputSelection::empty_file_output_select()); + } + } + } + + /// prunes all clean sources and only selects an output for dirty sources + fn all_dirty(sources: &FilteredSources, settings: &mut Settings) { + // settings can be optimized + tracing::trace!( + "optimizing output selection for {}/{} sources", + sources.clean().count(), + sources.len() + ); + + let selection = settings + .output_selection + .as_mut() + .remove("*") + .unwrap_or_else(OutputSelection::default_file_output_selection); + + for (file, source) in sources.0.iter() { + if source.is_dirty() { + settings + .output_selection + .as_mut() + .insert(format!("{}", file.display()), selection.clone()); + } else { + tracing::trace!("using pruned output selection for {}", file.display()); + settings.output_selection.as_mut().insert( + format!("{}", file.display()), + OutputSelection::empty_file_output_select(), + ); + } + } + } } -impl From> for SparseOutputFileFilter { +impl From> for SparseOutputFilter { fn from(f: Box) -> Self { - SparseOutputFileFilter::Custom(f) + SparseOutputFilter::Custom(f) } } -impl Default for SparseOutputFileFilter { +impl Default for SparseOutputFilter { fn default() -> Self { - SparseOutputFileFilter::AllDirty + SparseOutputFilter::AllDirty } } -impl fmt::Debug for SparseOutputFileFilter { +impl fmt::Debug for SparseOutputFilter { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self { - SparseOutputFileFilter::AllDirty => f.write_str("AllDirty"), - SparseOutputFileFilter::Custom(_) => f.write_str("Custom"), + SparseOutputFilter::AllDirty => f.write_str("AllDirty"), + SparseOutputFilter::Custom(_) => f.write_str("Custom"), } } } diff --git a/ethers-solc/src/project_util/mod.rs b/ethers-solc/src/project_util/mod.rs index a35a41aa..a415c96a 100644 --- a/ethers-solc/src/project_util/mod.rs +++ b/ethers-solc/src/project_util/mod.rs @@ -7,7 +7,8 @@ use crate::{ project_util::mock::{MockProjectGenerator, MockProjectSettings}, utils::tempdir, 
Artifact, ArtifactOutput, Artifacts, ConfigurableArtifacts, ConfigurableContractArtifact, - PathStyle, Project, ProjectCompileOutput, ProjectPathsConfig, SolFilesCache, SolcIoError, + FileFilter, PathStyle, Project, ProjectCompileOutput, ProjectPathsConfig, SolFilesCache, + SolcIoError, }; use fs_extra::{dir, file}; use std::{ @@ -69,6 +70,13 @@ impl TempProject { self.project().compile() } + pub fn compile_sparse( + &self, + filter: F, + ) -> Result> { + self.project().compile_sparse(filter) + } + pub fn flatten(&self, target: &Path) -> Result { self.project().flatten(target) } diff --git a/ethers-solc/src/resolver/mod.rs b/ethers-solc/src/resolver/mod.rs index e5d1cbae..621533ef 100644 --- a/ethers-solc/src/resolver/mod.rs +++ b/ethers-solc/src/resolver/mod.rs @@ -52,21 +52,23 @@ use std::{ path::{Path, PathBuf}, }; +use parse::{SolData, SolDataUnit}; use rayon::prelude::*; -use regex::Match; + use semver::VersionReq; -use solang_parser::pt::{Import, Loc, SourceUnitPart}; use crate::{error::Result, utils, ProjectPathsConfig, Solc, SolcError, Source, Sources}; +mod parse; mod tree; + pub use tree::{print, Charset, TreeOptions}; /// The underlying edges of the graph which only contains the raw relationship data. /// /// This is kept separate from the `Graph` as the `Node`s get consumed when the `Solc` to `Sources` /// set is determined. -#[derive(Debug, Clone)] +#[derive(Debug)] pub struct GraphEdges { /// The indices of `edges` correspond to the `nodes`. That is, `edges[0]` /// is the set of outgoing edges for `nodes[0]`. @@ -77,6 +79,8 @@ pub struct GraphEdges { rev_indices: HashMap, /// the identified version requirement of a file versions: HashMap>, + /// the extracted data from the source file + data: HashMap, /// with how many input files we started with, corresponds to `let input_files = /// nodes[..num_input_files]`. 
/// @@ -152,6 +156,22 @@ impl GraphEdges { .and_then(|idx| self.versions.get(idx)) .and_then(|v| v.as_ref()) } + + /// Returns those library files that will be required as `linkReferences` by the given file + /// + /// This is a preprocess function that attempts to resolve those libraries that the solidity + /// `file` will be required to link, and further restricts this list to libraries that won't + /// be inlined. See also [SolLibrary](parse::SolLibrary) + pub fn get_link_references(&self, file: impl AsRef) -> HashSet<&PathBuf> { + let mut link_references = HashSet::new(); + for import in self.all_imported_nodes(self.node_id(file)) { + let data = &self.data[&import]; + if data.has_link_references() { + link_references.insert(&self.rev_indices[&import]); + } + } + link_references + } } /// Represents a fully-resolved solidity dependency graph. Each node in the graph @@ -159,6 +179,7 @@ impl GraphEdges { /// See also #[derive(Debug)] pub struct Graph { + /// all nodes in the project, a `Node` represents a single file nodes: Vec, /// relationship of the nodes edges: GraphEdges, @@ -222,11 +243,25 @@ impl Graph { self.node_ids(start).map(move |idx| self.node(idx)) } + fn split(self) -> (Vec<(PathBuf, Source)>, GraphEdges) { + let Graph { nodes, mut edges, .. } = self; + // need to move the extracted data to the edges, essentially splitting the node so we have + // access to the data at a later stage in the compile pipeline + let mut sources = Vec::new(); + for (idx, node) in nodes.into_iter().enumerate() { + let Node { path, source, data } = node; + sources.push((path, source)); + edges.data.insert(idx, data); + } + + (sources, edges) + } + /// Consumes the `Graph`, effectively splitting the `nodes` and the `GraphEdges` off and /// returning the `nodes` converted to `Sources` pub fn into_sources(self) -> (Sources, GraphEdges) { - let Graph { nodes, edges, .. 
} = self; - (nodes.into_iter().map(|node| (node.path, node.source)).collect(), edges) + let (sources, edges) = self.split(); + (sources.into_iter().collect(), edges) } /// Returns an iterator that yields only those nodes that represent input files. @@ -255,7 +290,7 @@ impl Graph { resolved_imports.push(idx); } else { // imported file is not part of the input files - let node = read_node(&target)?; + let node = Node::read(&target)?; unresolved.push_back((target.clone(), node)); let idx = index.len(); index.insert(target, idx); @@ -269,7 +304,7 @@ impl Graph { let mut unresolved: VecDeque<(PathBuf, Node)> = sources .into_par_iter() .map(|(path, source)| { - let data = parse_data(source.as_ref(), &path); + let data = SolData::parse(source.as_ref(), &path); (path.clone(), Node { path, source, data }) }) .collect(); @@ -317,6 +352,7 @@ impl Graph { .enumerate() .map(|(idx, node)| (idx, node.data.version_req.clone())) .collect(), + data: Default::default(), }; Ok(Graph { nodes, edges, root: paths.root.clone() }) } @@ -344,7 +380,7 @@ impl Graph { /// cache entry for them as well. This can be optimized however fn insert_imports( idx: usize, - all_nodes: &mut HashMap, + all_nodes: &mut HashMap, sources: &mut Sources, edges: &[Vec], num_input_files: usize, @@ -354,8 +390,8 @@ impl Graph { // nodes are handled separately if dep >= num_input_files { // library import - if let Some(node) = all_nodes.remove(&dep) { - sources.insert(node.path, node.source); + if let Some((path, source)) = all_nodes.remove(&dep) { + sources.insert(path, source); insert_imports(dep, all_nodes, sources, edges, num_input_files); } } @@ -363,8 +399,10 @@ impl Graph { } let versioned_nodes = self.get_input_node_versions(offline)?; - let Self { nodes, edges, .. 
} = self; + let (nodes, edges) = self.split(); + let mut versioned_sources = HashMap::with_capacity(versioned_nodes.len()); + let mut all_nodes = nodes.into_iter().enumerate().collect::>(); // determine the `Sources` set for each solc version @@ -373,8 +411,8 @@ impl Graph { // we only process input nodes (from sources, tests for example) for idx in input_node_indices { // insert the input node in the sources set and remove it from the available set - let node = all_nodes.remove(&idx).expect("node is preset. qed"); - sources.insert(node.path, node.source); + let (path, source) = all_nodes.remove(&idx).expect("node is preset. qed"); + sources.insert(path, source); insert_imports( idx, &mut all_nodes, @@ -692,12 +730,23 @@ impl VersionedSources { #[derive(Debug)] pub struct Node { + /// path of the solidity file path: PathBuf, + /// content of the solidity file source: Source, + /// parsed data data: SolData, } impl Node { + /// Reads the content of the file and returns a [Node] containing relevant information + pub fn read(file: impl AsRef) -> crate::Result { + let file = file.as_ref(); + let source = Source::read(file).map_err(SolcError::Resolve)?; + let data = SolData::parse(source.as_ref(), file); + Ok(Self { path: file.to_path_buf(), source, data }) + } + pub fn content(&self) -> &str { &self.source.content } @@ -732,199 +781,10 @@ impl<'a> fmt::Display for DisplayNode<'a> { } } -#[derive(Debug, Clone)] -#[allow(unused)] -struct SolData { - license: Option>, - version: Option>, - imports: Vec>, - version_req: Option, -} - -impl SolData { - #[allow(unused)] - fn fmt_version( - &self, - f: &mut W, - ) -> std::result::Result<(), std::fmt::Error> { - if let Some(ref version) = self.version { - write!(f, "({})", version.data)?; - } - Ok(()) - } -} - -#[derive(Debug, Clone)] -pub struct SolDataUnit { - loc: Location, - data: T, -} -#[derive(Debug, Clone)] -pub struct Location { - pub start: usize, - pub end: usize, -} - -/// Solidity Data Unit decorated with its 
location within the file -impl SolDataUnit { - pub fn new(data: T, loc: Location) -> Self { - Self { data, loc } - } - - /// Returns the underlying data for the unit - pub fn data(&self) -> &T { - &self.data - } - - /// Returns the location of the given data unit - pub fn loc(&self) -> (usize, usize) { - (self.loc.start, self.loc.end) - } - - /// Returns the location of the given data unit adjusted by an offset. - /// Used to determine new position of the unit within the file after - /// content manipulation. - pub fn loc_by_offset(&self, offset: isize) -> (usize, usize) { - ( - offset.saturating_add(self.loc.start as isize) as usize, - // make the end location exclusive - offset.saturating_add(self.loc.end as isize + 1) as usize, - ) - } -} - -impl From> for Location { - fn from(src: Match) -> Self { - Location { start: src.start(), end: src.end() } - } -} - -impl From for Location { - fn from(src: Loc) -> Self { - match src { - Loc::File(_, start, end) => Location { start, end }, - _ => Location { start: 0, end: 0 }, - } - } -} - -fn read_node(file: impl AsRef) -> Result { - let file = file.as_ref(); - let source = Source::read(file).map_err(SolcError::Resolve)?; - let data = parse_data(source.as_ref(), file); - Ok(Node { path: file.to_path_buf(), source, data }) -} - -/// Extracts the useful data from a solidity source -/// -/// This will attempt to parse the solidity AST and extract the imports and version pragma. 
If -/// parsing fails, we'll fall back to extract that info via regex -fn parse_data(content: &str, file: &Path) -> SolData { - let mut version = None; - let mut imports = Vec::>::new(); - match solang_parser::parse(content, 0) { - Ok((units, _)) => { - for unit in units.0 { - match unit { - SourceUnitPart::PragmaDirective(loc, _, pragma, value) => { - if pragma.name == "solidity" { - // we're only interested in the solidity version pragma - version = Some(SolDataUnit::new(value.string, loc.into())); - } - } - SourceUnitPart::ImportDirective(_, import) => { - let (import, loc) = match import { - Import::Plain(s, l) => (s, l), - Import::GlobalSymbol(s, _, l) => (s, l), - Import::Rename(s, _, l) => (s, l), - }; - imports.push(SolDataUnit::new(PathBuf::from(import.string), loc.into())); - } - _ => {} - } - } - } - Err(err) => { - tracing::trace!( - "failed to parse \"{}\" ast: \"{:?}\". Falling back to regex to extract data", - file.display(), - err - ); - version = capture_outer_and_inner(content, &utils::RE_SOL_PRAGMA_VERSION, &["version"]) - .first() - .map(|(cap, name)| { - SolDataUnit::new(name.as_str().to_owned(), cap.to_owned().into()) - }); - imports = capture_imports(content); - } - }; - let license = content.lines().next().and_then(|line| { - capture_outer_and_inner(line, &utils::RE_SOL_SDPX_LICENSE_IDENTIFIER, &["license"]) - .first() - .map(|(cap, l)| SolDataUnit::new(l.as_str().to_owned(), cap.to_owned().into())) - }); - let version_req = version.as_ref().and_then(|v| Solc::version_req(v.data()).ok()); - SolData { version_req, version, imports, license } -} - -/// Given the regex and the target string, find all occurrences -/// of named groups within the string. This method returns -/// the tuple of matches `(a, b)` where `a` is the match for the -/// entire regex and `b` is the match for the first named group. -/// -/// NOTE: This method will return the match for the first named -/// group, so the order of passed named groups matters. 
-fn capture_outer_and_inner<'a>( - content: &'a str, - regex: ®ex::Regex, - names: &[&str], -) -> Vec<(regex::Match<'a>, regex::Match<'a>)> { - regex - .captures_iter(content) - .filter_map(|cap| { - let cap_match = names.iter().find_map(|name| cap.name(name)); - cap_match.and_then(|m| cap.get(0).map(|outer| (outer.to_owned(), m))) - }) - .collect() -} - -fn capture_imports(content: &str) -> Vec> { - capture_outer_and_inner(content, &utils::RE_SOL_IMPORT, &["p1", "p2", "p3", "p4"]) - .iter() - .map(|(cap, m)| SolDataUnit::new(PathBuf::from(m.as_str()), cap.to_owned().into())) - .collect() -} - #[cfg(test)] mod tests { use super::*; - #[test] - fn can_capture_curly_imports() { - let content = r#" -import { T } from "../Test.sol"; -import {ReentrancyGuard} from "@openzeppelin/contracts/utils/ReentrancyGuard.sol"; -import {DsTest} from "ds-test/test.sol"; -"#; - - let captured_imports = - capture_imports(content).into_iter().map(|s| s.data).collect::>(); - - let expected = - utils::find_import_paths(content).map(|m| m.as_str().into()).collect::>(); - - assert_eq!(captured_imports, expected); - - assert_eq!( - captured_imports, - vec![ - PathBuf::from("../Test.sol"), - "@openzeppelin/contracts/utils/ReentrancyGuard.sol".into(), - "ds-test/test.sol".into(), - ] - ); - } - #[test] fn can_resolve_hardhat_dependency_graph() { let root = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("test-data/hardhat-sample"); diff --git a/ethers-solc/src/resolver/parse.rs b/ethers-solc/src/resolver/parse.rs new file mode 100644 index 00000000..eba8b739 --- /dev/null +++ b/ethers-solc/src/resolver/parse.rs @@ -0,0 +1,271 @@ +use crate::{utils, Solc}; +use regex::Match; +use semver::VersionReq; +use solang_parser::pt::{ + ContractPart, ContractTy, FunctionAttribute, FunctionDefinition, Import, Loc, SourceUnitPart, + Visibility, +}; +use std::path::{Path, PathBuf}; + +/// Represents various information about a solidity file parsed via [solang_parser] +#[derive(Debug)] +#[allow(unused)] +pub 
struct SolData { + pub license: Option>, + pub version: Option>, + pub imports: Vec>, + pub version_req: Option, + pub libraries: Vec, + pub contracts: Vec, +} + +impl SolData { + #[allow(unused)] + pub fn fmt_version( + &self, + f: &mut W, + ) -> std::result::Result<(), std::fmt::Error> { + if let Some(ref version) = self.version { + write!(f, "({})", version.data)?; + } + Ok(()) + } + + /// Extracts the useful data from a solidity source + /// + /// This will attempt to parse the solidity AST and extract the imports and version pragma. If + /// parsing fails, we'll fall back to extract that info via regex + pub fn parse(content: &str, file: &Path) -> Self { + let mut version = None; + let mut imports = Vec::>::new(); + let mut libraries = Vec::new(); + let mut contracts = Vec::new(); + + match solang_parser::parse(content, 0) { + Ok((units, _)) => { + for unit in units.0 { + match unit { + SourceUnitPart::PragmaDirective(loc, _, pragma, value) => { + if pragma.name == "solidity" { + // we're only interested in the solidity version pragma + version = Some(SolDataUnit::new(value.string, loc.into())); + } + } + SourceUnitPart::ImportDirective(_, import) => { + let (import, loc) = match import { + Import::Plain(s, l) => (s, l), + Import::GlobalSymbol(s, _, l) => (s, l), + Import::Rename(s, _, l) => (s, l), + }; + imports + .push(SolDataUnit::new(PathBuf::from(import.string), loc.into())); + } + SourceUnitPart::ContractDefinition(def) => { + let functions = def + .parts + .into_iter() + .filter_map(|part| match part { + ContractPart::FunctionDefinition(f) => Some(*f), + _ => None, + }) + .collect(); + let name = def.name.name; + match def.ty { + ContractTy::Contract(_) => { + contracts.push(SolContract { name, functions }); + } + ContractTy::Library(_) => { + libraries.push(SolLibrary { name, functions }); + } + _ => {} + } + } + _ => {} + } + } + } + Err(err) => { + tracing::trace!( + "failed to parse \"{}\" ast: \"{:?}\". 
Falling back to regex to extract data", + file.display(), + err + ); + version = + capture_outer_and_inner(content, &utils::RE_SOL_PRAGMA_VERSION, &["version"]) + .first() + .map(|(cap, name)| { + SolDataUnit::new(name.as_str().to_owned(), cap.to_owned().into()) + }); + imports = capture_imports(content); + } + }; + let license = content.lines().next().and_then(|line| { + capture_outer_and_inner(line, &utils::RE_SOL_SDPX_LICENSE_IDENTIFIER, &["license"]) + .first() + .map(|(cap, l)| SolDataUnit::new(l.as_str().to_owned(), cap.to_owned().into())) + }); + let version_req = version.as_ref().and_then(|v| Solc::version_req(v.data()).ok()); + + Self { version_req, version, imports, license, libraries, contracts } + } + + /// Returns `true` if the solidity file associated with this type contains a solidity library + /// that won't be inlined + pub fn has_link_references(&self) -> bool { + self.libraries.iter().any(|lib| !lib.is_inlined()) + } +} + +/// Minimal representation of a contract inside a solidity file +#[derive(Debug)] +pub struct SolContract { + pub name: String, + pub functions: Vec, +} + +/// Minimal representation of a library inside a solidity file +#[derive(Debug)] +pub struct SolLibrary { + pub name: String, + pub functions: Vec, +} + +impl SolLibrary { + /// Returns `true` if all functions of this library will be inlined. + /// + /// This checks if all functions are either internal or private, because internal functions can + /// only be accessed from within the current contract or contracts deriving from it. They cannot + /// be accessed externally. Since they are not exposed to the outside through the contract’s + /// ABI, they can take parameters of internal types like mappings or storage references. 
+ /// + /// See also + pub fn is_inlined(&self) -> bool { + for f in self.functions.iter() { + for attr in f.attributes.iter() { + if let FunctionAttribute::Visibility(vis) = attr { + match vis { + Visibility::External(_) | Visibility::Public(_) => return false, + _ => {} + } + } + } + } + true + } +} + +/// Represents an item in a solidity file with its location in the file +#[derive(Debug, Clone)] +pub struct SolDataUnit { + loc: Location, + data: T, +} + +/// Location in a text file buffer +#[derive(Debug, Clone)] +pub struct Location { + pub start: usize, + pub end: usize, +} + +/// Solidity Data Unit decorated with its location within the file +impl SolDataUnit { + pub fn new(data: T, loc: Location) -> Self { + Self { data, loc } + } + + /// Returns the underlying data for the unit + pub fn data(&self) -> &T { + &self.data + } + + /// Returns the location of the given data unit + pub fn loc(&self) -> (usize, usize) { + (self.loc.start, self.loc.end) + } + + /// Returns the location of the given data unit adjusted by an offset. + /// Used to determine new position of the unit within the file after + /// content manipulation. + pub fn loc_by_offset(&self, offset: isize) -> (usize, usize) { + ( + offset.saturating_add(self.loc.start as isize) as usize, + // make the end location exclusive + offset.saturating_add(self.loc.end as isize + 1) as usize, + ) + } +} + +impl From> for Location { + fn from(src: Match) -> Self { + Location { start: src.start(), end: src.end() } + } +} + +impl From for Location { + fn from(src: Loc) -> Self { + match src { + Loc::File(_, start, end) => Location { start, end }, + _ => Location { start: 0, end: 0 }, + } + } +} + +/// Given the regex and the target string, find all occurrences +/// of named groups within the string. This method returns +/// the tuple of matches `(a, b)` where `a` is the match for the +/// entire regex and `b` is the match for the first named group. 
+/// +/// NOTE: This method will return the match for the first named +/// group, so the order of passed named groups matters. +fn capture_outer_and_inner<'a>( + content: &'a str, + regex: ®ex::Regex, + names: &[&str], +) -> Vec<(regex::Match<'a>, regex::Match<'a>)> { + regex + .captures_iter(content) + .filter_map(|cap| { + let cap_match = names.iter().find_map(|name| cap.name(name)); + cap_match.and_then(|m| cap.get(0).map(|outer| (outer.to_owned(), m))) + }) + .collect() +} + +pub fn capture_imports(content: &str) -> Vec> { + capture_outer_and_inner(content, &utils::RE_SOL_IMPORT, &["p1", "p2", "p3", "p4"]) + .iter() + .map(|(cap, m)| SolDataUnit::new(PathBuf::from(m.as_str()), cap.to_owned().into())) + .collect() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn can_capture_curly_imports() { + let content = r#" +import { T } from "../Test.sol"; +import {ReentrancyGuard} from "@openzeppelin/contracts/utils/ReentrancyGuard.sol"; +import {DsTest} from "ds-test/test.sol"; +"#; + + let captured_imports = + capture_imports(content).into_iter().map(|s| s.data).collect::>(); + + let expected = + utils::find_import_paths(content).map(|m| m.as_str().into()).collect::>(); + + assert_eq!(captured_imports, expected); + + assert_eq!( + captured_imports, + vec![ + PathBuf::from("../Test.sol"), + "@openzeppelin/contracts/utils/ReentrancyGuard.sol".into(), + "ds-test/test.sol".into(), + ] + ); + } +} diff --git a/ethers-solc/tests/project.rs b/ethers-solc/tests/project.rs index 4188067f..5c7bf890 100644 --- a/ethers-solc/tests/project.rs +++ b/ethers-solc/tests/project.rs @@ -12,7 +12,7 @@ use ethers_solc::{ project_util::*, remappings::Remapping, ConfigurableArtifacts, ExtraOutputValues, Graph, Project, ProjectCompileOutput, - ProjectPathsConfig, + ProjectPathsConfig, TestFileFilter, }; use pretty_assertions::assert_eq; @@ -737,7 +737,6 @@ fn can_recompile_with_changes() { #[test] fn can_recompile_with_lowercase_names() { - init_tracing(); let tmp = 
TempProject::dapptools().unwrap(); tmp.add_source( @@ -843,3 +842,42 @@ fn can_recompile_unchanged_with_empty_files() { assert!(compiled.find("A").is_some()); assert!(compiled.find("C").is_some()); } + +#[test] +fn can_compile_sparse_with_link_references() { + let tmp = TempProject::dapptools().unwrap(); + + tmp.add_source( + "ATest.t.sol", + r#" + pragma solidity =0.8.12; + import {MyLib} from "./mylib.sol"; + contract ATest { + function test_mylib() public returns (uint256) { + return MyLib.doStuff(); + } + } + "#, + ) + .unwrap(); + + tmp.add_source( + "mylib.sol", + r#" + pragma solidity =0.8.12; + library MyLib { + function doStuff() external pure returns (uint256) {return 1337;} + } + "#, + ) + .unwrap(); + + let mut compiled = tmp.compile_sparse(TestFileFilter::default()).unwrap(); + assert!(!compiled.has_compiler_errors()); + + println!("{}", compiled); + assert!(compiled.find("ATest").is_some()); + assert!(compiled.find("MyLib").is_some()); + let lib = compiled.remove("MyLib").unwrap(); + assert!(lib.bytecode.is_some()); +}