fix(solc): extend sparse mode to linked references (#1107)

* refactor: move sol parsing to separate mod
* refactor: make parse a struct function
* feat: add inline function
* chore: rename sparse filter
* feat: move data to edges type
* refactor: supply graph in compile function
* feat: resolve link references
* test: add sparse mode test
2022-04-05 00:50:10 +02:00 · 2022-04-05 00:50:10 +02:00 · 38b984c353
parent 3edbcc1967
commit 38b984c353
7 changed files with 505 additions and 264 deletions
--- a/ethers-solc/src/cache.rs
+++ b/ethers-solc/src/cache.rs
@ -807,6 +807,14 @@ impl<'a, T: ArtifactOutput> ArtifactsCache<'a, T> {
        Ok(cache)
    }

+    /// Returns the graph data for this project
+    pub fn graph(&self) -> &GraphEdges {
+        match self {
+            ArtifactsCache::Ephemeral(graph, _) => graph,
+            ArtifactsCache::Cached(inner) => &inner.edges,
+        }
+    }
+
    #[cfg(test)]
    #[allow(unused)]
    #[doc(hidden)]
--- a/ethers-solc/src/compile/project.rs
+++ b/ethers-solc/src/compile/project.rs
@ -114,7 +114,7 @@ use crate::{
 };
 use rayon::prelude::*;

-use crate::filter::SparseOutputFileFilter;
+use crate::filter::SparseOutputFilter;
 use std::{collections::btree_map::BTreeMap, path::PathBuf, time::Instant};

 #[derive(Debug)]
@ -125,7 +125,7 @@ pub struct ProjectCompiler<'a, T: ArtifactOutput> {
    /// how to compile all the sources
    sources: CompilerSources,
    /// How to select solc [`crate::artifacts::CompilerOutput`] for files
-    sparse_output: SparseOutputFileFilter,
+    sparse_output: SparseOutputFilter,
 }

 impl<'a, T: ArtifactOutput> ProjectCompiler<'a, T> {
@ -184,7 +184,7 @@ impl<'a, T: ArtifactOutput> ProjectCompiler<'a, T> {

    /// Applies the specified filter to be applied when selecting solc output for
    /// specific files to be compiled
-    pub fn with_sparse_output(mut self, sparse_output: impl Into<SparseOutputFileFilter>) -> Self {
+    pub fn with_sparse_output(mut self, sparse_output: impl Into<SparseOutputFilter>) -> Self {
        self.sparse_output = sparse_output.into();
        self
    }
@ -232,7 +232,7 @@ struct PreprocessedState<'a, T: ArtifactOutput> {
    sources: FilteredCompilerSources,
    /// cache that holds [CacheEntry] object if caching is enabled and the project is recompiled
    cache: ArtifactsCache<'a, T>,
-    sparse_output: SparseOutputFileFilter,
+    sparse_output: SparseOutputFilter,
 }

 impl<'a, T: ArtifactOutput> PreprocessedState<'a, T> {
@ -243,6 +243,7 @@ impl<'a, T: ArtifactOutput> PreprocessedState<'a, T> {
            &cache.project().solc_config.settings,
            &cache.project().paths,
            sparse_output,
+            cache.graph(),
        )?;

        Ok(CompiledState { output, cache })
@ -372,14 +373,15 @@ impl FilteredCompilerSources {
        self,
        settings: &Settings,
        paths: &ProjectPathsConfig,
-        sparse_output: SparseOutputFileFilter,
+        sparse_output: SparseOutputFilter,
+        graph: &GraphEdges,
    ) -> Result<AggregatedCompilerOutput> {
        match self {
            FilteredCompilerSources::Sequential(input) => {
-                compile_sequential(input, settings, paths, sparse_output)
+                compile_sequential(input, settings, paths, sparse_output, graph)
            }
            FilteredCompilerSources::Parallel(input, j) => {
-                compile_parallel(input, j, settings, paths, sparse_output)
+                compile_parallel(input, j, settings, paths, sparse_output, graph)
            }
        }
    }
@ -399,7 +401,8 @@ fn compile_sequential(
    input: VersionedFilteredSources,
    settings: &Settings,
    paths: &ProjectPathsConfig,
-    sparse_output: SparseOutputFileFilter,
+    sparse_output: SparseOutputFilter,
+    graph: &GraphEdges,
 ) -> Result<AggregatedCompilerOutput> {
    let mut aggregated = AggregatedCompilerOutput::default();
    tracing::trace!("compiling {} jobs sequentially", input.len());
@ -425,7 +428,7 @@ fn compile_sequential(
        // depending on the composition of the filtered sources, the output selection can be
        // optimized
        let mut opt_settings = settings.clone();
-        let sources = sparse_output.sparse_sources(filtered_sources, &mut opt_settings);
+        let sources = sparse_output.sparse_sources(filtered_sources, &mut opt_settings, graph);

        for input in CompilerInput::with_sources(sources) {
            let actually_dirty = input
@ -475,7 +478,8 @@ fn compile_parallel(
    num_jobs: usize,
    settings: &Settings,
    paths: &ProjectPathsConfig,
-    sparse_output: SparseOutputFileFilter,
+    sparse_output: SparseOutputFilter,
+    graph: &GraphEdges,
 ) -> Result<AggregatedCompilerOutput> {
    debug_assert!(num_jobs > 1);
    tracing::trace!(
@ -501,7 +505,7 @@ fn compile_parallel(
        // depending on the composition of the filtered sources, the output selection can be
        // optimized
        let mut opt_settings = settings.clone();
-        let sources = sparse_output.sparse_sources(filtered_sources, &mut opt_settings);
+        let sources = sparse_output.sparse_sources(filtered_sources, &mut opt_settings, graph);

        for input in CompilerInput::with_sources(sources) {
            let actually_dirty = input
--- a/ethers-solc/src/filter.rs
+++ b/ethers-solc/src/filter.rs
@ -2,6 +2,7 @@

 use crate::{
    artifacts::{output_selection::OutputSelection, Settings},
+    resolver::GraphEdges,
    Source, Sources,
 };
 use std::{
@ -52,7 +53,7 @@ impl FileFilter for TestFileFilter {

 /// A type that can apply a filter to a set of preprocessed [FilteredSources] in order to set sparse
 /// output for specific files
-pub enum SparseOutputFileFilter {
+pub enum SparseOutputFilter {
    /// Sets the configured [OutputSelection] for dirty files only.
    ///
    /// In other words, we request the output of solc only for files that have been detected as
@ -62,79 +63,130 @@ pub enum SparseOutputFileFilter {
    Custom(Box<dyn FileFilter>),
 }

-impl SparseOutputFileFilter {
+impl SparseOutputFilter {
    /// While solc needs all the files to compile the actual _dirty_ files, we can tell solc to
    /// output everything for those dirty files as currently configured in the settings, but output
    /// nothing for the other files that are _not_ dirty.
    ///
    /// This will modify the [OutputSelection] of the [Settings] so that we explicitly select the
    /// files' output based on their state.
-    pub fn sparse_sources(&self, sources: FilteredSources, settings: &mut Settings) -> Sources {
-        fn apply(
-            sources: &FilteredSources,
-            settings: &mut Settings,
-            f: impl Fn(&PathBuf, &FilteredSource) -> bool,
-        ) {
-            let selection = settings
-                .output_selection
-                .as_mut()
-                .remove("*")
-                .unwrap_or_else(OutputSelection::default_file_output_selection);
-
-            for (file, source) in sources.0.iter() {
-                if f(file, source) {
-                    settings
-                        .output_selection
-                        .as_mut()
-                        .insert(format!("{}", file.display()), selection.clone());
-                } else {
-                    tracing::trace!("using pruned output selection for {}", file.display());
-                    settings.output_selection.as_mut().insert(
-                        format!("{}", file.display()),
-                        OutputSelection::empty_file_output_select(),
-                    );
-                }
-            }
-        }
-
+    ///
+    /// This also takes the project's graph as input, which allows us to check whether the files
+    /// matched by the filter depend on libraries that need to be linked.
+    pub fn sparse_sources(
+        &self,
+        sources: FilteredSources,
+        settings: &mut Settings,
+        graph: &GraphEdges,
+    ) -> Sources {
        match self {
-            SparseOutputFileFilter::AllDirty => {
+            SparseOutputFilter::AllDirty => {
                if !sources.all_dirty() {
-                    // settings can be optimized
-                    tracing::trace!(
-                        "optimizing output selection for {}/{} sources",
-                        sources.clean().count(),
-                        sources.len()
-                    );
-                    apply(&sources, settings, |_, source| source.is_dirty())
+                    Self::all_dirty(&sources, settings)
                }
            }
-            SparseOutputFileFilter::Custom(f) => {
-                tracing::trace!("optimizing output selection with custom filter",);
-                apply(&sources, settings, |p, source| source.is_dirty() && f.is_match(p));
+            SparseOutputFilter::Custom(f) => {
+                Self::apply_custom_filter(&sources, settings, graph, f)
            }
        };
        sources.into()
    }
+
+    /// applies a custom filter and prunes the output of those source files for which the filter
+    /// returns `false`.
+    ///
+    /// However, this could result in accidentally pruning link references (imported libraries)
+    /// that will be required at runtime. For example, if the filter only matches test files
+    /// (`*.t.sol`) and a test file makes use of a library that won't be inlined, then the
+    /// library's bytecode will be missing. Therefore, we detect all linkReferences of a file
+    /// and treat them as if the filter would also apply to those.
+    #[allow(clippy::borrowed_box)]
+    fn apply_custom_filter(
+        sources: &FilteredSources,
+        settings: &mut Settings,
+        graph: &GraphEdges,
+        f: &Box<dyn FileFilter>,
+    ) {
+        tracing::trace!("optimizing output selection with custom filter",);
+        let selection = settings
+            .output_selection
+            .as_mut()
+            .remove("*")
+            .unwrap_or_else(OutputSelection::default_file_output_selection);
+
+        for (file, source) in sources.0.iter() {
+            let key = format!("{}", file.display());
+            if source.is_dirty() && f.is_match(file) {
+                settings.output_selection.as_mut().insert(key, selection.clone());
+
+                // the filter might not cover link references that will be required by the file, so
+                // we check if the file has any libraries that won't be inlined and include them as
+                // well
+                for link in graph.get_link_references(file) {
+                    settings
+                        .output_selection
+                        .as_mut()
+                        .insert(format!("{}", link.display()), selection.clone());
+                }
+            } else if !settings.output_selection.as_ref().contains_key(&key) {
+                tracing::trace!("using pruned output selection for {}", file.display());
+                settings
+                    .output_selection
+                    .as_mut()
+                    .insert(key, OutputSelection::empty_file_output_select());
+            }
+        }
+    }
+
+    /// prunes all clean sources and only selects an output for dirty sources
+    fn all_dirty(sources: &FilteredSources, settings: &mut Settings) {
+        // settings can be optimized
+        tracing::trace!(
+            "optimizing output selection for {}/{} sources",
+            sources.clean().count(),
+            sources.len()
+        );
+
+        let selection = settings
+            .output_selection
+            .as_mut()
+            .remove("*")
+            .unwrap_or_else(OutputSelection::default_file_output_selection);
+
+        for (file, source) in sources.0.iter() {
+            if source.is_dirty() {
+                settings
+                    .output_selection
+                    .as_mut()
+                    .insert(format!("{}", file.display()), selection.clone());
+            } else {
+                tracing::trace!("using pruned output selection for {}", file.display());
+                settings.output_selection.as_mut().insert(
+                    format!("{}", file.display()),
+                    OutputSelection::empty_file_output_select(),
+                );
+            }
+        }
+    }
 }

-impl From<Box<dyn FileFilter>> for SparseOutputFileFilter {
+impl From<Box<dyn FileFilter>> for SparseOutputFilter {
    fn from(f: Box<dyn FileFilter>) -> Self {
-        SparseOutputFileFilter::Custom(f)
+        SparseOutputFilter::Custom(f)
    }
 }

-impl Default for SparseOutputFileFilter {
+impl Default for SparseOutputFilter {
    fn default() -> Self {
-        SparseOutputFileFilter::AllDirty
+        SparseOutputFilter::AllDirty
    }
 }

-impl fmt::Debug for SparseOutputFileFilter {
+impl fmt::Debug for SparseOutputFilter {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        match self {
-            SparseOutputFileFilter::AllDirty => f.write_str("AllDirty"),
-            SparseOutputFileFilter::Custom(_) => f.write_str("Custom"),
+            SparseOutputFilter::AllDirty => f.write_str("AllDirty"),
+            SparseOutputFilter::Custom(_) => f.write_str("Custom"),
        }
    }
 }
--- a/ethers-solc/src/project_util/mod.rs
+++ b/ethers-solc/src/project_util/mod.rs
@ -7,7 +7,8 @@ use crate::{
    project_util::mock::{MockProjectGenerator, MockProjectSettings},
    utils::tempdir,
    Artifact, ArtifactOutput, Artifacts, ConfigurableArtifacts, ConfigurableContractArtifact,
-    PathStyle, Project, ProjectCompileOutput, ProjectPathsConfig, SolFilesCache, SolcIoError,
+    FileFilter, PathStyle, Project, ProjectCompileOutput, ProjectPathsConfig, SolFilesCache,
+    SolcIoError,
 };
 use fs_extra::{dir, file};
 use std::{
@ -69,6 +70,13 @@ impl<T: ArtifactOutput> TempProject<T> {
        self.project().compile()
    }

+    pub fn compile_sparse<F: FileFilter + 'static>(
+        &self,
+        filter: F,
+    ) -> Result<ProjectCompileOutput<T>> {
+        self.project().compile_sparse(filter)
+    }
+
    pub fn flatten(&self, target: &Path) -> Result<String> {
        self.project().flatten(target)
    }
--- a/ethers-solc/src/resolver/mod.rs
+++ b/ethers-solc/src/resolver/mod.rs
@ -52,21 +52,23 @@ use std::{
    path::{Path, PathBuf},
 };

+use parse::{SolData, SolDataUnit};
 use rayon::prelude::*;
-use regex::Match;
+
 use semver::VersionReq;
-use solang_parser::pt::{Import, Loc, SourceUnitPart};

 use crate::{error::Result, utils, ProjectPathsConfig, Solc, SolcError, Source, Sources};

+mod parse;
 mod tree;
+
 pub use tree::{print, Charset, TreeOptions};

 /// The underlying edges of the graph which only contains the raw relationship data.
 ///
 /// This is kept separate from the `Graph` as the `Node`s get consumed when the `Solc` to `Sources`
 /// set is determined.
-#[derive(Debug, Clone)]
+#[derive(Debug)]
 pub struct GraphEdges {
    /// The indices of `edges` correspond to the `nodes`. That is, `edges[0]`
    /// is the set of outgoing edges for `nodes[0]`.
@ -77,6 +79,8 @@ pub struct GraphEdges {
    rev_indices: HashMap<usize, PathBuf>,
    /// the identified version requirement of a file
    versions: HashMap<usize, Option<VersionReq>>,
+    /// the extracted data from the source file
+    data: HashMap<usize, SolData>,
    /// with how many input files we started with, corresponds to `let input_files =
    /// nodes[..num_input_files]`.
    ///
@ -152,6 +156,22 @@ impl GraphEdges {
            .and_then(|idx| self.versions.get(idx))
            .and_then(|v| v.as_ref())
    }
+
+    /// Returns those library files that will be required as `linkReferences` by the given file
+    ///
+    /// This is a preprocessing function that attempts to resolve those libraries that the
+    /// solidity `file` will be required to link, and further restricts this list to libraries
+    /// that won't be inlined. See also [SolLibrary](parse::SolLibrary)
+    pub fn get_link_references(&self, file: impl AsRef<Path>) -> HashSet<&PathBuf> {
+        let mut link_references = HashSet::new();
+        for import in self.all_imported_nodes(self.node_id(file)) {
+            let data = &self.data[&import];
+            if data.has_link_references() {
+                link_references.insert(&self.rev_indices[&import]);
+            }
+        }
+        link_references
+    }
 }

 /// Represents a fully-resolved solidity dependency graph. Each node in the graph
@ -159,6 +179,7 @@ impl GraphEdges {
 /// See also <https://docs.soliditylang.org/en/latest/layout-of-source-files.html?highlight=import#importing-other-source-files>
 #[derive(Debug)]
 pub struct Graph {
+    /// all nodes in the project, a `Node` represents a single file
    nodes: Vec<Node>,
    /// relationship of the nodes
    edges: GraphEdges,
@ -222,11 +243,25 @@ impl Graph {
        self.node_ids(start).map(move |idx| self.node(idx))
    }

+    fn split(self) -> (Vec<(PathBuf, Source)>, GraphEdges) {
+        let Graph { nodes, mut edges, .. } = self;
+        // need to move the extracted data to the edges, essentially splitting the node so we have
+        // access to the data at a later stage in the compile pipeline
+        let mut sources = Vec::new();
+        for (idx, node) in nodes.into_iter().enumerate() {
+            let Node { path, source, data } = node;
+            sources.push((path, source));
+            edges.data.insert(idx, data);
+        }
+
+        (sources, edges)
+    }
+
    /// Consumes the `Graph`, effectively splitting the `nodes` and the `GraphEdges` off and
    /// returning the `nodes` converted to `Sources`
    pub fn into_sources(self) -> (Sources, GraphEdges) {
-        let Graph { nodes, edges, .. } = self;
-        (nodes.into_iter().map(|node| (node.path, node.source)).collect(), edges)
+        let (sources, edges) = self.split();
+        (sources.into_iter().collect(), edges)
    }

    /// Returns an iterator that yields only those nodes that represent input files.
@ -255,7 +290,7 @@ impl Graph {
                resolved_imports.push(idx);
            } else {
                // imported file is not part of the input files
-                let node = read_node(&target)?;
+                let node = Node::read(&target)?;
                unresolved.push_back((target.clone(), node));
                let idx = index.len();
                index.insert(target, idx);
@ -269,7 +304,7 @@ impl Graph {
        let mut unresolved: VecDeque<(PathBuf, Node)> = sources
            .into_par_iter()
            .map(|(path, source)| {
-                let data = parse_data(source.as_ref(), &path);
+                let data = SolData::parse(source.as_ref(), &path);
                (path.clone(), Node { path, source, data })
            })
            .collect();
@ -317,6 +352,7 @@ impl Graph {
                .enumerate()
                .map(|(idx, node)| (idx, node.data.version_req.clone()))
                .collect(),
+            data: Default::default(),
        };
        Ok(Graph { nodes, edges, root: paths.root.clone() })
    }
@ -344,7 +380,7 @@ impl Graph {
        /// cache entry for them as well. This can be optimized however
        fn insert_imports(
            idx: usize,
-            all_nodes: &mut HashMap<usize, Node>,
+            all_nodes: &mut HashMap<usize, (PathBuf, Source)>,
            sources: &mut Sources,
            edges: &[Vec<usize>],
            num_input_files: usize,
@ -354,8 +390,8 @@ impl Graph {
                // nodes are handled separately
                if dep >= num_input_files {
                    // library import
-                    if let Some(node) = all_nodes.remove(&dep) {
-                        sources.insert(node.path, node.source);
+                    if let Some((path, source)) = all_nodes.remove(&dep) {
+                        sources.insert(path, source);
                        insert_imports(dep, all_nodes, sources, edges, num_input_files);
                    }
                }
@ -363,8 +399,10 @@ impl Graph {
        }

        let versioned_nodes = self.get_input_node_versions(offline)?;
-        let Self { nodes, edges, .. } = self;
+        let (nodes, edges) = self.split();
+
        let mut versioned_sources = HashMap::with_capacity(versioned_nodes.len());
+
        let mut all_nodes = nodes.into_iter().enumerate().collect::<HashMap<_, _>>();

        // determine the `Sources` set for each solc version
@ -373,8 +411,8 @@ impl Graph {
            // we only process input nodes (from sources, tests for example)
            for idx in input_node_indices {
                // insert the input node in the sources set and remove it from the available set
-                let node = all_nodes.remove(&idx).expect("node is preset. qed");
-                sources.insert(node.path, node.source);
+                let (path, source) = all_nodes.remove(&idx).expect("node is preset. qed");
+                sources.insert(path, source);
                insert_imports(
                    idx,
                    &mut all_nodes,
@ -692,12 +730,23 @@ impl VersionedSources {

 #[derive(Debug)]
 pub struct Node {
+    /// path of the solidity file
    path: PathBuf,
+    /// content of the solidity file
    source: Source,
+    /// parsed data
    data: SolData,
 }

 impl Node {
+    /// Reads the content of the file and returns a [Node] containing relevant information
+    pub fn read(file: impl AsRef<Path>) -> crate::Result<Self> {
+        let file = file.as_ref();
+        let source = Source::read(file).map_err(SolcError::Resolve)?;
+        let data = SolData::parse(source.as_ref(), file);
+        Ok(Self { path: file.to_path_buf(), source, data })
+    }
+
    pub fn content(&self) -> &str {
        &self.source.content
    }
@ -732,199 +781,10 @@ impl<'a> fmt::Display for DisplayNode<'a> {
    }
 }

-#[derive(Debug, Clone)]
-#[allow(unused)]
-struct SolData {
-    license: Option<SolDataUnit<String>>,
-    version: Option<SolDataUnit<String>>,
-    imports: Vec<SolDataUnit<PathBuf>>,
-    version_req: Option<VersionReq>,
-}
-
-impl SolData {
-    #[allow(unused)]
-    fn fmt_version<W: std::fmt::Write>(
-        &self,
-        f: &mut W,
-    ) -> std::result::Result<(), std::fmt::Error> {
-        if let Some(ref version) = self.version {
-            write!(f, "({})", version.data)?;
-        }
-        Ok(())
-    }
-}
-
-#[derive(Debug, Clone)]
-pub struct SolDataUnit<T> {
-    loc: Location,
-    data: T,
-}
-#[derive(Debug, Clone)]
-pub struct Location {
-    pub start: usize,
-    pub end: usize,
-}
-
-/// Solidity Data Unit decorated with its location within the file
-impl<T> SolDataUnit<T> {
-    pub fn new(data: T, loc: Location) -> Self {
-        Self { data, loc }
-    }
-
-    /// Returns the underlying data for the unit
-    pub fn data(&self) -> &T {
-        &self.data
-    }
-
-    /// Returns the location of the given data unit
-    pub fn loc(&self) -> (usize, usize) {
-        (self.loc.start, self.loc.end)
-    }
-
-    /// Returns the location of the given data unit adjusted by an offset.
-    /// Used to determine new position of the unit within the file after
-    /// content manipulation.
-    pub fn loc_by_offset(&self, offset: isize) -> (usize, usize) {
-        (
-            offset.saturating_add(self.loc.start as isize) as usize,
-            // make the end location exclusive
-            offset.saturating_add(self.loc.end as isize + 1) as usize,
-        )
-    }
-}
-
-impl From<Match<'_>> for Location {
-    fn from(src: Match) -> Self {
-        Location { start: src.start(), end: src.end() }
-    }
-}
-
-impl From<Loc> for Location {
-    fn from(src: Loc) -> Self {
-        match src {
-            Loc::File(_, start, end) => Location { start, end },
-            _ => Location { start: 0, end: 0 },
-        }
-    }
-}
-
-fn read_node(file: impl AsRef<Path>) -> Result<Node> {
-    let file = file.as_ref();
-    let source = Source::read(file).map_err(SolcError::Resolve)?;
-    let data = parse_data(source.as_ref(), file);
-    Ok(Node { path: file.to_path_buf(), source, data })
-}
-
-/// Extracts the useful data from a solidity source
-///
-/// This will attempt to parse the solidity AST and extract the imports and version pragma. If
-/// parsing fails, we'll fall back to extract that info via regex
-fn parse_data(content: &str, file: &Path) -> SolData {
-    let mut version = None;
-    let mut imports = Vec::<SolDataUnit<PathBuf>>::new();
-    match solang_parser::parse(content, 0) {
-        Ok((units, _)) => {
-            for unit in units.0 {
-                match unit {
-                    SourceUnitPart::PragmaDirective(loc, _, pragma, value) => {
-                        if pragma.name == "solidity" {
-                            // we're only interested in the solidity version pragma
-                            version = Some(SolDataUnit::new(value.string, loc.into()));
-                        }
-                    }
-                    SourceUnitPart::ImportDirective(_, import) => {
-                        let (import, loc) = match import {
-                            Import::Plain(s, l) => (s, l),
-                            Import::GlobalSymbol(s, _, l) => (s, l),
-                            Import::Rename(s, _, l) => (s, l),
-                        };
-                        imports.push(SolDataUnit::new(PathBuf::from(import.string), loc.into()));
-                    }
-                    _ => {}
-                }
-            }
-        }
-        Err(err) => {
-            tracing::trace!(
-                "failed to parse \"{}\" ast: \"{:?}\". Falling back to regex to extract data",
-                file.display(),
-                err
-            );
-            version = capture_outer_and_inner(content, &utils::RE_SOL_PRAGMA_VERSION, &["version"])
-                .first()
-                .map(|(cap, name)| {
-                    SolDataUnit::new(name.as_str().to_owned(), cap.to_owned().into())
-                });
-            imports = capture_imports(content);
-        }
-    };
-    let license = content.lines().next().and_then(|line| {
-        capture_outer_and_inner(line, &utils::RE_SOL_SDPX_LICENSE_IDENTIFIER, &["license"])
-            .first()
-            .map(|(cap, l)| SolDataUnit::new(l.as_str().to_owned(), cap.to_owned().into()))
-    });
-    let version_req = version.as_ref().and_then(|v| Solc::version_req(v.data()).ok());
-    SolData { version_req, version, imports, license }
-}
-
-/// Given the regex and the target string, find all occurrences
-/// of named groups within the string. This method returns
-/// the tuple of matches `(a, b)` where `a` is the match for the
-/// entire regex and `b` is the match for the first named group.
-///
-/// NOTE: This method will return the match for the first named
-/// group, so the order of passed named groups matters.
-fn capture_outer_and_inner<'a>(
-    content: &'a str,
-    regex: &regex::Regex,
-    names: &[&str],
-) -> Vec<(regex::Match<'a>, regex::Match<'a>)> {
-    regex
-        .captures_iter(content)
-        .filter_map(|cap| {
-            let cap_match = names.iter().find_map(|name| cap.name(name));
-            cap_match.and_then(|m| cap.get(0).map(|outer| (outer.to_owned(), m)))
-        })
-        .collect()
-}
-
-fn capture_imports(content: &str) -> Vec<SolDataUnit<PathBuf>> {
-    capture_outer_and_inner(content, &utils::RE_SOL_IMPORT, &["p1", "p2", "p3", "p4"])
-        .iter()
-        .map(|(cap, m)| SolDataUnit::new(PathBuf::from(m.as_str()), cap.to_owned().into()))
-        .collect()
-}
-
 #[cfg(test)]
 mod tests {
    use super::*;

-    #[test]
-    fn can_capture_curly_imports() {
-        let content = r#"
-import { T } from "../Test.sol";
-import {ReentrancyGuard} from "@openzeppelin/contracts/utils/ReentrancyGuard.sol";
-import {DsTest} from "ds-test/test.sol";
-"#;
-
-        let captured_imports =
-            capture_imports(content).into_iter().map(|s| s.data).collect::<Vec<_>>();
-
-        let expected =
-            utils::find_import_paths(content).map(|m| m.as_str().into()).collect::<Vec<PathBuf>>();
-
-        assert_eq!(captured_imports, expected);
-
-        assert_eq!(
-            captured_imports,
-            vec![
-                PathBuf::from("../Test.sol"),
-                "@openzeppelin/contracts/utils/ReentrancyGuard.sol".into(),
-                "ds-test/test.sol".into(),
-            ]
-        );
-    }
-
    #[test]
    fn can_resolve_hardhat_dependency_graph() {
        let root = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("test-data/hardhat-sample");
--- a/ethers-solc/src/resolver/parse.rs
+++ b/ethers-solc/src/resolver/parse.rs
@ -0,0 +1,271 @@
+use crate::{utils, Solc};
+use regex::Match;
+use semver::VersionReq;
+use solang_parser::pt::{
+    ContractPart, ContractTy, FunctionAttribute, FunctionDefinition, Import, Loc, SourceUnitPart,
+    Visibility,
+};
+use std::path::{Path, PathBuf};
+
+/// Represents various information about a solidity file parsed via [solang_parser]
+///
+/// Instances are built by [`SolData::parse`], which falls back to regex extraction when the
+/// AST cannot be parsed. In that fallback case `libraries` and `contracts` stay empty.
+#[derive(Debug)]
+#[allow(unused)]
+pub struct SolData {
+    /// SPDX license identifier, looked up on the first line of the file only
+    pub license: Option<SolDataUnit<String>>,
+    /// Value of the `pragma solidity` directive, if the file has one
+    pub version: Option<SolDataUnit<String>>,
+    /// Paths of all import directives, as written in the source
+    pub imports: Vec<SolDataUnit<PathBuf>>,
+    /// `version` converted via `Solc::version_req`; `None` if there is no pragma or the
+    /// conversion failed
+    pub version_req: Option<VersionReq>,
+    /// All `library` definitions found in the file
+    pub libraries: Vec<SolLibrary>,
+    /// All `contract` definitions found in the file
+    pub contracts: Vec<SolContract>,
+}
+
+impl SolData {
+    /// Writes the version pragma of this file to `f`, wrapped in parentheses.
+    ///
+    /// Nothing is written when the file has no version pragma.
+    #[allow(unused)]
+    pub fn fmt_version<W: std::fmt::Write>(
+        &self,
+        f: &mut W,
+    ) -> std::result::Result<(), std::fmt::Error> {
+        if let Some(ref version) = self.version {
+            write!(f, "({})", version.data)?;
+        }
+        Ok(())
+    }
+
+    /// Extracts the useful data from a solidity source
+    ///
+    /// This will attempt to parse the solidity AST and extract the version pragma, the imports
+    /// and the contract and library definitions. If parsing fails, we'll fall back to extract
+    /// only the version pragma and the imports via regex.
+    ///
+    /// `content` is the solidity source and `file` is only used for the trace message that is
+    /// emitted when parsing fails.
+    pub fn parse(content: &str, file: &Path) -> Self {
+        let mut version = None;
+        let mut imports = Vec::<SolDataUnit<PathBuf>>::new();
+        let mut libraries = Vec::new();
+        let mut contracts = Vec::new();
+
+        // NOTE(review): the second argument is presumably the file number recorded in the
+        // returned locations; confirm against the solang_parser docs
+        match solang_parser::parse(content, 0) {
+            Ok((units, _)) => {
+                for unit in units.0 {
+                    match unit {
+                        SourceUnitPart::PragmaDirective(loc, _, pragma, value) => {
+                            if pragma.name == "solidity" {
+                                // we're only interested in the solidity version pragma
+                                version = Some(SolDataUnit::new(value.string, loc.into()));
+                            }
+                        }
+                        SourceUnitPart::ImportDirective(_, import) => {
+                            // every import flavour carries the path literal first and a
+                            // location last, the symbols in between are not needed
+                            let (import, loc) = match import {
+                                Import::Plain(s, l) => (s, l),
+                                Import::GlobalSymbol(s, _, l) => (s, l),
+                                Import::Rename(s, _, l) => (s, l),
+                            };
+                            imports
+                                .push(SolDataUnit::new(PathBuf::from(import.string), loc.into()));
+                        }
+                        SourceUnitPart::ContractDefinition(def) => {
+                            // keep only the function definitions, all other parts are dropped
+                            let functions = def
+                                .parts
+                                .into_iter()
+                                .filter_map(|part| match part {
+                                    ContractPart::FunctionDefinition(f) => Some(*f),
+                                    _ => None,
+                                })
+                                .collect();
+                            let name = def.name.name;
+                            // only contracts and libraries are recorded, any other kind of
+                            // definition is ignored
+                            match def.ty {
+                                ContractTy::Contract(_) => {
+                                    contracts.push(SolContract { name, functions });
+                                }
+                                ContractTy::Library(_) => {
+                                    libraries.push(SolLibrary { name, functions });
+                                }
+                                _ => {}
+                            }
+                        }
+                        _ => {}
+                    }
+                }
+            }
+            Err(err) => {
+                tracing::trace!(
+                    "failed to parse \"{}\" ast: \"{:?}\". Falling back to regex to extract data",
+                    file.display(),
+                    err
+                );
+                // regex fallback: only the first version pragma match and the imports are
+                // recovered, `libraries` and `contracts` stay empty
+                version =
+                    capture_outer_and_inner(content, &utils::RE_SOL_PRAGMA_VERSION, &["version"])
+                        .first()
+                        .map(|(cap, name)| {
+                            SolDataUnit::new(name.as_str().to_owned(), cap.to_owned().into())
+                        });
+                imports = capture_imports(content);
+            }
+        };
+        // the license identifier is only looked up on the very first line of the file
+        let license = content.lines().next().and_then(|line| {
+            capture_outer_and_inner(line, &utils::RE_SOL_SDPX_LICENSE_IDENTIFIER, &["license"])
+                .first()
+                .map(|(cap, l)| SolDataUnit::new(l.as_str().to_owned(), cap.to_owned().into()))
+        });
+        // a pragma that cannot be converted into a version requirement is silently ignored
+        let version_req = version.as_ref().and_then(|v| Solc::version_req(v.data()).ok());
+
+        Self { version_req, version, imports, license, libraries, contracts }
+    }
+
+    /// Returns `true` if the solidity file associated with this type contains a solidity library
+    /// that won't be inlined
+    ///
+    /// See [`SolLibrary::is_inlined`] for the criteria.
+    pub fn has_link_references(&self) -> bool {
+        self.libraries.iter().any(|lib| !lib.is_inlined())
+    }
+}
+
+/// Minimal representation of a contract inside a solidity file
+#[derive(Debug)]
+pub struct SolContract {
+    /// Name of the contract as declared in the source
+    pub name: String,
+    /// Function definitions that are part of the contract definition
+    pub functions: Vec<FunctionDefinition>,
+}
+
+/// Minimal representation of a library inside a solidity file
+#[derive(Debug)]
+pub struct SolLibrary {
+    /// Name of the library as declared in the source
+    pub name: String,
+    /// Function definitions that are part of the library definition
+    pub functions: Vec<FunctionDefinition>,
+}
+
+impl SolLibrary {
+    /// Returns `true` if all functions of this library will be inlined.
+    ///
+    /// A library counts as inlined as long as none of its functions carries an explicit
+    /// `external` or `public` visibility attribute. Functions without such an attribute,
+    /// including functions without any visibility attribute at all, do not prevent inlining.
+    /// Internal and private functions are not exposed through the ABI and can only be reached
+    /// from the calling contract itself, so no separately deployed library is needed for them.
+    ///
+    /// See also <https://docs.soliditylang.org/en/latest/contracts.html#libraries>
+    pub fn is_inlined(&self) -> bool {
+        let has_exposed_function =
+            self.functions.iter().flat_map(|func| func.attributes.iter()).any(|attribute| {
+                matches!(
+                    attribute,
+                    FunctionAttribute::Visibility(Visibility::External(_)) |
+                        FunctionAttribute::Visibility(Visibility::Public(_))
+                )
+            });
+        !has_exposed_function
+    }
+}
+
+/// Represents an item in a solidity file with its location in the file
+#[derive(Debug, Clone)]
+pub struct SolDataUnit<T> {
+    /// Where the item was found in the file content
+    loc: Location,
+    /// The extracted item itself, e.g. an import path or a version string
+    data: T,
+}
+
+/// Location in a text file buffer
+///
+/// Created either from a [`regex::Match`] or from a solang [`Loc`]. A `Loc` that is not
+/// `Loc::File` is converted into the `0..0` location.
+///
+/// NOTE(review): `SolDataUnit::loc_by_offset` adds one to `end` to make it exclusive, while
+/// the end of a `regex::Match` is presumably already exclusive; confirm that both sources use
+/// the same convention.
+#[derive(Debug, Clone)]
+pub struct Location {
+    /// Offset of the first character of the item
+    pub start: usize,
+    /// Offset of the end of the item
+    pub end: usize,
+}
+
+/// Constructor and accessors for a piece of data paired with its location within the file
+impl<T> SolDataUnit<T> {
+    /// Creates a new unit from the extracted `data` and the `loc` it was found at
+    pub fn new(data: T, loc: Location) -> Self {
+        SolDataUnit { loc, data }
+    }
+
+    /// Borrows the data this unit wraps
+    pub fn data(&self) -> &T {
+        let SolDataUnit { data, .. } = self;
+        data
+    }
+
+    /// Returns the `(start, end)` location of this unit, exactly as stored
+    pub fn loc(&self) -> (usize, usize) {
+        let Location { start, end } = &self.loc;
+        (*start, *end)
+    }
+
+    /// Returns the `(start, end)` location of this unit shifted by `offset`.
+    ///
+    /// This is used to find the unit again after the content in front of it has grown or
+    /// shrunk. In contrast to [`Self::loc`] the returned end is one past the stored end.
+    pub fn loc_by_offset(&self, offset: isize) -> (usize, usize) {
+        let shifted_start = offset.saturating_add(self.loc.start as isize) as usize;
+        // bump the stored end by one so the returned end is exclusive
+        let shifted_end = offset.saturating_add(self.loc.end as isize + 1) as usize;
+        (shifted_start, shifted_end)
+    }
+}
+
+/// Takes over the start and end offsets of a regex match
+impl From<Match<'_>> for Location {
+    fn from(src: Match) -> Self {
+        let (start, end) = (src.start(), src.end());
+        Self { start, end }
+    }
+}
+
+/// Takes over the offsets of a file location, every other kind of [`Loc`] maps to `0..0`
+impl From<Loc> for Location {
+    fn from(src: Loc) -> Self {
+        if let Loc::File(_, start, end) = src {
+            Self { start, end }
+        } else {
+            Self { start: 0, end: 0 }
+        }
+    }
+}
+
+/// Collects, for every match of `regex` in `content`, the pair `(outer, inner)` where `outer`
+/// is the match of the entire regex and `inner` is the match of a named group.
+///
+/// The group names in `names` are tried in the given order and the first one that took part in
+/// the match is used, so the order of the names matters. Matches in which none of the named
+/// groups took part are skipped.
+fn capture_outer_and_inner<'a>(
+    content: &'a str,
+    regex: &regex::Regex,
+    names: &[&str],
+) -> Vec<(regex::Match<'a>, regex::Match<'a>)> {
+    let mut pairs = Vec::new();
+    for captures in regex.captures_iter(content) {
+        let pair = (|| {
+            let inner = names.iter().find_map(|name| captures.name(name))?;
+            let outer = captures.get(0)?;
+            Some((outer, inner))
+        })();
+        pairs.extend(pair);
+    }
+    pairs
+}
+
+/// Extracts all import paths from `content` via regex.
+///
+/// Each returned unit holds the import path and the location of the whole import statement
+/// that matched.
+pub fn capture_imports(content: &str) -> Vec<SolDataUnit<PathBuf>> {
+    let matches =
+        capture_outer_and_inner(content, &utils::RE_SOL_IMPORT, &["p1", "p2", "p3", "p4"]);
+    let mut imports = Vec::with_capacity(matches.len());
+    for (statement, path) in matches {
+        imports.push(SolDataUnit::new(PathBuf::from(path.as_str()), Location::from(statement)));
+    }
+    imports
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Imports with a curly braced symbol list must be captured, and the result has to agree
+    // with `utils::find_import_paths`.
+    #[test]
+    fn can_capture_curly_imports() {
+        let content = r#"
+import { T } from "../Test.sol";
+import {ReentrancyGuard} from "@openzeppelin/contracts/utils/ReentrancyGuard.sol";
+import {DsTest} from "ds-test/test.sol";
+"#;
+
+        let mut captured_imports = Vec::new();
+        for unit in capture_imports(content) {
+            captured_imports.push(unit.data);
+        }
+
+        let expected: Vec<PathBuf> =
+            utils::find_import_paths(content).map(|m| PathBuf::from(m.as_str())).collect();
+
+        assert_eq!(captured_imports, expected);
+
+        let known_paths = [
+            "../Test.sol",
+            "@openzeppelin/contracts/utils/ReentrancyGuard.sol",
+            "ds-test/test.sol",
+        ];
+        assert_eq!(
+            captured_imports,
+            known_paths.iter().map(|path| PathBuf::from(*path)).collect::<Vec<_>>()
+        );
+    }
+}
--- a/ethers-solc/tests/project.rs
+++ b/ethers-solc/tests/project.rs
@ -12,7 +12,7 @@ use ethers_solc::{
    project_util::*,
    remappings::Remapping,
    ConfigurableArtifacts, ExtraOutputValues, Graph, Project, ProjectCompileOutput,
-    ProjectPathsConfig,
+    ProjectPathsConfig, TestFileFilter,
 };
 use pretty_assertions::assert_eq;

@ -737,7 +737,6 @@ fn can_recompile_with_changes() {

 #[test]
 fn can_recompile_with_lowercase_names() {
-    init_tracing();
    let tmp = TempProject::dapptools().unwrap();

    tmp.add_source(
@ -843,3 +842,42 @@ fn can_recompile_unchanged_with_empty_files() {
    assert!(compiled.find("A").is_some());
    assert!(compiled.find("C").is_some());
 }
+
+// Sparse compilation must still emit bytecode for a library that is referenced via a link
+// reference, i.e. a library with an `external` function that is called from another contract.
+#[test]
+fn can_compile_sparse_with_link_references() {
+    let tmp = TempProject::dapptools().unwrap();
+
+    // the contract calls the external library function `MyLib.doStuff`
+    tmp.add_source(
+        "ATest.t.sol",
+        r#"
+    pragma solidity =0.8.12;
+    import {MyLib} from "./mylib.sol";
+    contract ATest {
+      function test_mylib() public returns (uint256) {
+         return MyLib.doStuff();
+      }
+    }
+   "#,
+    )
+    .unwrap();
+
+    // `doStuff` is `external`, so the library has a non inlined function
+    tmp.add_source(
+        "mylib.sol",
+        r#"
+    pragma solidity =0.8.12;
+    library MyLib {
+       function doStuff() external pure returns (uint256) {return 1337;}
+    }
+   "#,
+    )
+    .unwrap();
+
+    // NOTE(review): `TestFileFilter::default()` presumably selects test files such as
+    // `ATest.t.sol`; confirm against the filter implementation
+    let mut compiled = tmp.compile_sparse(TestFileFilter::default()).unwrap();
+    assert!(!compiled.has_compiler_errors());
+
+    println!("{}", compiled);
+    assert!(compiled.find("ATest").is_some());
+    assert!(compiled.find("MyLib").is_some());
+    // the library artifact must not be stripped of its bytecode
+    let lib = compiled.remove("MyLib").unwrap();
+    assert!(lib.bytecode.is_some());
+}