fix(solc): extend sparse mode to linked references (#1107)

* refactor: move sol parsing to separate mod

* refactor: make parse a struct function

* feat: add inline function

* chore: rename sparse filter

* feat: move data to edges type

* refactor: supply graph in compile function

* feat: resolve link references

* test: add sparse mode test
This commit is contained in:
Matthias Seitz 2022-04-05 00:50:10 +02:00 committed by GitHub
parent 3edbcc1967
commit 38b984c353
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 505 additions and 264 deletions

View File

@ -807,6 +807,14 @@ impl<'a, T: ArtifactOutput> ArtifactsCache<'a, T> {
Ok(cache)
}
/// Returns the graph data for this project
pub fn graph(&self) -> &GraphEdges {
match self {
ArtifactsCache::Ephemeral(graph, _) => graph,
ArtifactsCache::Cached(inner) => &inner.edges,
}
}
#[cfg(test)]
#[allow(unused)]
#[doc(hidden)]

View File

@ -114,7 +114,7 @@ use crate::{
};
use rayon::prelude::*;
use crate::filter::SparseOutputFileFilter;
use crate::filter::SparseOutputFilter;
use std::{collections::btree_map::BTreeMap, path::PathBuf, time::Instant};
#[derive(Debug)]
@ -125,7 +125,7 @@ pub struct ProjectCompiler<'a, T: ArtifactOutput> {
/// how to compile all the sources
sources: CompilerSources,
/// How to select solc [`crate::artifacts::CompilerOutput`] for files
sparse_output: SparseOutputFileFilter,
sparse_output: SparseOutputFilter,
}
impl<'a, T: ArtifactOutput> ProjectCompiler<'a, T> {
@ -184,7 +184,7 @@ impl<'a, T: ArtifactOutput> ProjectCompiler<'a, T> {
/// Applies the specified filter to be applied when selecting solc output for
/// specific files to be compiled
pub fn with_sparse_output(mut self, sparse_output: impl Into<SparseOutputFileFilter>) -> Self {
pub fn with_sparse_output(mut self, sparse_output: impl Into<SparseOutputFilter>) -> Self {
self.sparse_output = sparse_output.into();
self
}
@ -232,7 +232,7 @@ struct PreprocessedState<'a, T: ArtifactOutput> {
sources: FilteredCompilerSources,
/// cache that holds [CacheEntry] object if caching is enabled and the project is recompiled
cache: ArtifactsCache<'a, T>,
sparse_output: SparseOutputFileFilter,
sparse_output: SparseOutputFilter,
}
impl<'a, T: ArtifactOutput> PreprocessedState<'a, T> {
@ -243,6 +243,7 @@ impl<'a, T: ArtifactOutput> PreprocessedState<'a, T> {
&cache.project().solc_config.settings,
&cache.project().paths,
sparse_output,
cache.graph(),
)?;
Ok(CompiledState { output, cache })
@ -372,14 +373,15 @@ impl FilteredCompilerSources {
self,
settings: &Settings,
paths: &ProjectPathsConfig,
sparse_output: SparseOutputFileFilter,
sparse_output: SparseOutputFilter,
graph: &GraphEdges,
) -> Result<AggregatedCompilerOutput> {
match self {
FilteredCompilerSources::Sequential(input) => {
compile_sequential(input, settings, paths, sparse_output)
compile_sequential(input, settings, paths, sparse_output, graph)
}
FilteredCompilerSources::Parallel(input, j) => {
compile_parallel(input, j, settings, paths, sparse_output)
compile_parallel(input, j, settings, paths, sparse_output, graph)
}
}
}
@ -399,7 +401,8 @@ fn compile_sequential(
input: VersionedFilteredSources,
settings: &Settings,
paths: &ProjectPathsConfig,
sparse_output: SparseOutputFileFilter,
sparse_output: SparseOutputFilter,
graph: &GraphEdges,
) -> Result<AggregatedCompilerOutput> {
let mut aggregated = AggregatedCompilerOutput::default();
tracing::trace!("compiling {} jobs sequentially", input.len());
@ -425,7 +428,7 @@ fn compile_sequential(
// depending on the composition of the filtered sources, the output selection can be
// optimized
let mut opt_settings = settings.clone();
let sources = sparse_output.sparse_sources(filtered_sources, &mut opt_settings);
let sources = sparse_output.sparse_sources(filtered_sources, &mut opt_settings, graph);
for input in CompilerInput::with_sources(sources) {
let actually_dirty = input
@ -475,7 +478,8 @@ fn compile_parallel(
num_jobs: usize,
settings: &Settings,
paths: &ProjectPathsConfig,
sparse_output: SparseOutputFileFilter,
sparse_output: SparseOutputFilter,
graph: &GraphEdges,
) -> Result<AggregatedCompilerOutput> {
debug_assert!(num_jobs > 1);
tracing::trace!(
@ -501,7 +505,7 @@ fn compile_parallel(
// depending on the composition of the filtered sources, the output selection can be
// optimized
let mut opt_settings = settings.clone();
let sources = sparse_output.sparse_sources(filtered_sources, &mut opt_settings);
let sources = sparse_output.sparse_sources(filtered_sources, &mut opt_settings, graph);
for input in CompilerInput::with_sources(sources) {
let actually_dirty = input

View File

@ -2,6 +2,7 @@
use crate::{
artifacts::{output_selection::OutputSelection, Settings},
resolver::GraphEdges,
Source, Sources,
};
use std::{
@ -52,7 +53,7 @@ impl FileFilter for TestFileFilter {
/// A type that can apply a filter to a set of preprocessed [FilteredSources] in order to set sparse
/// output for specific files
pub enum SparseOutputFileFilter {
pub enum SparseOutputFilter {
/// Sets the configured [OutputSelection] for dirty files only.
///
/// In other words, we request the output of solc only for files that have been detected as
@ -62,79 +63,130 @@ pub enum SparseOutputFileFilter {
Custom(Box<dyn FileFilter>),
}
impl SparseOutputFileFilter {
impl SparseOutputFilter {
/// While solc needs all the files to compile the actual _dirty_ files, we can tell solc to
/// output everything for those dirty files as currently configured in the settings, but output
/// nothing for the other files that are _not_ dirty.
///
/// This will modify the [OutputSelection] of the [Settings] so that we explicitly select the
/// files' output based on their state.
pub fn sparse_sources(&self, sources: FilteredSources, settings: &mut Settings) -> Sources {
fn apply(
sources: &FilteredSources,
settings: &mut Settings,
f: impl Fn(&PathBuf, &FilteredSource) -> bool,
) {
let selection = settings
.output_selection
.as_mut()
.remove("*")
.unwrap_or_else(OutputSelection::default_file_output_selection);
for (file, source) in sources.0.iter() {
if f(file, source) {
settings
.output_selection
.as_mut()
.insert(format!("{}", file.display()), selection.clone());
} else {
tracing::trace!("using pruned output selection for {}", file.display());
settings.output_selection.as_mut().insert(
format!("{}", file.display()),
OutputSelection::empty_file_output_select(),
);
}
}
}
///
/// This also takes the project's graph as input, this allows us to check if the files the
/// filter matches depend on libraries that need to be linked
pub fn sparse_sources(
&self,
sources: FilteredSources,
settings: &mut Settings,
graph: &GraphEdges,
) -> Sources {
match self {
SparseOutputFileFilter::AllDirty => {
SparseOutputFilter::AllDirty => {
if !sources.all_dirty() {
// settings can be optimized
tracing::trace!(
"optimizing output selection for {}/{} sources",
sources.clean().count(),
sources.len()
);
apply(&sources, settings, |_, source| source.is_dirty())
Self::all_dirty(&sources, settings)
}
}
SparseOutputFileFilter::Custom(f) => {
tracing::trace!("optimizing output selection with custom filter",);
apply(&sources, settings, |p, source| source.is_dirty() && f.is_match(p));
SparseOutputFilter::Custom(f) => {
Self::apply_custom_filter(&sources, settings, graph, f)
}
};
sources.into()
}
/// Applies a custom filter and prunes the output of every source file the filter rejects.
///
/// Pruning purely by filter could accidentally drop link references (imported libraries) that
/// are required at runtime. For example, if the filter only matches test files (`*.t.sol`) and
/// a test file uses a library that won't be inlined, the library's bytecode would be missing.
/// To avoid that, the link references of every selected file are looked up in `graph` and
/// receive the same output selection as the file itself.
///
/// The wildcard (`"*"`) entry is removed from the output selection and used as the selection
/// for all selected files; if there is none, the default file output selection is used.
#[allow(clippy::borrowed_box)]
fn apply_custom_filter(
    sources: &FilteredSources,
    settings: &mut Settings,
    graph: &GraphEdges,
    f: &Box<dyn FileFilter>,
) {
    tracing::trace!("optimizing output selection with custom filter",);
    let selection = settings
        .output_selection
        .as_mut()
        .remove("*")
        .unwrap_or_else(OutputSelection::default_file_output_selection);

    for (file, source) in sources.0.iter() {
        let key = file.display().to_string();
        let is_selected = source.is_dirty() && f.is_match(file);

        if is_selected {
            settings.output_selection.as_mut().insert(key, selection.clone());
            // the filter itself knows nothing about libraries the file must be linked against,
            // so every non-inlined library of this file is selected as well
            for link in graph.get_link_references(file) {
                settings
                    .output_selection
                    .as_mut()
                    .insert(link.display().to_string(), selection.clone());
            }
            continue
        }

        // never overwrite an existing entry here: the file may already have been selected as
        // a link reference of another file
        if settings.output_selection.as_ref().contains_key(&key) {
            continue
        }

        tracing::trace!("using pruned output selection for {}", file.display());
        settings.output_selection.as_mut().insert(key, OutputSelection::empty_file_output_select());
    }
}
/// Selects compiler output for dirty sources only; every clean source gets an empty selection.
///
/// The wildcard (`"*"`) entry is removed from the output selection and reused for each dirty
/// file; if there is none, the default file output selection is used.
fn all_dirty(sources: &FilteredSources, settings: &mut Settings) {
    // settings can be optimized
    tracing::trace!(
        "optimizing output selection for {}/{} sources",
        sources.clean().count(),
        sources.len()
    );
    let dirty_selection = settings
        .output_selection
        .as_mut()
        .remove("*")
        .unwrap_or_else(OutputSelection::default_file_output_selection);

    for (path, state) in sources.0.iter() {
        let file_selection = if state.is_dirty() {
            dirty_selection.clone()
        } else {
            tracing::trace!("using pruned output selection for {}", path.display());
            OutputSelection::empty_file_output_select()
        };
        settings.output_selection.as_mut().insert(path.display().to_string(), file_selection);
    }
}
}
impl From<Box<dyn FileFilter>> for SparseOutputFileFilter {
impl From<Box<dyn FileFilter>> for SparseOutputFilter {
fn from(f: Box<dyn FileFilter>) -> Self {
SparseOutputFileFilter::Custom(f)
SparseOutputFilter::Custom(f)
}
}
impl Default for SparseOutputFileFilter {
impl Default for SparseOutputFilter {
fn default() -> Self {
SparseOutputFileFilter::AllDirty
SparseOutputFilter::AllDirty
}
}
impl fmt::Debug for SparseOutputFileFilter {
impl fmt::Debug for SparseOutputFilter {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
match self {
SparseOutputFileFilter::AllDirty => f.write_str("AllDirty"),
SparseOutputFileFilter::Custom(_) => f.write_str("Custom"),
SparseOutputFilter::AllDirty => f.write_str("AllDirty"),
SparseOutputFilter::Custom(_) => f.write_str("Custom"),
}
}
}

View File

@ -7,7 +7,8 @@ use crate::{
project_util::mock::{MockProjectGenerator, MockProjectSettings},
utils::tempdir,
Artifact, ArtifactOutput, Artifacts, ConfigurableArtifacts, ConfigurableContractArtifact,
PathStyle, Project, ProjectCompileOutput, ProjectPathsConfig, SolFilesCache, SolcIoError,
FileFilter, PathStyle, Project, ProjectCompileOutput, ProjectPathsConfig, SolFilesCache,
SolcIoError,
};
use fs_extra::{dir, file};
use std::{
@ -69,6 +70,13 @@ impl<T: ArtifactOutput> TempProject<T> {
self.project().compile()
}
/// Compiles the wrapped project in sparse mode, forwarding `filter` to
/// `Project::compile_sparse`.
pub fn compile_sparse<F>(&self, filter: F) -> Result<ProjectCompileOutput<T>>
where
    F: FileFilter + 'static,
{
    let project = self.project();
    project.compile_sparse(filter)
}
pub fn flatten(&self, target: &Path) -> Result<String> {
self.project().flatten(target)
}

View File

@ -52,21 +52,23 @@ use std::{
path::{Path, PathBuf},
};
use parse::{SolData, SolDataUnit};
use rayon::prelude::*;
use regex::Match;
use semver::VersionReq;
use solang_parser::pt::{Import, Loc, SourceUnitPart};
use crate::{error::Result, utils, ProjectPathsConfig, Solc, SolcError, Source, Sources};
mod parse;
mod tree;
pub use tree::{print, Charset, TreeOptions};
/// The underlying edges of the graph which only contains the raw relationship data.
///
/// This is kept separate from the `Graph` as the `Node`s get consumed when the `Solc` to `Sources`
/// set is determined.
#[derive(Debug, Clone)]
#[derive(Debug)]
pub struct GraphEdges {
/// The indices of `edges` correspond to the `nodes`. That is, `edges[0]`
/// is the set of outgoing edges for `nodes[0]`.
@ -77,6 +79,8 @@ pub struct GraphEdges {
rev_indices: HashMap<usize, PathBuf>,
/// the identified version requirement of a file
versions: HashMap<usize, Option<VersionReq>>,
/// the extracted data from the source file
data: HashMap<usize, SolData>,
/// with how many input files we started with, corresponds to `let input_files =
/// nodes[..num_input_files]`.
///
@ -152,6 +156,22 @@ impl GraphEdges {
.and_then(|idx| self.versions.get(idx))
.and_then(|v| v.as_ref())
}
/// Returns those library files that will be required as `linkReferences` by the given file
///
/// This is a preprocess function that attempts to resolve those libraries that will the
/// solidity `file` will be required to link. And further restrict this list to libraries
/// that won't be inlined See also [SolLibrary](parse::SolLibrary)
pub fn get_link_references(&self, file: impl AsRef<Path>) -> HashSet<&PathBuf> {
let mut link_references = HashSet::new();
for import in self.all_imported_nodes(self.node_id(file)) {
let data = &self.data[&import];
if data.has_link_references() {
link_references.insert(&self.rev_indices[&import]);
}
}
link_references
}
}
/// Represents a fully-resolved solidity dependency graph. Each node in the graph
@ -159,6 +179,7 @@ impl GraphEdges {
/// See also <https://docs.soliditylang.org/en/latest/layout-of-source-files.html?highlight=import#importing-other-source-files>
#[derive(Debug)]
pub struct Graph {
/// all nodes in the project, a `Node` represents a single file
nodes: Vec<Node>,
/// relationship of the nodes
edges: GraphEdges,
@ -222,11 +243,25 @@ impl Graph {
self.node_ids(start).map(move |idx| self.node(idx))
}
/// Splits the graph into its `(path, source)` pairs and the [`GraphEdges`].
///
/// The parsed data of every node is moved into the edges, keyed by the node's index, so that
/// it stays available at a later stage of the compile pipeline after the nodes are consumed.
fn split(self) -> (Vec<(PathBuf, Source)>, GraphEdges) {
    let Graph { nodes, mut edges, .. } = self;
    let sources: Vec<(PathBuf, Source)> = nodes
        .into_iter()
        .enumerate()
        .map(|(idx, Node { path, source, data })| {
            // hand the extracted data over to the edges while peeling the node apart
            edges.data.insert(idx, data);
            (path, source)
        })
        .collect();
    (sources, edges)
}
/// Consumes the `Graph`, effectively splitting the `nodes` and the `GraphEdges` off and
/// returning the `nodes` converted to `Sources`
pub fn into_sources(self) -> (Sources, GraphEdges) {
let Graph { nodes, edges, .. } = self;
(nodes.into_iter().map(|node| (node.path, node.source)).collect(), edges)
let (sources, edges) = self.split();
(sources.into_iter().collect(), edges)
}
/// Returns an iterator that yields only those nodes that represent input files.
@ -255,7 +290,7 @@ impl Graph {
resolved_imports.push(idx);
} else {
// imported file is not part of the input files
let node = read_node(&target)?;
let node = Node::read(&target)?;
unresolved.push_back((target.clone(), node));
let idx = index.len();
index.insert(target, idx);
@ -269,7 +304,7 @@ impl Graph {
let mut unresolved: VecDeque<(PathBuf, Node)> = sources
.into_par_iter()
.map(|(path, source)| {
let data = parse_data(source.as_ref(), &path);
let data = SolData::parse(source.as_ref(), &path);
(path.clone(), Node { path, source, data })
})
.collect();
@ -317,6 +352,7 @@ impl Graph {
.enumerate()
.map(|(idx, node)| (idx, node.data.version_req.clone()))
.collect(),
data: Default::default(),
};
Ok(Graph { nodes, edges, root: paths.root.clone() })
}
@ -344,7 +380,7 @@ impl Graph {
/// cache entry for them as well. This can be optimized however
fn insert_imports(
idx: usize,
all_nodes: &mut HashMap<usize, Node>,
all_nodes: &mut HashMap<usize, (PathBuf, Source)>,
sources: &mut Sources,
edges: &[Vec<usize>],
num_input_files: usize,
@ -354,8 +390,8 @@ impl Graph {
// nodes are handled separately
if dep >= num_input_files {
// library import
if let Some(node) = all_nodes.remove(&dep) {
sources.insert(node.path, node.source);
if let Some((path, source)) = all_nodes.remove(&dep) {
sources.insert(path, source);
insert_imports(dep, all_nodes, sources, edges, num_input_files);
}
}
@ -363,8 +399,10 @@ impl Graph {
}
let versioned_nodes = self.get_input_node_versions(offline)?;
let Self { nodes, edges, .. } = self;
let (nodes, edges) = self.split();
let mut versioned_sources = HashMap::with_capacity(versioned_nodes.len());
let mut all_nodes = nodes.into_iter().enumerate().collect::<HashMap<_, _>>();
// determine the `Sources` set for each solc version
@ -373,8 +411,8 @@ impl Graph {
// we only process input nodes (from sources, tests for example)
for idx in input_node_indices {
// insert the input node in the sources set and remove it from the available set
let node = all_nodes.remove(&idx).expect("node is preset. qed");
sources.insert(node.path, node.source);
let (path, source) = all_nodes.remove(&idx).expect("node is preset. qed");
sources.insert(path, source);
insert_imports(
idx,
&mut all_nodes,
@ -692,12 +730,23 @@ impl VersionedSources {
/// A single solidity file of the project: where it lives, its content and the data that was
/// parsed out of it.
#[derive(Debug)]
pub struct Node {
/// path of the solidity file
path: PathBuf,
/// content of the solidity file
source: Source,
/// data extracted from the content via [`SolData::parse`]
data: SolData,
}
impl Node {
/// Loads the solidity file at `file` and parses it into a [Node].
///
/// # Errors
///
/// Returns [`SolcError::Resolve`] if the file cannot be read.
pub fn read(file: impl AsRef<Path>) -> crate::Result<Self> {
    let path = file.as_ref().to_path_buf();
    let source = Source::read(path.as_path()).map_err(SolcError::Resolve)?;
    let data = SolData::parse(source.as_ref(), path.as_path());
    Ok(Self { path, source, data })
}
/// Returns the content of the solidity file as a string slice
pub fn content(&self) -> &str {
&self.source.content
}
@ -732,199 +781,10 @@ impl<'a> fmt::Display for DisplayNode<'a> {
}
}
#[derive(Debug, Clone)]
#[allow(unused)]
struct SolData {
license: Option<SolDataUnit<String>>,
version: Option<SolDataUnit<String>>,
imports: Vec<SolDataUnit<PathBuf>>,
version_req: Option<VersionReq>,
}
impl SolData {
#[allow(unused)]
fn fmt_version<W: std::fmt::Write>(
&self,
f: &mut W,
) -> std::result::Result<(), std::fmt::Error> {
if let Some(ref version) = self.version {
write!(f, "({})", version.data)?;
}
Ok(())
}
}
#[derive(Debug, Clone)]
pub struct SolDataUnit<T> {
loc: Location,
data: T,
}
#[derive(Debug, Clone)]
pub struct Location {
pub start: usize,
pub end: usize,
}
/// Solidity Data Unit decorated with its location within the file
impl<T> SolDataUnit<T> {
pub fn new(data: T, loc: Location) -> Self {
Self { data, loc }
}
/// Returns the underlying data for the unit
pub fn data(&self) -> &T {
&self.data
}
/// Returns the location of the given data unit
pub fn loc(&self) -> (usize, usize) {
(self.loc.start, self.loc.end)
}
/// Returns the location of the given data unit adjusted by an offset.
/// Used to determine new position of the unit within the file after
/// content manipulation.
pub fn loc_by_offset(&self, offset: isize) -> (usize, usize) {
(
offset.saturating_add(self.loc.start as isize) as usize,
// make the end location exclusive
offset.saturating_add(self.loc.end as isize + 1) as usize,
)
}
}
impl From<Match<'_>> for Location {
fn from(src: Match) -> Self {
Location { start: src.start(), end: src.end() }
}
}
impl From<Loc> for Location {
fn from(src: Loc) -> Self {
match src {
Loc::File(_, start, end) => Location { start, end },
_ => Location { start: 0, end: 0 },
}
}
}
fn read_node(file: impl AsRef<Path>) -> Result<Node> {
let file = file.as_ref();
let source = Source::read(file).map_err(SolcError::Resolve)?;
let data = parse_data(source.as_ref(), file);
Ok(Node { path: file.to_path_buf(), source, data })
}
/// Extracts the useful data from a solidity source
///
/// This will attempt to parse the solidity AST and extract the imports and version pragma. If
/// parsing fails, we'll fall back to extract that info via regex
fn parse_data(content: &str, file: &Path) -> SolData {
let mut version = None;
let mut imports = Vec::<SolDataUnit<PathBuf>>::new();
match solang_parser::parse(content, 0) {
Ok((units, _)) => {
for unit in units.0 {
match unit {
SourceUnitPart::PragmaDirective(loc, _, pragma, value) => {
if pragma.name == "solidity" {
// we're only interested in the solidity version pragma
version = Some(SolDataUnit::new(value.string, loc.into()));
}
}
SourceUnitPart::ImportDirective(_, import) => {
let (import, loc) = match import {
Import::Plain(s, l) => (s, l),
Import::GlobalSymbol(s, _, l) => (s, l),
Import::Rename(s, _, l) => (s, l),
};
imports.push(SolDataUnit::new(PathBuf::from(import.string), loc.into()));
}
_ => {}
}
}
}
Err(err) => {
tracing::trace!(
"failed to parse \"{}\" ast: \"{:?}\". Falling back to regex to extract data",
file.display(),
err
);
version = capture_outer_and_inner(content, &utils::RE_SOL_PRAGMA_VERSION, &["version"])
.first()
.map(|(cap, name)| {
SolDataUnit::new(name.as_str().to_owned(), cap.to_owned().into())
});
imports = capture_imports(content);
}
};
let license = content.lines().next().and_then(|line| {
capture_outer_and_inner(line, &utils::RE_SOL_SDPX_LICENSE_IDENTIFIER, &["license"])
.first()
.map(|(cap, l)| SolDataUnit::new(l.as_str().to_owned(), cap.to_owned().into()))
});
let version_req = version.as_ref().and_then(|v| Solc::version_req(v.data()).ok());
SolData { version_req, version, imports, license }
}
/// Given the regex and the target string, find all occurrences
/// of named groups within the string. This method returns
/// the tuple of matches `(a, b)` where `a` is the match for the
/// entire regex and `b` is the match for the first named group.
///
/// NOTE: This method will return the match for the first named
/// group, so the order of passed named groups matters.
fn capture_outer_and_inner<'a>(
content: &'a str,
regex: &regex::Regex,
names: &[&str],
) -> Vec<(regex::Match<'a>, regex::Match<'a>)> {
regex
.captures_iter(content)
.filter_map(|cap| {
let cap_match = names.iter().find_map(|name| cap.name(name));
cap_match.and_then(|m| cap.get(0).map(|outer| (outer.to_owned(), m)))
})
.collect()
}
fn capture_imports(content: &str) -> Vec<SolDataUnit<PathBuf>> {
capture_outer_and_inner(content, &utils::RE_SOL_IMPORT, &["p1", "p2", "p3", "p4"])
.iter()
.map(|(cap, m)| SolDataUnit::new(PathBuf::from(m.as_str()), cap.to_owned().into()))
.collect()
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn can_capture_curly_imports() {
let content = r#"
import { T } from "../Test.sol";
import {ReentrancyGuard} from "@openzeppelin/contracts/utils/ReentrancyGuard.sol";
import {DsTest} from "ds-test/test.sol";
"#;
let captured_imports =
capture_imports(content).into_iter().map(|s| s.data).collect::<Vec<_>>();
let expected =
utils::find_import_paths(content).map(|m| m.as_str().into()).collect::<Vec<PathBuf>>();
assert_eq!(captured_imports, expected);
assert_eq!(
captured_imports,
vec![
PathBuf::from("../Test.sol"),
"@openzeppelin/contracts/utils/ReentrancyGuard.sol".into(),
"ds-test/test.sol".into(),
]
);
}
#[test]
fn can_resolve_hardhat_dependency_graph() {
let root = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("test-data/hardhat-sample");

View File

@ -0,0 +1,271 @@
use crate::{utils, Solc};
use regex::Match;
use semver::VersionReq;
use solang_parser::pt::{
ContractPart, ContractTy, FunctionAttribute, FunctionDefinition, Import, Loc, SourceUnitPart,
Visibility,
};
use std::path::{Path, PathBuf};
/// Represents various information about a solidity file parsed via [solang_parser]
///
/// `libraries` and `contracts` are only populated when the file could be parsed into an AST;
/// the regex fallback of [`SolData::parse`] only recovers `version` and `imports`.
#[derive(Debug)]
#[allow(unused)]
pub struct SolData {
/// license identifier captured from the first line of the file, if any
pub license: Option<SolDataUnit<String>>,
/// raw value of the `pragma solidity` directive, if any
pub version: Option<SolDataUnit<String>>,
/// import paths as written in the file
pub imports: Vec<SolDataUnit<PathBuf>>,
/// `version` converted via `Solc::version_req`, `None` if missing or not convertible
pub version_req: Option<VersionReq>,
/// all `library` definitions found in the file
pub libraries: Vec<SolLibrary>,
/// all `contract` definitions found in the file
pub contracts: Vec<SolContract>,
}
impl SolData {
#[allow(unused)]
pub fn fmt_version<W: std::fmt::Write>(
&self,
f: &mut W,
) -> std::result::Result<(), std::fmt::Error> {
if let Some(ref version) = self.version {
write!(f, "({})", version.data)?;
}
Ok(())
}
/// Extracts the useful data from a solidity source
///
/// This will attempt to parse the solidity AST and extract the imports and version pragma. If
/// parsing fails, we'll fall back to extract that info via regex
pub fn parse(content: &str, file: &Path) -> Self {
let mut version = None;
let mut imports = Vec::<SolDataUnit<PathBuf>>::new();
let mut libraries = Vec::new();
let mut contracts = Vec::new();
match solang_parser::parse(content, 0) {
Ok((units, _)) => {
for unit in units.0 {
match unit {
SourceUnitPart::PragmaDirective(loc, _, pragma, value) => {
if pragma.name == "solidity" {
// we're only interested in the solidity version pragma
version = Some(SolDataUnit::new(value.string, loc.into()));
}
}
SourceUnitPart::ImportDirective(_, import) => {
let (import, loc) = match import {
Import::Plain(s, l) => (s, l),
Import::GlobalSymbol(s, _, l) => (s, l),
Import::Rename(s, _, l) => (s, l),
};
imports
.push(SolDataUnit::new(PathBuf::from(import.string), loc.into()));
}
SourceUnitPart::ContractDefinition(def) => {
let functions = def
.parts
.into_iter()
.filter_map(|part| match part {
ContractPart::FunctionDefinition(f) => Some(*f),
_ => None,
})
.collect();
let name = def.name.name;
match def.ty {
ContractTy::Contract(_) => {
contracts.push(SolContract { name, functions });
}
ContractTy::Library(_) => {
libraries.push(SolLibrary { name, functions });
}
_ => {}
}
}
_ => {}
}
}
}
Err(err) => {
tracing::trace!(
"failed to parse \"{}\" ast: \"{:?}\". Falling back to regex to extract data",
file.display(),
err
);
version =
capture_outer_and_inner(content, &utils::RE_SOL_PRAGMA_VERSION, &["version"])
.first()
.map(|(cap, name)| {
SolDataUnit::new(name.as_str().to_owned(), cap.to_owned().into())
});
imports = capture_imports(content);
}
};
let license = content.lines().next().and_then(|line| {
capture_outer_and_inner(line, &utils::RE_SOL_SDPX_LICENSE_IDENTIFIER, &["license"])
.first()
.map(|(cap, l)| SolDataUnit::new(l.as_str().to_owned(), cap.to_owned().into()))
});
let version_req = version.as_ref().and_then(|v| Solc::version_req(v.data()).ok());
Self { version_req, version, imports, license, libraries, contracts }
}
/// Returns `true` if the solidity file associated with this type contains a solidity library
/// that won't be inlined
pub fn has_link_references(&self) -> bool {
self.libraries.iter().any(|lib| !lib.is_inlined())
}
}
/// Minimal representation of a contract inside a solidity file
#[derive(Debug)]
pub struct SolContract {
/// name of the contract
pub name: String,
/// the function definitions found directly in the contract's body
pub functions: Vec<FunctionDefinition>,
}
/// Minimal representation of a library inside a solidity file
#[derive(Debug)]
pub struct SolLibrary {
/// name of the library
pub name: String,
/// the function definitions found directly in the library's body
pub functions: Vec<FunctionDefinition>,
}
impl SolLibrary {
/// Returns `true` if all functions of this library will be inlined.
///
/// This checks if all functions are either internal or private, because internal functions can
/// only be accessed from within the current contract or contracts deriving from it. They cannot
/// be accessed externally. Since they are not exposed to the outside through the contracts
/// ABI, they can take parameters of internal types like mappings or storage references.
///
/// See also <https://docs.soliditylang.org/en/latest/contracts.html#libraries>
pub fn is_inlined(&self) -> bool {
for f in self.functions.iter() {
for attr in f.attributes.iter() {
if let FunctionAttribute::Visibility(vis) = attr {
match vis {
Visibility::External(_) | Visibility::Public(_) => return false,
_ => {}
}
}
}
}
true
}
}
/// An item of a solidity file decorated with its location within the file
#[derive(Debug, Clone)]
pub struct SolDataUnit<T> {
    loc: Location,
    data: T,
}

/// A `start..end` position in a text file buffer
#[derive(Debug, Clone)]
pub struct Location {
    pub start: usize,
    pub end: usize,
}

impl<T> SolDataUnit<T> {
    /// Wraps `data` together with the location it was found at
    pub fn new(data: T, loc: Location) -> Self {
        SolDataUnit { loc, data }
    }

    /// Borrows the wrapped data
    pub fn data(&self) -> &T {
        &self.data
    }

    /// Returns the `(start, end)` location of this unit, exactly as recorded
    pub fn loc(&self) -> (usize, usize) {
        let Location { start, end } = &self.loc;
        (*start, *end)
    }

    /// Returns the location of this unit shifted by `offset`, with the end made exclusive.
    ///
    /// Used to determine the new position of the unit within the file after the content was
    /// manipulated.
    pub fn loc_by_offset(&self, offset: isize) -> (usize, usize) {
        let shift = |pos: isize| offset.saturating_add(pos) as usize;
        let start = shift(self.loc.start as isize);
        // make the end location exclusive
        let end = shift(self.loc.end as isize + 1);
        (start, end)
    }
}
/// Converts a regex match into the byte range it covers
impl From<Match<'_>> for Location {
    fn from(src: Match) -> Self {
        let (start, end) = (src.start(), src.end());
        Location { start, end }
    }
}
/// Converts a parser location into a [`Location`]; anything that is not a file location
/// collapses to `0..0`
impl From<Loc> for Location {
    fn from(src: Loc) -> Self {
        if let Loc::File(_, start, end) = src {
            Location { start, end }
        } else {
            Location { start: 0, end: 0 }
        }
    }
}
/// Finds all occurrences of the regex in `content` that also matched one of the named groups.
///
/// Each entry of the returned list is a pair `(outer, inner)` where `outer` is the match of
/// the entire regex and `inner` is the match of the named group.
///
/// NOTE: `names` is tried in order and the first group that participated in the match wins,
/// so the order of the passed group names matters. Occurrences in which none of the groups
/// matched are skipped.
fn capture_outer_and_inner<'a>(
    content: &'a str,
    regex: &regex::Regex,
    names: &[&str],
) -> Vec<(regex::Match<'a>, regex::Match<'a>)> {
    let mut found = Vec::new();
    for caps in regex.captures_iter(content) {
        let inner = match names.iter().find_map(|name| caps.name(name)) {
            Some(inner) => inner,
            None => continue,
        };
        if let Some(outer) = caps.get(0) {
            found.push((outer, inner));
        }
    }
    found
}
pub fn capture_imports(content: &str) -> Vec<SolDataUnit<PathBuf>> {
capture_outer_and_inner(content, &utils::RE_SOL_IMPORT, &["p1", "p2", "p3", "p4"])
.iter()
.map(|(cap, m)| SolDataUnit::new(PathBuf::from(m.as_str()), cap.to_owned().into()))
.collect()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Imports using the curly-brace syntax must be captured by the regex fallback and agree
    /// with what `utils::find_import_paths` reports.
    #[test]
    fn can_capture_curly_imports() {
        let content = r#"
import { T } from "../Test.sol";
import {ReentrancyGuard} from "@openzeppelin/contracts/utils/ReentrancyGuard.sol";
import {DsTest} from "ds-test/test.sol";
"#;

        let captured: Vec<PathBuf> =
            capture_imports(content).into_iter().map(|unit| unit.data).collect();
        let via_utils: Vec<PathBuf> =
            utils::find_import_paths(content).map(|m| PathBuf::from(m.as_str())).collect();
        assert_eq!(captured, via_utils);

        let expected = vec![
            PathBuf::from("../Test.sol"),
            "@openzeppelin/contracts/utils/ReentrancyGuard.sol".into(),
            "ds-test/test.sol".into(),
        ];
        assert_eq!(captured, expected);
    }
}

View File

@ -12,7 +12,7 @@ use ethers_solc::{
project_util::*,
remappings::Remapping,
ConfigurableArtifacts, ExtraOutputValues, Graph, Project, ProjectCompileOutput,
ProjectPathsConfig,
ProjectPathsConfig, TestFileFilter,
};
use pretty_assertions::assert_eq;
@ -737,7 +737,6 @@ fn can_recompile_with_changes() {
#[test]
fn can_recompile_with_lowercase_names() {
init_tracing();
let tmp = TempProject::dapptools().unwrap();
tmp.add_source(
@ -843,3 +842,42 @@ fn can_recompile_unchanged_with_empty_files() {
assert!(compiled.find("A").is_some());
assert!(compiled.find("C").is_some());
}
// Sparse compilation with a file filter must still emit bytecode for a library the selected
// file has to be linked against, even though the library file itself is not matched.
#[test]
fn can_compile_sparse_with_link_references() {
let tmp = TempProject::dapptools().unwrap();
// the test contract, calls into the library
tmp.add_source(
"ATest.t.sol",
r#"
pragma solidity =0.8.12;
import {MyLib} from "./mylib.sol";
contract ATest {
function test_mylib() public returns (uint256) {
return MyLib.doStuff();
}
}
"#,
)
.unwrap();
// the library exposes an `external` function, so it is not inlined and has to be linked
tmp.add_source(
"mylib.sol",
r#"
pragma solidity =0.8.12;
library MyLib {
function doStuff() external pure returns (uint256) {return 1337;}
}
"#,
)
.unwrap();
// NOTE(review): presumably `TestFileFilter` only matches `*.t.sol` files, i.e. `mylib.sol`
// is not selected by the filter itself -- confirm against the filter implementation
let mut compiled = tmp.compile_sparse(TestFileFilter::default()).unwrap();
assert!(!compiled.has_compiler_errors());
println!("{}", compiled);
assert!(compiled.find("ATest").is_some());
assert!(compiled.find("MyLib").is_some());
// the library artifact must not have been pruned
let lib = compiled.remove("MyLib").unwrap();
assert!(lib.bytecode.is_some());
}