feat(solc): flatten (#774)

* solc flatten implementation

* upd changelog

* upd docs

* revamp flattening

* clippy

* use resolve_import method

* extract recursive flatenning into a separate func

* change content iteration for flatten

* remove redundant result

* clean up solimport

* add comment to project.flatten

* add support for ver pragma loc

* address pr comments

* uncomment the test

* improve test cov

* add handling of sdpx license identifiers

* change arg name

* match license only at the beginning of the file

* add comments

* lint

* morrre comments
This commit is contained in:
Roman Krasiuk 2022-01-17 13:27:40 +01:00 committed by GitHub
parent 579311bfdd
commit afcba9567f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 316 additions and 48 deletions

View File

@ -34,6 +34,8 @@
### Unreleased
- Add ability to flatten file imports
[#774](https://github.com/gakonst/ethers-rs/pull/774)
- Add dependency graph and resolve all imported libraryfiles
[#750](https://github.com/gakonst/ethers-rs/pull/750)
- `Remapping::find_many` does not return a `Result` anymore

View File

@ -474,7 +474,7 @@ impl Source {
/// Returns all import statements of the file
pub fn parse_imports(&self) -> Vec<&str> {
utils::find_import_paths(self.as_ref())
utils::find_import_paths(self.as_ref()).map(|m| m.as_str()).collect()
}
}

View File

@ -326,10 +326,10 @@ impl SolFilesCacheBuilder {
.map_err(|err| SolcError::solc(err.to_string()))?
.as_millis() as u64;
let imports =
utils::find_import_paths(source.as_ref()).into_iter().map(str::to_string).collect();
utils::find_import_paths(source.as_ref()).map(|m| m.as_str().to_owned()).collect();
let version_pragmas = utils::find_version_pragma(source.as_ref())
.map(|v| vec![v.to_string()])
.map(|v| vec![v.as_str().to_string()])
.unwrap_or_default();
let entry = CacheEntry {

View File

@ -323,7 +323,7 @@ impl Solc {
pub fn source_version_req(source: &Source) -> Result<VersionReq> {
let version =
utils::find_version_pragma(&source.content).ok_or(SolcError::PragmaNotFound)?;
Self::version_req(version)
Self::version_req(version.as_str())
}
/// Returns the corresponding SemVer version requirement for the solidity version

View File

@ -4,6 +4,7 @@ use crate::{
error::{Result, SolcError, SolcIoError},
hh::HardhatArtifact,
remappings::Remapping,
resolver::Graph,
utils, CompilerOutput, Source, Sources,
};
use ethers_core::{abi::Abi, types::Bytes};
@ -11,8 +12,7 @@ use serde::{de::DeserializeOwned, Deserialize, Serialize};
use std::{
collections::BTreeMap,
convert::TryFrom,
fmt,
fmt::Formatter,
fmt::{self, Formatter},
fs, io,
path::{Component, Path, PathBuf},
};
@ -171,6 +171,68 @@ impl ProjectPathsConfig {
pub fn find_libs(root: impl AsRef<Path>) -> Vec<PathBuf> {
vec![utils::find_fave_or_alt_path(root, "lib", "node_modules")]
}
/// Flattens all file imports into a single string
pub fn flatten(&self, target: &Path) -> Result<String> {
tracing::trace!("flattening file");
let graph = Graph::resolve(self)?;
self.flatten_node(target, &graph, false, false)
}
/// Flattens a single node from the dependency graph
fn flatten_node(
&self,
target: &Path,
graph: &Graph,
strip_version_pragma: bool,
strip_license: bool,
) -> Result<String> {
let target_dir = target.parent().ok_or_else(|| {
SolcError::msg(format!("failed to get parent directory for \"{:?}\"", target.display()))
})?;
let target_index = graph.files().get(target).ok_or_else(|| {
SolcError::msg(format!("cannot resolve file at \"{:?}\"", target.display()))
})?;
let target_node = graph.node(*target_index);
let mut imports = target_node.imports().clone();
imports.sort_by_key(|x| x.loc().0);
let mut content = target_node.content().as_bytes().to_vec();
let mut offset = 0_isize;
if strip_license {
if let Some(license) = target_node.license() {
let (start, end) = license.loc_by_offset(offset);
content.splice(start..end, std::iter::empty());
offset -= (end - start) as isize;
}
}
if strip_version_pragma {
if let Some(version) = target_node.version() {
let (start, end) = version.loc_by_offset(offset);
content.splice(start..end, std::iter::empty());
offset -= (end - start) as isize;
}
}
for import in imports.iter() {
let import_path = self.resolve_import(target_dir, import.data())?;
let import_content = self.flatten_node(&import_path, graph, true, true)?;
let import_content = import_content.trim().as_bytes().to_owned();
let import_content_len = import_content.len() as isize;
let (start, end) = import.loc_by_offset(offset);
content.splice(start..end, import_content);
offset += import_content_len - ((end - start) as isize);
}
let result = String::from_utf8(content).map_err(|err| {
SolcError::msg(format!("failed to convert extended bytes to string: {}", err))
})?;
Ok(result)
}
}
impl fmt::Display for ProjectPathsConfig {

View File

@ -37,8 +37,12 @@ use crate::{
};
use error::Result;
use std::{
borrow::Cow, collections::BTreeMap, convert::TryInto, fmt, fs, marker::PhantomData,
path::PathBuf,
borrow::Cow,
collections::BTreeMap,
convert::TryInto,
fmt, fs,
marker::PhantomData,
path::{Path, PathBuf},
};
/// Utilities for creating, mocking and testing of (temporary) projects
@ -507,6 +511,18 @@ impl<Artifacts: ArtifactOutput> Project<Artifacts> {
}
Ok(())
}
/// Flattens the target file into a single string suitable for verification
///
/// This method uses a dependency graph to resolve imported files and substitute
/// import directives with the contents of target files. It will strip the pragma
/// version directives and SDPX license identifiers from imported files.
///
/// NOTE: the SDPX license identifier will be removed from the imported file
/// only if it is found at the beginning of the file.
pub fn flatten(&self, target: &Path) -> Result<String> {
self.paths.flatten(target)
}
}
enum PreprocessedJob<T: ArtifactOutput> {

View File

@ -59,6 +59,10 @@ impl<T: ArtifactOutput> TempProject<T> {
self.project().compile()
}
pub fn flatten(&self, target: &Path) -> Result<String> {
self.project().flatten(target)
}
pub fn project_mut(&mut self) -> &mut Project<T> {
&mut self.inner
}

View File

@ -32,8 +32,9 @@ use std::{
};
use rayon::prelude::*;
use regex::Match;
use semver::VersionReq;
use solang_parser::pt::{Import, SourceUnitPart};
use solang_parser::pt::{Import, Loc, SourceUnitPart};
use crate::{error::Result, utils, ProjectPathsConfig, Solc, Source, Sources};
@ -153,14 +154,12 @@ impl Graph {
};
for import in node.data.imports.iter() {
match paths.resolve_import(cwd, import) {
match paths.resolve_import(cwd, import.data()) {
Ok(import) => {
add_node(&mut unresolved, &mut index, &mut resolved_imports, import)?;
}
Err(err) => {
tracing::trace!("{}", err)
}
}
Err(err) => tracing::trace!("failed to resolve import component \"{:?}\"", err),
};
}
nodes.push(node);
edges.push(resolved_imports);
@ -422,12 +421,81 @@ pub struct Node {
data: SolData,
}
impl Node {
pub fn content(&self) -> &str {
&self.source.content
}
pub fn imports(&self) -> &Vec<SolDataUnit<PathBuf>> {
&self.data.imports
}
pub fn version(&self) -> &Option<SolDataUnit<String>> {
&self.data.version
}
pub fn license(&self) -> &Option<SolDataUnit<String>> {
&self.data.license
}
}
#[derive(Debug, Clone)]
#[allow(unused)]
struct SolData {
version: Option<String>,
license: Option<SolDataUnit<String>>,
version: Option<SolDataUnit<String>>,
imports: Vec<SolDataUnit<PathBuf>>,
version_req: Option<VersionReq>,
imports: Vec<PathBuf>,
}
#[derive(Debug, Clone)]
pub struct SolDataUnit<T> {
loc: Location,
data: T,
}
#[derive(Debug, Clone)]
pub struct Location {
pub start: usize,
pub end: usize,
}
/// Solidity Data Unit decorated with its location within the file
impl<T> SolDataUnit<T> {
pub fn new(data: T, loc: Location) -> Self {
Self { data, loc }
}
/// Returns the underlying data for the unit
pub fn data(&self) -> &T {
&self.data
}
/// Returns the location of the given data unit
pub fn loc(&self) -> (usize, usize) {
(self.loc.start, self.loc.end)
}
/// Returns the location of the given data unit adjusted by an offset.
/// Used to determine new position of the unit within the file after
/// content manipulation.
pub fn loc_by_offset(&self, offset: isize) -> (usize, usize) {
(
offset.saturating_add(self.loc.start as isize) as usize,
offset.saturating_add(self.loc.end as isize) as usize,
)
}
}
impl From<Match<'_>> for Location {
fn from(src: Match) -> Self {
Location { start: src.start(), end: src.end() }
}
}
impl From<Loc> for Location {
fn from(src: Loc) -> Self {
Location { start: src.1, end: src.2 }
}
}
fn read_node(file: impl AsRef<Path>) -> Result<Node> {
@ -443,24 +511,24 @@ fn read_node(file: impl AsRef<Path>) -> Result<Node> {
/// parsing fails, we'll fall back to extract that info via regex
fn parse_data(content: &str) -> SolData {
let mut version = None;
let mut imports = Vec::new();
let mut imports = Vec::<SolDataUnit<PathBuf>>::new();
match solang_parser::parse(content, 0) {
Ok(units) => {
for unit in units.0 {
match unit {
SourceUnitPart::PragmaDirective(_, _, pragma, value) => {
SourceUnitPart::PragmaDirective(loc, _, pragma, value) => {
if pragma.name == "solidity" {
// we're only interested in the solidity version pragma
version = Some(value.string);
version = Some(SolDataUnit::new(value.string, loc.into()));
}
}
SourceUnitPart::ImportDirective(_, import) => {
let import = match import {
Import::Plain(s, _) => s,
Import::GlobalSymbol(s, _, _) => s,
Import::Rename(s, _, _) => s,
let (import, loc) = match import {
Import::Plain(s, l) => (s, l),
Import::GlobalSymbol(s, _, l) => (s, l),
Import::Rename(s, _, l) => (s, l),
};
imports.push(PathBuf::from(import.string));
imports.push(SolDataUnit::new(PathBuf::from(import.string), loc.into()));
}
_ => {}
}
@ -471,21 +539,50 @@ fn parse_data(content: &str) -> SolData {
"failed to parse solidity ast: \"{:?}\". Falling back to regex to extract data",
err
);
version = utils::find_version_pragma(content).map(str::to_string);
imports = utils::find_import_paths(content)
.into_iter()
.map(|p| Path::new(p).to_path_buf())
.collect()
version = capture_outer_and_inner(content, &utils::RE_SOL_PRAGMA_VERSION, &["version"])
.first()
.map(|(cap, name)| {
SolDataUnit::new(name.as_str().to_owned(), cap.to_owned().into())
});
imports = capture_outer_and_inner(content, &utils::RE_SOL_IMPORT, &["p1", "p2", "p3"])
.iter()
.map(|(cap, m)| SolDataUnit::new(PathBuf::from(m.as_str()), cap.to_owned().into()))
.collect();
}
};
let version_req = if let Some(ref v) = version { Solc::version_req(v).ok() } else { None };
SolData { version_req, version, imports }
let license = content.lines().next().and_then(|line| {
capture_outer_and_inner(line, &utils::RE_SOL_SDPX_LICENSE_IDENTIFIER, &["license"])
.first()
.map(|(cap, l)| SolDataUnit::new(l.as_str().to_owned(), cap.to_owned().into()))
});
let version_req = version.as_ref().and_then(|v| Solc::version_req(v.data()).ok());
SolData { version_req, version, imports, license }
}
/// Given the regex and the target string, find all occurrences
/// of named groups within the string. This method returns
/// the tuple of matches `(a, b)` where `a` is the match for the
/// entire regex and `b` is the match for the first named group.
///
/// NOTE: This method will return the match for the first named
/// group, so the order of passed named groups matters.
fn capture_outer_and_inner<'a>(
content: &'a str,
regex: &regex::Regex,
names: &[&str],
) -> Vec<(regex::Match<'a>, regex::Match<'a>)> {
regex
.captures_iter(content)
.filter_map(|cap| {
let cap_match = names.iter().find_map(|name| cap.name(name));
cap_match.and_then(|m| cap.get(0).map(|outer| (outer.to_owned(), m)))
})
.collect()
}
#[cfg(test)]
mod tests {
use super::*;
use std::path::Path;
#[test]
fn can_resolve_hardhat_dependency_graph() {
@ -527,11 +624,8 @@ mod tests {
let dapp_test = graph.node(1);
assert_eq!(dapp_test.path, paths.sources.join("Dapp.t.sol"));
assert_eq!(
dapp_test.data.imports,
vec![
Path::new("ds-test/test.sol").to_path_buf(),
Path::new("./Dapp.sol").to_path_buf()
]
dapp_test.data.imports.iter().map(|i| i.data()).collect::<Vec<&PathBuf>>(),
vec![&PathBuf::from("ds-test/test.sol"), &PathBuf::from("./Dapp.sol")]
);
assert_eq!(graph.imported_nodes(1).to_vec(), vec![2, 0]);
}

View File

@ -2,43 +2,49 @@
use std::path::{Component, Path, PathBuf};
use crate::{error::SolcError, SolcIoError};
use crate::{
error::{self, SolcError},
ProjectPathsConfig, SolcIoError,
};
use once_cell::sync::Lazy;
use regex::Regex;
use regex::{Match, Regex};
use semver::Version;
use tiny_keccak::{Hasher, Keccak};
use walkdir::WalkDir;
/// A regex that matches the import path and identifier of a solidity import
/// statement with the named groups "path", "id".
// Adapted from https://github.com/nomiclabs/hardhat/blob/cced766c65b25d3d0beb39ef847246ac9618bdd9/packages/hardhat-core/src/internal/solidity/parse.ts#L100
pub static RE_SOL_IMPORT: Lazy<Regex> = Lazy::new(|| {
// Adapted from https://github.com/nomiclabs/hardhat/blob/cced766c65b25d3d0beb39ef847246ac9618bdd9/packages/hardhat-core/src/internal/solidity/parse.ts#L100
Regex::new(r#"import\s+(?:(?:"(?P<p1>[^;]*)"|'([^;]*)')(?:;|\s+as\s+(?P<id>[^;]*);)|.+from\s+(?:"(?P<p2>.*)"|'(?P<p3>.*)');)"#).unwrap()
});
/// A regex that matches the version part of a solidity pragma
/// as follows: `pragma solidity ^0.5.2;` => `^0.5.2`
/// statement with the named groups "path", "id".
/// statement with the named group "version".
// Adapted from https://github.com/nomiclabs/hardhat/blob/cced766c65b25d3d0beb39ef847246ac9618bdd9/packages/hardhat-core/src/internal/solidity/parse.ts#L119
pub static RE_SOL_PRAGMA_VERSION: Lazy<Regex> =
Lazy::new(|| Regex::new(r"pragma\s+solidity\s+(?P<version>.+?);").unwrap());
/// A regex that matches the SDPX license identifier
/// statement with the named group "license".
pub static RE_SOL_SDPX_LICENSE_IDENTIFIER: Lazy<Regex> =
Lazy::new(|| Regex::new(r"///?\s*SPDX-License-Identifier:\s*(?P<license>.+)").unwrap());
/// Returns all path parts from any solidity import statement in a string,
/// `import "./contracts/Contract.sol";` -> `"./contracts/Contract.sol"`.
///
/// See also https://docs.soliditylang.org/en/v0.8.9/grammar.html
pub fn find_import_paths(contract: &str) -> Vec<&str> {
pub fn find_import_paths(contract: &str) -> impl Iterator<Item = Match> {
RE_SOL_IMPORT
.captures_iter(contract)
.filter_map(|cap| cap.name("p1").or_else(|| cap.name("p2")).or_else(|| cap.name("p3")))
.map(|m| m.as_str())
.collect()
}
/// Returns the solidity version pragma from the given input:
/// `pragma solidity ^0.5.2;` => `^0.5.2`
pub fn find_version_pragma(contract: &str) -> Option<&str> {
RE_SOL_PRAGMA_VERSION.captures(contract)?.name("version").map(|m| m.as_str())
pub fn find_version_pragma(contract: &str) -> Option<Match> {
RE_SOL_PRAGMA_VERSION.captures(contract)?.name("version")
}
/// Returns a list of absolute paths to all the solidity files under the root, or the file itself,
@ -79,6 +85,37 @@ pub fn canonicalize(path: impl AsRef<Path>) -> Result<PathBuf, SolcIoError> {
dunce::canonicalize(&path).map_err(|err| SolcIoError::new(err, path))
}
/// Try to resolve import to a local file or library path
pub fn resolve_import_component(
import: &Path,
node_dir: &Path,
paths: &ProjectPathsConfig,
) -> error::Result<PathBuf> {
let component = match import.components().next() {
Some(inner) => inner,
None => {
return Err(SolcError::msg(format!(
"failed to resolve import at \"{:?}\"",
import.display()
)))
}
};
if component == Component::CurDir || component == Component::ParentDir {
// if the import is relative we assume it's already part of the processed input file set
canonicalize(node_dir.join(import)).map_err(|err| err.into())
} else {
// resolve library file
match paths.resolve_library_import(import.as_ref()) {
Some(lib) => Ok(lib),
None => Err(SolcError::msg(format!(
"failed to resolve library import \"{:?}\"",
import.display()
))),
}
}
}
/// Returns the path to the library if the source path is in fact determined to be a library path,
/// and it exists.
/// Note: this does not handle relative imports or remappings.
@ -305,7 +342,7 @@ import { T } from '../Test2.sol';
"##;
assert_eq!(
vec!["hardhat/console.sol", "../contract/Contract.sol", "../Test.sol", "../Test2.sol"],
find_import_paths(s)
find_import_paths(s).map(|m| m.as_str()).collect::<Vec<&str>>()
);
}
#[test]
@ -313,7 +350,7 @@ import { T } from '../Test2.sol';
let s = r##"//SPDX-License-Identifier: Unlicense
pragma solidity ^0.8.0;
"##;
assert_eq!(Some("^0.8.0"), find_version_pragma(s));
assert_eq!(Some("^0.8.0"), find_version_pragma(s).map(|s| s.as_str()));
}
#[test]

View File

@ -307,3 +307,56 @@ fn copy_dir_all(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> io::Result<()>
}
Ok(())
}
#[test]
fn can_flatten_file() {
let root = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("test-data/test-contract-libs");
let target = root.join("src").join("Foo.sol");
let paths = ProjectPathsConfig::builder()
.sources(root.join("src"))
.lib(root.join("lib1"))
.lib(root.join("lib2"));
let project = TempProject::<MinimalCombinedArtifacts>::new(paths).unwrap();
let result = project.flatten(&target);
assert!(result.is_ok());
let result = result.unwrap();
assert!(result.find("contract Foo").is_some());
assert!(result.find("contract Bar").is_some());
}
#[test]
fn can_flatten_file_with_external_lib() {
let root = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("test-data/hardhat-sample");
let paths = ProjectPathsConfig::builder()
.sources(root.join("contracts"))
.lib(root.join("node_modules"));
let project = TempProject::<MinimalCombinedArtifacts>::new(paths).unwrap();
let target = root.join("contracts").join("Greeter.sol");
let result = project.flatten(&target);
assert!(result.is_ok());
let result = result.unwrap();
assert!(result.find("library console").is_some());
assert!(result.find("contract Greeter").is_some());
}
#[test]
fn can_flatten_file_in_dapp_sample() {
let root = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("test-data/dapp-sample");
let paths = ProjectPathsConfig::builder().sources(root.join("src")).lib(root.join("lib"));
let project = TempProject::<MinimalCombinedArtifacts>::new(paths).unwrap();
let target = root.join("src/Dapp.t.sol");
let result = project.flatten(&target);
assert!(result.is_ok());
let result = result.unwrap();
assert!(result.find("contract DSTest").is_some());
assert!(result.find("contract Dapp").is_some());
assert!(result.find("contract DappTest").is_some());
}