From 96dd34287ef0ed68a7a06def14a433d936a79b69 Mon Sep 17 00:00:00 2001 From: Curtis Spencer <41347+jubos@users.noreply.github.com> Date: Sun, 6 Mar 2022 07:21:19 -0800 Subject: [PATCH] feat(etherscan): source tree support (#990) * Create a source tree of paths and their respective contents from the ContractMetadata. This is useful for file level analysis or writing the resulting files to disk. * Test writing to disk via cargo test * Test etherscan by uncommenting the ignore on can_fetch_contract_source_tree_for_multi_entry_contract and can_fetch_contract_source_tree_for_singleton_contract --- Cargo.lock | 1 + ethers-etherscan/Cargo.toml | 1 + ethers-etherscan/src/contract.rs | 92 +++++++++++++++++++++++++++- ethers-etherscan/src/errors.rs | 2 + ethers-etherscan/src/lib.rs | 1 + ethers-etherscan/src/source_tree.rs | 94 +++++++++++++++++++++++++++++ 6 files changed, 189 insertions(+), 2 deletions(-) create mode 100644 ethers-etherscan/src/source_tree.rs diff --git a/Cargo.lock b/Cargo.lock index bb57082d..e0cdb064 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1275,6 +1275,7 @@ dependencies = [ "serde-aux", "serde_json", "serial_test", + "tempfile", "thiserror", "tokio", ] diff --git a/ethers-etherscan/Cargo.toml b/ethers-etherscan/Cargo.toml index 6775eb9a..ef02f05a 100644 --- a/ethers-etherscan/Cargo.toml +++ b/ethers-etherscan/Cargo.toml @@ -23,6 +23,7 @@ serde-aux = { version = "3.0.1", default-features = false } thiserror = "1.0.29" [dev-dependencies] +tempfile = "3.3.0" tokio = { version = "1.5", features = ["macros", "rt-multi-thread", "time"] } serial_test = "0.6.0" diff --git a/ethers-etherscan/src/contract.rs b/ethers-etherscan/src/contract.rs index 6d7bf431..3f7885dd 100644 --- a/ethers-etherscan/src/contract.rs +++ b/ethers-etherscan/src/contract.rs @@ -1,10 +1,13 @@ -use std::collections::HashMap; +use std::{collections::HashMap, path::Path}; use serde::{Deserialize, Serialize}; use ethers_core::abi::{Abi, Address}; -use crate::{Client, EtherscanError, 
Response, Result}; +use crate::{ + source_tree::{SourceTree, SourceTreeEntry}, + Client, EtherscanError, Response, Result, +}; /// Arguments for verifying contracts #[derive(Debug, Clone, Serialize)] @@ -148,6 +151,16 @@ impl IntoIterator for ContractMetadata { } } +#[derive(Deserialize)] +struct EtherscanSourceEntry { + content: String, +} + +#[derive(Deserialize)] +struct EtherscanSourceJsonMetadata { + sources: HashMap<String, EtherscanSourceEntry>, +} + impl ContractMetadata { /// All ABI from all contracts in the source file pub fn abis(&self) -> Result<Vec<Abi>> { @@ -162,6 +175,42 @@ impl ContractMetadata { pub fn source_code(&self) -> String { self.items.iter().map(|c| c.source_code.as_str()).collect::<Vec<_>>().join("\n") } + + /// Etherscan can either return one raw string that includes all of the solidity for a verified + /// contract or a json struct surrounded in an extra set of {} that includes a directory + /// structure with paths and source code. + fn get_sources_from_etherscan_source_value( + contract_name: &str, + etherscan_source: &str, + ) -> Result<Vec<(String, String)>> { + if etherscan_source.starts_with("{{") && etherscan_source.ends_with("}}") { + let json = &etherscan_source[1..etherscan_source.len() - 1]; + let parsed: EtherscanSourceJsonMetadata = serde_json::from_str(json)?; + Ok(parsed + .sources + .into_iter() + .map(|(path, source_struct)| (path, source_struct.content)) + .collect()) + } else { + Ok(vec![(contract_name.to_string(), etherscan_source.to_string())]) + } + } + + pub fn source_tree(&self) -> Result<SourceTree> { + let mut entries = vec![]; + for item in &self.items { + let contract_root = Path::new(&item.contract_name); + let source_paths = Self::get_sources_from_etherscan_source_value( + &item.contract_name, + &item.source_code, + )?; + for (path, contents) in source_paths { + let joined = contract_root.join(&path); + entries.push(SourceTreeEntry { path: joined, contents }); + } + } + Ok(SourceTree { entries }) + } } /// Etherscan contract metadata @@ -305,6 +354,45 @@ mod tests { .await } + /// Query 
a contract that has a single string source entry instead of underlying JSON metadata. + #[tokio::test] + #[serial] + #[ignore] + async fn can_fetch_contract_source_tree_for_singleton_contract() { + run_at_least_duration(Duration::from_millis(250), async { + let client = Client::new_from_env(Chain::Mainnet).unwrap(); + + let meta = client + .contract_source_code("0xBB9bc244D798123fDe783fCc1C72d3Bb8C189413".parse().unwrap()) + .await + .unwrap(); + + let source_tree = meta.source_tree().unwrap(); + assert_eq!(source_tree.entries.len(), 1); + }) + .await + } + + /// Query a contract that has many source entries as JSON metadata and ensure they are + /// reflected. + #[tokio::test] + #[serial] + #[ignore] + async fn can_fetch_contract_source_tree_for_multi_entry_contract() { + run_at_least_duration(Duration::from_millis(250), async { + let client = Client::new_from_env(Chain::Mainnet).unwrap(); + + let meta = client + .contract_source_code("0x8d04a8c79cEB0889Bdd12acdF3Fa9D207eD3Ff63".parse().unwrap()) + .await + .unwrap(); + + let source_tree = meta.source_tree().unwrap(); + assert_eq!(source_tree.entries.len(), 15); + }) + .await + } + #[tokio::test] #[serial] async fn can_flatten_and_verify_contract() { diff --git a/ethers-etherscan/src/errors.rs b/ethers-etherscan/src/errors.rs index ab6b6b0d..fcd59263 100644 --- a/ethers-etherscan/src/errors.rs +++ b/ethers-etherscan/src/errors.rs @@ -23,4 +23,6 @@ pub enum EtherscanError { Serde(#[from] serde_json::Error), #[error("Contract source code not verified: {0}")] ContractCodeNotVerified(Address), + #[error(transparent)] + IO(#[from] std::io::Error), } diff --git a/ethers-etherscan/src/lib.rs b/ethers-etherscan/src/lib.rs index 6f2ebb9f..b3e52e1c 100644 --- a/ethers-etherscan/src/lib.rs +++ b/ethers-etherscan/src/lib.rs @@ -12,6 +12,7 @@ pub mod account; pub mod contract; pub mod errors; pub mod gas; +pub mod source_tree; pub mod transaction; pub(crate) type Result<T> = std::result::Result<T, EtherscanError>; diff --git 
a/ethers-etherscan/src/source_tree.rs b/ethers-etherscan/src/source_tree.rs new file mode 100644 index 00000000..5e0ef18a --- /dev/null +++ b/ethers-etherscan/src/source_tree.rs @@ -0,0 +1,94 @@ +use crate::Result; +use std::{ + fs::create_dir_all, + path::{Component, Path, PathBuf}, +}; + +#[derive(Debug)] +pub struct SourceTreeEntry { + pub path: PathBuf, + pub contents: String, +} + +#[derive(Debug)] +pub struct SourceTree { + pub entries: Vec<SourceTreeEntry>, +} + +impl SourceTree { + /// Expand the source tree into the provided directory. This method sanitizes paths to ensure + /// that no directory traversal happens. + pub fn write_to(&self, dir: &Path) -> Result<()> { + create_dir_all(&dir)?; + for entry in &self.entries { + let sanitized_path = sanitize_path(&entry.path); + let joined = dir.join(sanitized_path); + if let Some(parent) = joined.parent() { + create_dir_all(parent)?; + std::fs::write(joined, &entry.contents)?; + } + } + Ok(()) + } +} + +/// Remove any components in a smart contract source path that could cause a directory traversal. +fn sanitize_path(path: &Path) -> PathBuf { + Path::new(path) + .components() + .filter(|x| x.as_os_str() != Component::ParentDir.as_os_str()) + .collect::<PathBuf>() +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs::read_dir; + + #[test] + fn test_source_tree_write() { + let tempdir = tempfile::tempdir().unwrap(); + let st = SourceTree { + entries: vec![ + SourceTreeEntry { path: PathBuf::from("a/a.sol"), contents: String::from("Test") }, + SourceTreeEntry { + path: PathBuf::from("b/b.sol"), + contents: String::from("Test 2"), + }, + ], + }; + st.write_to(&tempdir.path()).unwrap(); + let written_paths = read_dir(tempdir.path()).unwrap(); + let paths: Vec<PathBuf> = + written_paths.into_iter().filter_map(|x| x.ok()).map(|x| x.path()).collect(); + assert_eq!(paths.len(), 2); + assert!(paths.contains(&tempdir.path().join("a"))); + assert!(paths.contains(&tempdir.path().join("b"))); + } + + /// Ensure that the .. 
are ignored when writing the source tree to disk because of + /// sanitization. + #[test] + fn test_malformed_source_tree_write() { + let tempdir = tempfile::tempdir().unwrap(); + let st = SourceTree { + entries: vec![ + SourceTreeEntry { + path: PathBuf::from("../a/a.sol"), + contents: String::from("Test"), + }, + SourceTreeEntry { + path: PathBuf::from("../b/../b.sol"), + contents: String::from("Test 2"), + }, + ], + }; + st.write_to(&tempdir.path()).unwrap(); + let written_paths = read_dir(tempdir.path()).unwrap(); + let paths: Vec<PathBuf> = + written_paths.into_iter().filter_map(|x| x.ok()).map(|x| x.path()).collect(); + assert_eq!(paths.len(), 2); + assert!(paths.contains(&tempdir.path().join("a"))); + assert!(paths.contains(&tempdir.path().join("b"))); + } +}