-
Notifications
You must be signed in to change notification settings - Fork 11
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Verify numeric instructions #37
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
use parity_wasm::elements::Instruction; | ||
use std::error; | ||
use std::fmt; | ||
|
||
#[derive(Debug)] | ||
pub enum InstructionError { | ||
GlobalNotFound, | ||
LocalNotFound, | ||
UnmatchedInstruction, | ||
InvalidOperation(Instruction), | ||
} | ||
|
||
impl fmt::Display for InstructionError { | ||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||
match self { | ||
InstructionError::GlobalNotFound => write!(f, "Global not found"), | ||
InstructionError::LocalNotFound => write!(f, "Local not found"), | ||
InstructionError::UnmatchedInstruction => write!(f, "Unmatched instruction"), | ||
InstructionError::InvalidOperation(i) => { | ||
write!(f, "{}", format!("Invalid operation: {:?}", i).as_str()) | ||
} | ||
} | ||
} | ||
} | ||
|
||
impl error::Error for InstructionError { | ||
fn description(&self) -> &str { | ||
match self { | ||
InstructionError::GlobalNotFound => "Global not found", | ||
InstructionError::LocalNotFound => "Local not found", | ||
InstructionError::UnmatchedInstruction => "Unmatched instruction", | ||
InstructionError::InvalidOperation(_) => "Invalid operation", | ||
} | ||
} | ||
|
||
fn cause(&self) -> Option<&error::Error> { | ||
None | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,7 @@ pub mod deployer; | |
pub mod dropsection; | ||
#[cfg(feature = "wabt")] | ||
pub mod fromwat; | ||
pub mod instructionerrors; | ||
pub mod remapimports; | ||
pub mod remapstart; | ||
pub mod repack; | ||
|
@@ -25,6 +26,11 @@ pub mod trimexports; | |
pub mod trimstartfunc; | ||
pub mod verifyexports; | ||
pub mod verifyimports; | ||
pub mod verifyinstructions; | ||
|
||
use crate::instructionerrors::*; | ||
|
||
use parity_wasm::elements::*; | ||
|
||
mod depgraph; | ||
|
||
|
@@ -72,6 +78,10 @@ pub trait ModuleValidator { | |
fn validate(&self, module: &Module) -> Result<bool, ModuleError>; | ||
} | ||
|
||
pub trait InstructionValidator { | ||
fn validate(&mut self, module: &Module) -> Result<bool, InstructionError>; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think since we have |
||
} | ||
|
||
pub trait ModulePreset { | ||
fn with_preset(preset: &str) -> Result<Self, ()> | ||
where | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,297 @@ | ||
use super::InstructionValidator; | ||
|
||
use parity_wasm::elements::Instruction; | ||
use parity_wasm::elements::Instruction::*; | ||
use parity_wasm::elements::*; | ||
|
||
use crate::instructionerrors::*; | ||
|
||
pub const GET_INST: [Instruction; 2] = [GetGlobal(0), GetLocal(0)]; | ||
|
||
pub const I32_BINOP: [Instruction; 15] = [ | ||
I32Add, I32Sub, I32Mul, I32DivS, I32DivU, I32RemS, I32RemU, I32And, I32Or, I32Xor, I32Shl, | ||
I32ShrS, I32ShrU, I32Rotl, I32Rotr, | ||
]; | ||
|
||
pub const I64_BINOP: [Instruction; 15] = [ | ||
I64Add, I64Sub, I64Mul, I64DivS, I64DivU, I64RemS, I64RemU, I64And, I64Or, I64Xor, I64Shl, | ||
I64ShrS, I64ShrU, I64Rotl, I64Rotr, | ||
]; | ||
|
||
pub const F32_BINOP: [Instruction; 7] = | ||
[F32Add, F32Sub, F32Mul, F32Div, F32Min, F32Max, F32Copysign]; | ||
|
||
pub const F64_BINOP: [Instruction; 7] = | ||
[F64Add, F64Sub, F64Mul, F64Div, F64Min, F64Max, F64Copysign]; | ||
|
||
use self::Filter::*; | ||
use std::mem::discriminant; | ||
|
||
type Pop = Vec<ValueType>; | ||
type Push = Vec<ValueType>; | ||
|
||
struct Signature { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you document these structs with triple-slash comments? That way There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @zixuanzh @Steampunkery please document the code. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
pop: Pop, | ||
push: Push, | ||
} | ||
|
||
pub enum Filter { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps it would be useful to have per-instruction granularity here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Per instruction granularity would be a tricky/impossible thing to implement because you need to simulate an entire stack to know if the right data types will be on that stack at the right time. If you mean just simulating a stack and filtering out certain instructions, then yes, I could do that. |
||
NumericInstructions, | ||
NoFilter, | ||
} | ||
|
||
/// Basic struct for validating modules | ||
pub struct VerifyInstructions { | ||
filter: Filter, | ||
stack: Vec<ValueType>, | ||
} | ||
|
||
impl VerifyInstructions { | ||
pub fn new(filter: Filter) -> Self { | ||
VerifyInstructions { | ||
filter, | ||
stack: vec![], | ||
} | ||
} | ||
|
||
fn check_instructions( | ||
&mut self, | ||
module: &Module, | ||
body: &FuncBody, | ||
index: usize, | ||
) -> Result<bool, InstructionError> { | ||
for instruction in body.code().elements() { | ||
if contains(instruction, &GET_INST) | ||
&& !self.push_global_or_local(module, instruction, body, index)? | ||
{ | ||
return Ok(false); | ||
} | ||
match self.filter { | ||
NumericInstructions => { | ||
let signature = get_instruction_signature(instruction); | ||
// if the instruction does not have a signature we are interested in, we continue | ||
if signature.is_some() | ||
&& !self.validate_instruction(&signature.unwrap(), instruction)? | ||
{ | ||
return Ok(false); | ||
} | ||
} | ||
NoFilter => (), // TODO: do this | ||
}; | ||
} | ||
Ok(true) | ||
} | ||
|
||
fn validate_instruction( | ||
&mut self, | ||
signature: &Signature, | ||
instruction: &Instruction, | ||
) -> Result<bool, InstructionError> { | ||
for signature_value in &signature.pop { | ||
let value = self.stack.pop(); | ||
match value { | ||
Some(stack_value) => { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is a matter of preference but I find it cleaner, in the case of destructuring two-valued enums like |
||
if stack_value != *signature_value { | ||
return Err(InstructionError::InvalidOperation(instruction.clone())); | ||
} | ||
} | ||
None => return Err(InstructionError::InvalidOperation(instruction.clone())), // Instructions are small, so clone | ||
} | ||
} | ||
self.stack.extend(&signature.push); | ||
|
||
Ok(true) | ||
} | ||
|
||
fn push_global_or_local( | ||
&mut self, | ||
module: &Module, | ||
instruction: &Instruction, | ||
body: &FuncBody, | ||
index: usize, | ||
) -> Result<bool, InstructionError> { | ||
// These next couple lines are just to get the parameters of the function we're dealing with. | ||
// We need the parameters because they can be loaded like local variables but they're not in the locals vec | ||
|
||
// type_ref is the index of the FunctionType in types_section | ||
let type_ref = &module.function_section().unwrap().entries()[index].type_ref(); | ||
let type_variant = &module.type_section().unwrap().types()[*type_ref as usize]; | ||
|
||
let mut locals = body.locals().to_vec(); | ||
match type_variant { | ||
Type::Function(ftype) => { | ||
locals.extend(ftype.params().iter().map(|f| Local::new(0, *f))); | ||
} | ||
} | ||
|
||
match instruction { | ||
Instruction::GetGlobal(local) => match locals.get(*local as usize) { | ||
Some(variable) => { | ||
self.stack.push(variable.value_type()); | ||
Ok(true) | ||
} | ||
None => Err(InstructionError::GlobalNotFound), | ||
}, | ||
Instruction::GetLocal(local) => match locals.get(*local as usize) { | ||
Some(variable) => { | ||
self.stack.push(variable.value_type()); | ||
Ok(true) | ||
} | ||
None => Err(InstructionError::LocalNotFound), | ||
}, | ||
_ => Err(InstructionError::UnmatchedInstruction), | ||
} | ||
} | ||
} | ||
|
||
impl InstructionValidator for VerifyInstructions { | ||
fn validate(&mut self, module: &Module) -> Result<bool, InstructionError> { | ||
match module.code_section() { | ||
Some(functions) => { | ||
for (index, function) in functions.bodies().iter().enumerate() { | ||
let is_function_valid: bool = | ||
self.check_instructions(module, function, index)?; | ||
if !is_function_valid { | ||
return Ok(false); | ||
} | ||
} | ||
Ok(true) | ||
} | ||
None => Ok(true), | ||
} | ||
} | ||
} | ||
|
||
fn contains(instruction: &Instruction, container: &[Instruction]) -> bool { | ||
container | ||
.iter() | ||
.any(|f| discriminant(f) == discriminant(instruction)) | ||
} | ||
|
||
fn get_instruction_signature(instruction: &Instruction) -> Option<Signature> { | ||
// returns some signature if there is a type we are interested in | ||
// returns None otherwise | ||
if contains(instruction, &I32_BINOP) { | ||
Some(Signature { | ||
pop: [ValueType::I32; 2].to_vec(), | ||
push: [ValueType::I32; 1].to_vec(), | ||
}) | ||
} else if contains(instruction, &I64_BINOP) { | ||
Some(Signature { | ||
pop: [ValueType::I64; 2].to_vec(), | ||
push: [ValueType::I64; 1].to_vec(), | ||
}) | ||
} else if contains(instruction, &F32_BINOP) { | ||
Some(Signature { | ||
pop: [ValueType::F32; 2].to_vec(), | ||
push: [ValueType::F32; 1].to_vec(), | ||
}) | ||
} else if contains(instruction, &F64_BINOP) { | ||
Some(Signature { | ||
pop: [ValueType::F64; 2].to_vec(), | ||
push: [ValueType::F64; 1].to_vec(), | ||
}) | ||
} else { | ||
None | ||
} | ||
} | ||
|
||
#[cfg(test)] | ||
mod tests { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Needs moar unit tests |
||
use super::*; | ||
use parity_wasm::elements::deserialize_buffer; | ||
|
||
#[test] | ||
fn add_two_simple_binary() { | ||
// WAST: | ||
// (module | ||
// (type $t0 (func (param i32 i32) (result i32))) | ||
// (func $f0 (type $t0) (param $p0 i32) (param $p1 i32) (result i32) | ||
// (i32.add | ||
// (get_local $p0) | ||
// (get_local $p1)))) | ||
let wasm: Vec<u8> = vec![ | ||
0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x07, 0x01, 0x60, 0x02, 0x7f, | ||
0x7f, 0x01, 0x7f, 0x03, 0x02, 0x01, 0x00, 0x0a, 0x09, 0x01, 0x07, 0x00, 0x20, 0x00, | ||
0x20, 0x01, 0x6a, 0x0b, 0x00, 0x14, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x02, 0x0d, 0x01, | ||
0x00, 0x02, 0x00, 0x03, 0x6c, 0x68, 0x73, 0x01, 0x03, 0x72, 0x68, 0x73, | ||
]; | ||
|
||
let module = deserialize_buffer::<Module>(&wasm).unwrap(); | ||
|
||
let mut validator = VerifyInstructions::new(NumericInstructions); | ||
let is_valid = validator.validate(&module).unwrap(); | ||
assert!(true, is_valid) | ||
} | ||
|
||
#[test] | ||
#[should_panic] | ||
fn unmatched_type_failure_binary() { | ||
// Binary incorrectly tries to add an f64 with i32 to return an i32 | ||
// This should be failed by the validator | ||
// WAST: | ||
// (module | ||
// (func $addTwo (param f64 i32) (result i32) | ||
// get_local 0 | ||
// get_local 1 | ||
// i32.add) | ||
// (export "addTwo" (func $addTwo))) | ||
let wasm: Vec<u8> = vec![ | ||
0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x07, 0x01, 0x60, 0x02, 0x7c, | ||
0x7f, 0x01, 0x7f, 0x03, 0x02, 0x01, 0x00, 0x07, 0x0a, 0x01, 0x06, 0x61, 0x64, 0x64, | ||
0x54, 0x77, 0x6f, 0x00, 0x00, 0x0a, 0x09, 0x01, 0x07, 0x00, 0x20, 0x00, 0x20, 0x01, | ||
0x6a, 0x0b, 0x00, 0x19, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x01, 0x09, 0x01, 0x00, 0x06, | ||
0x61, 0x64, 0x64, 0x54, 0x77, 0x6f, 0x02, 0x07, 0x01, 0x00, 0x02, 0x00, 0x00, 0x01, | ||
0x00, | ||
]; | ||
|
||
let module = deserialize_buffer::<Module>(&wasm).unwrap(); | ||
|
||
let mut validator = VerifyInstructions::new(NumericInstructions); | ||
validator.validate(&module).unwrap(); | ||
} | ||
|
||
#[test] | ||
fn print_instructions_complex_binary() { | ||
// WAST: | ||
// (module | ||
// (type $t0 (func (param i32 i32) (result i32))) | ||
// (func $_Z4multii (export "_Z4multii") (type $t0) (param $p0 i32) (param $p1 i32) (result i32) | ||
// (i32.mul | ||
// (get_local $p1) | ||
// (get_local $p0))) | ||
// (func $_Z3addii (export "_Z3addii") (type $t0) (param $p0 i32) (param $p1 i32) (result i32) | ||
// (i32.add | ||
// (get_local $p1) | ||
// (get_local $p0))) | ||
// (func $_Z6divideii (export "_Z6divideii") (type $t0) (param $p0 i32) (param $p1 i32) (result i32) | ||
// (i32.div_s | ||
// (get_local $p0) | ||
// (get_local $p1))) | ||
// (table $T0 0 anyfunc) | ||
// (memory $memory (export "memory") 1)) | ||
|
||
let wasm: Vec<u8> = vec![ | ||
0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x07, 0x01, 0x60, 0x02, 0x7f, | ||
0x7f, 0x01, 0x7f, 0x03, 0x04, 0x03, 0x00, 0x00, 0x00, 0x04, 0x04, 0x01, 0x70, 0x00, | ||
0x00, 0x05, 0x03, 0x01, 0x00, 0x01, 0x07, 0x2f, 0x04, 0x09, 0x5f, 0x5a, 0x34, 0x6d, | ||
0x75, 0x6c, 0x74, 0x69, 0x69, 0x00, 0x00, 0x08, 0x5f, 0x5a, 0x33, 0x61, 0x64, 0x64, | ||
0x69, 0x69, 0x00, 0x01, 0x0b, 0x5f, 0x5a, 0x36, 0x64, 0x69, 0x76, 0x69, 0x64, 0x65, | ||
0x69, 0x69, 0x00, 0x02, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x02, 0x00, 0x0a, | ||
0x19, 0x03, 0x07, 0x00, 0x20, 0x01, 0x20, 0x00, 0x6c, 0x0b, 0x07, 0x00, 0x20, 0x01, | ||
0x20, 0x00, 0x6a, 0x0b, 0x07, 0x00, 0x20, 0x00, 0x20, 0x01, 0x6d, 0x0b, 0x00, 0x4b, | ||
0x04, 0x6e, 0x61, 0x6d, 0x65, 0x01, 0x23, 0x03, 0x00, 0x09, 0x5f, 0x5a, 0x34, 0x6d, | ||
0x75, 0x6c, 0x74, 0x69, 0x69, 0x01, 0x08, 0x5f, 0x5a, 0x33, 0x61, 0x64, 0x64, 0x69, | ||
0x69, 0x02, 0x0b, 0x5f, 0x5a, 0x36, 0x64, 0x69, 0x76, 0x69, 0x64, 0x65, 0x69, 0x69, | ||
0x02, 0x1f, 0x03, 0x00, 0x02, 0x00, 0x02, 0x70, 0x30, 0x01, 0x02, 0x70, 0x31, 0x01, | ||
0x02, 0x00, 0x02, 0x70, 0x30, 0x01, 0x02, 0x70, 0x31, 0x02, 0x02, 0x00, 0x02, 0x70, | ||
0x30, 0x01, 0x02, 0x70, 0x31, | ||
]; | ||
|
||
let module = deserialize_buffer::<Module>(&wasm).unwrap(); | ||
|
||
let mut validator = VerifyInstructions::new(NumericInstructions); | ||
let is_valid = validator.validate(&module).unwrap(); | ||
assert!(true, is_valid) | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If we merge
ModuleError
andInstructionError
, could you create a new PR which just adds theDisplay
andError
traits onModuleError
? Then this PR can just extend that with the new enum values.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Alternatively, instead of adding new variants, it may make sense to just encode these errors as a string in the
Custom
variant ofModuleError
.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Or make
Custom(T)
type generic and throwInstructionError
in there.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Text in general is fine, but for programmatic detection the enums are better.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd like to stick to enum variants if possible, and I can certainly look into merging
InstructionError
intoModuleError
.