-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathtypes.ts
495 lines (434 loc) · 17.2 KB
/
types.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
import {Accessor, ExceptionVal, Invoked, Receiver} from "./nodeprof";
// Various types used throughout the analysis.
/**
 * An abstract machine to interpret JavaScript code.
 *
 * Implementations of this abstract machine should maintain a stack of abstract
 * values.
 *
 * Apart from `getTaint` and `endExecution`, every operation is declared as a
 * property holding a function that takes one tuple, `input`, and returns
 * nothing. The `@param` tags on each operation appear to describe the
 * elements of that tuple, in order (NOTE(review): confirm wherever the number
 * of tags and the tuple length disagree).
 *
 * All `description` arguments to these functions describe *why* and *where* in
 * the code the action occurred.
 */
export interface AbstractMachine {
/**
 * Pop an abstract value from the stack.
 * @param description why and where the action occurred
 */
pop: (input: [StaticDescription]) => void;
/**
 * Push a variable's abstract value onto the stack.
 * @param s the name of the variable
 * @param description why and where the action occurred
 */
readVar: (input: [VariableDescription, StaticDescription]) => void;
/**
 * Write the abstract value at the top of the stack to the given variable
 * name.
 * @param s the name of the variable
 * @param description why and where the action occurred
 */
writeVar: (input: [VariableDescription, StaticDescription]) => void;
/**
 * Pop two values off the stack, perform a binary operation on them, and
 * push the result.
 * @param description why and where the action occurred
 */
binary: (input: [StaticDescription]) => void;
/**
 * Pop one value off the stack, perform a unary operation on it, and
 * push the result.
 * @param description why and where the action occurred
 */
unary: (input: [StaticDescription]) => void;
/**
 * Push an object property's abstract value onto the stack.
 * @param o the object
 * @param s the property name
 * @param isMethod is this readProperty being used for a method call?
 * @param isComputed is the property name computed?
 * @param description why and where the action occurred
 */
readProperty: (input: [DynamicDescription, PropertyDescription, boolean, boolean, StaticDescription]) => void;
/**
 * Write an abstract value to an object property from the top of the stack.
 * Pops the value after it is written.
 * @param o the object
 * @param s the property name
 * @param description why and where the action occurred
 */
writeProperty: (input: [DynamicDescription, PropertyDescription, StaticDescription]) => void;
/**
 * Initialize a new variable with the abstract value at the top of the
 * stack. It will be initialized in the current scope.
 * @param s the name of the new variable
 * @param description why and where the action occurred
 */
initVar: (input: [VariableDescription, StaticDescription]) => void;
/**
 * This operation represents the execution of a *function invocation
 * site*. This is the place in the programmer's source code (not a
 * native function) where they *call* the function.
 *
 * This operation represents function invocation from the
 * caller-perspective. `functionEnter` represents function
 * invocation from the callee-perspective.
 *
 * Ensure the stack is properly aligned for `expectedNumArgs`. If too
 * many arguments were supplied, pop them off the stack. If too few
 * arguments were supplied, push abstract values for them.
 *
 * @param name the name of the function according to the caller
 * @param expectedNumArgs the number of arguments this function is
 * expecting
 * @param actualNumArgs the number of arguments actually given
 * @param argShadowIDs the shadowIDs of the arguments themselves
 * @param description why and where the action occurred
 */
functionInvokeStart: (input: [DynamicDescription, number, number, DynamicDescription[], StaticDescription]) => void;
/**
 * This operation represents the return of a *function invocation
 * site*. This is the place in the programmer's source code where they
 * *call* the function.
 *
 * When a function uses the `return` keyword, you will see a
 * functionReturn operation. If the function was called by the programmer's
 * source code (and not a native function), you will see this
 * functionInvokeEnd operation.
 *
 * This operation represents a function return from the
 * caller-perspective. `functionReturn` and `functionExit` represent function
 * return from the callee-perspective.
 *
 * Since we are returning into the programmer's source code, this
 * operation should push the returned value onto the stack.
 *
 * @param name the name of the function according to the caller
 * @param returnedIDs the shadowIDs of the return value (for use in models)
 * @param description why and where the action occurred
 */
functionInvokeEnd: (input: [DynamicDescription, DynamicDescription[], StaticDescription]) => void;
/**
 * This operation represents the execution of a *function*. This
 * operation will occur for both user-defined and native functions.
 *
 * This operation can assume `actualNumArgs` values will be on the
 * stack. This operation should track the flow of these arguments into
 * this function.
 *
 * This operation represents function invocation from the
 * callee-perspective. `functionInvokeStart` represents function
 * invocation from the caller-perspective.
 *
 * @param name the name of the function according to the caller
 * @param actualNumArgs the number of arguments actually given
 * @param description why and where the action occurred
 */
functionEnter: (input: [DynamicDescription, number, StaticDescription]) => void;
/**
 * This operation represents leaving a *function*, seen from the
 * callee-perspective. `functionInvokeEnd` represents the same return from
 * the caller-perspective.
 *
 * NOTE(review): the previous comment here was a verbatim copy of the one on
 * `functionEnter`. What this operation must do to the stack, and what the
 * numeric tuple element means on exit, are not documented anywhere in this
 * file; confirm against an implementation.
 *
 * @param name the name of the function according to the caller
 * @param actualNumArgs the number of arguments actually given (as documented
 * for `functionEnter`; TODO confirm it has the same meaning here)
 * @param description why and where the action occurred
 */
functionExit: (input: [DynamicDescription, number, StaticDescription]) => void;
/**
 * This operation represents the marking of the top-most value on the
 * stack as the return value of this function. It should be saved and
 * popped from the stack.
 *
 * If returning into user-defined code, its value will be pushed back
 * onto the stack in `functionInvokeEnd`.
 *
 * NOTE(review): three parameters are listed below but the tuple only has two
 * elements (`[DynamicDescription, StaticDescription]`). Confirm whether the
 * first element is the function name or the shadowID of the returned value.
 *
 * @param name the name of the function returning from
 * @param shadowID of the return
 * @param description why and where the action occurred
 */
functionReturn: (input: [DynamicDescription, StaticDescription]) => void;
/**
 * Perform the specified builtin on the arguments from the top of the
 * stack.
 * @param name the name of the builtin
 * @param receiver the dynamic description of the receiver object
 * @param actualArgs the number of arguments supplied to the builtin
 * @param extraRecords the extra information needed to implement this
 * builtin model
 * @param isMethod is this builtin being called as a method?
 * @param description why and where the action occurred
 */
builtin: (input: [DynamicDescription, DynamicDescription, number, any, boolean, StaticDescription]) => void;
/**
 * Used to signal a builtin has exited. The result is on the top of the
 * stack. No stack action is required.
 * @param name the name of the builtin
 * @param returnValueName the object identifier of the return value, if
 * it returned an object
 * @param description why and where the action occurred
 */
builtinExit: (input: [DynamicDescription, DynamicDescription, StaticDescription]) => void;
/**
 * Used to signal the end of execution.
 *
 * Unlike the other operations, the parameter is written as an empty array
 * binding pattern rather than a named, typed tuple.
 */
endExecution: ([]) => void;
/**
 * Used to signal a conditional is branching on the abstract value at
 * the top of the stack.
 * @param description why and where the action occurred
 */
conditional: (input: [StaticDescription]) => void;
/**
 * Used to signal the end of a conditional block.
 * @param description why and where the action occurred
 */
conditionalEnd: (input: [StaticDescription]) => void;
/**
 * Produce an abstract value for the given code location, and push it to
 * the stack.
 *
 * This is the only place where abstract values are introduced into the
 * stack machine. All other instructions re-use existing abstract values.
 * @param description why and where the action occurred
 */
literal: (input: [StaticDescription]) => void;
/**
 * Initialize the "arguments" object inside a function. The taint
 * values of the arguments to the current function should be placed
 * inside the shadow object, `argumentsObject`.
 *
 * This taint analysis currently only supports strict mode behavior of
 * the "arguments" object.
 *
 * Documentation about the "arguments exotic object" in JavaScript:
 * https://tc39.es/ecma262/#arguments-exotic-object
 *
 * @param argumentsObject the dynamic description of the arguments
 * object we have to initialize
 * @param description why and where the action occurred
 */
initializeArgumentsObject: (input: [DynamicDescription, StaticDescription]) => void;
/**
 * Presumably signals entry into an async function (TODO confirm; this
 * operation was undocumented).
 *
 * @param input a one-element tuple holding the static description of the
 * action
 */
asyncFunctionEnter: (input: [StaticDescription]) => void;
/**
 * Presumably signals exit from an async function (TODO confirm; this
 * operation was undocumented).
 *
 * @param input a tuple of `[number, DynamicDescription, any, ExceptionVal,
 * StaticDescription]`. The meaning of each element is not documented in this
 * file; verify against the caller.
 */
asyncFunctionExit: (input: [number, DynamicDescription, any, ExceptionVal, StaticDescription]) => void;
/**
 * Presumably signals that an `await` expression is about to be evaluated
 * (TODO confirm; this operation was undocumented).
 *
 * @param input a tuple of `[number, DynamicDescription, any,
 * StaticDescription]`. The meaning of each element is not documented in this
 * file; verify against the caller.
 */
awaitPre: (input: [number, DynamicDescription, any, StaticDescription]) => void;
/**
 * Presumably signals that an `await` expression has finished (TODO confirm;
 * this operation was undocumented).
 *
 * @param input a tuple of `[number, DynamicDescription, any, any,
 * StaticDescription]`. The meaning of each element is not documented in this
 * file; verify against the caller.
 */
awaitPost: (input: [number, DynamicDescription, any, any, StaticDescription]) => void;
/**
 * Presumably signals a promise reaction (TODO confirm; this operation was
 * undocumented).
 *
 * @param input a tuple of `[number, any, DynamicDescription,
 * StaticDescription]`. The meaning of each element is not documented in this
 * file; verify against the caller.
 */
promiseReaction: (input: [number, any, DynamicDescription, StaticDescription]) => void;
/**
 * Presumably signals that a promise was resolved (TODO confirm; this
 * operation was undocumented).
 *
 * @param input a tuple of `[number, any, DynamicDescription,
 * StaticDescription]`. The meaning of each element is not documented in this
 * file; verify against the caller.
 */
promiseResolve: (input: [number, any, DynamicDescription, StaticDescription]) => void;
/**
 * Presumably signals that a promise was rejected (TODO confirm; this
 * operation was undocumented).
 *
 * @param input a tuple of `[number, any, DynamicDescription,
 * StaticDescription]`. The meaning of each element is not documented in this
 * file; verify against the caller.
 */
promiseReject: (input: [number, any, DynamicDescription, StaticDescription]) => void;
/**
 * Get the taint flows this AbstractMachine has detected so far.
 *
 * Note: this method only works for *real* AbstractMachines, meaning
 * all AbstractMachines except for JSWriter. Since JSWriter simply
 * writes the instructions out to a file, it isn't able to compute
 * taint flows itself.
 */
getTaint(): any;
}
// An interface for associating shadow identifiers with arbitrary objects.
//
// Shadow identifiers are just identifiers that are unique to each instance
// of an object.
export interface ShadowMemory {
// Initialize a shadow id for an object.
initialize(o: object): void;
// Retrieve a shadow id for an object. If there is no valid shadow id
// for this object, this will return `undefined`.
// NOTE(review): the declared return type does not include `undefined`,
// so callers compiled with strict null checks are not forced to handle
// that case. Confirm which of the two (comment or type) is intended.
getShadowID(o: object): DynamicDescription;
// Used to keep track of what function we are currently in.
functionEnter(f: Function): void;
// Presumably the counterpart of `functionEnter`, called when the current
// function is left. TODO confirm.
functionExit(): void;
// Presumably called before/after an `await`, identified by `id`. The
// origin of `id` is not visible in this file. TODO confirm.
awaitPre(id: number): void;
awaitPost(id: number): void;
// Declare a variable by its raw (scope-less) name.
// NOTE(review): which scope it is declared in is not visible here.
declare(name: RawVariableDescription): void;
// Map a plain variable name to a `VariableDescription`, i.e. a name that
// also carries information about its scope.
getFullVariableName(name: string): VariableDescription;
}
/**
 * A "shadow object" mirrors one object of the analyzed program so the taint
 * analysis can record which of its properties are tainted.
 *
 * Every object in the program under analysis gets a corresponding
 * ShadowObject. It uses the same property names as the real object, but each
 * property holds a taint value (of type `V`) instead of the real value.
 */
export type ShadowObject<V> = Record<string, V>;
// The name of any member of `AbstractMachine`. Because this is `keyof`, it
// covers every operation and also `getTaint`.
export type Command = keyof AbstractMachine;
/**
 * One `AbstractMachine` command together with its arguments.
 */
export interface Instruction {
    /** The name of the `AbstractMachine` member this instruction refers to. */
    command: Command;
    /**
     * The arguments for `command`. They are untyped here.
     * NOTE(review): presumably the tuple the corresponding `AbstractMachine`
     * operation takes as its `input`; confirm against the producer.
     */
    args: Array<any>;
}
// The possible kinds of taint sources/sinks, as used in the `type` field of
// a `StaticDescription`.
//
// JS expressions should only be recorded as "expr" if they appear within a
// statement block.
export type TaintType = "function"
| "variable"
| "builtin"
| "expr"
| "functionInvocation"
| "functionEnter"
| "functionReturn"
| "literal"
| "declaration"
| "asyncFunctionEnter"
| "asyncFunctionExit"
| "awaitPre"
| "awaitPost";
// A unique identifier for a particular DYNAMIC OBJECT. This is *NOT* the
// variable name. It is auto-generated, and has nothing to do with the
// object itself. Each object in the instrumented program has its own,
// unique, DynamicDescription.
//
// (The `brand` property should not be instantiated. It is a
// hack to implement "branded types": at runtime the value is a plain
// string, but the compiler treats this type as distinct from all other
// aliases of the `string` type that include a brand.)
export type DynamicDescription = string & { readonly brand: unique symbol };
// An identifier that represents a variable's name. It contains JUST
// the variable's name, and no other information about its scope or
// execution environment.
//
// (Like the other description types in this file, this is a "branded"
// string: the `brand` property exists only at the type level and should
// not be instantiated.)
export type RawVariableDescription = string & { readonly brand: unique symbol };
// An identifier for a VARIABLE NAME and its associated scope.
// If a function is called multiple times, the names of the local
// variables in each call should NOT conflict. In other words, the
// `VariableDescription` must include some information about the
// scope it was defined in.
//
// (The `brand` property should not be instantiated. It is a
// hack to implement "branded types": at runtime the value is a plain
// string, but the compiler treats this type as distinct from all other
// aliases of the `string` type that include a brand.)
export type VariableDescription = string & { readonly brand: unique symbol };
// An identifier for a property of an object. It is the `Accessor` type
// imported from "./nodeprof", combined with the same type-level `brand`
// used by the other description types in this file (the `brand` property
// should not be instantiated).
export type PropertyDescription = Accessor & { readonly brand: unique symbol };
// A set of STATIC objects that could be present at a particular source
// location. Flows are detected by checking whether particular DYNAMIC objects
// fit into this set, based on where they travel through the source code.
//
// A description of a taint source/sink. All fields are optional.
//
// NOTE(review): `extends Object` is kept exactly as written; confirm whether
// it is intentional before removing it, since it may affect assignability.
export interface StaticDescription extends Object {
// The kind of source/sink being described.
type?: TaintType;
// A name for the described source/sink (presumably the function or
// variable name; TODO confirm).
name?: string;
// Where in the source code the description applies.
location?: Location;
// Extra options for this description.
config?: TaintConfig;
}
// Optional configuration attached to a `StaticDescription` through its
// `config` field.
export interface TaintConfig {
// NOTE(review): the meaning of `recursive` is not documented in this file;
// verify against the code that reads it.
recursive?: boolean;
}
// A location in source code: a file and/or a position within it. Both fields
// are optional.
//
// NOTE(review): this name shadows the global DOM `Location` type inside this
// module, and `extends Object` is kept exactly as written.
export interface Location extends Object {
// The name of the source file.
fileName?: string;
// A single position or a range of text within the file.
pos?: SourceSpan;
}
/**
 * A position in source code, given either as:
 * - a bare line number, or
 * - a `[line, column]` pair.
 */
export type SourcePosition =
    | [number, number] // line and col number
    | number; // line number
/**
 * A region of source code, given either as:
 * - a range of text with a `start` and an `end` position, or
 * - a single position.
 */
export type SourceSpan =
    | {start: SourcePosition; end: SourcePosition} // a range in source
    | SourcePosition; // a single position in source
/**
 * The kinds of taint tracking Augur supports. Each name says how much
 * information is available when a flow is reported:
 *
 * - Boolean: a flow occurred into sink X
 * - SourcedBoolean: a flow occurred into sink X *from source Y*
 * - Expression: a flow occurred into sink X *and touched these expressions*
 *
 * More information costs more runtime overhead. From fastest to slowest, and
 * with the `AbstractMachine` that implements each one:
 *
 * 1. Boolean, implemented by `BooleanMachine`
 * 2. SourcedBoolean, implemented by `SourcedBooleanMachine`
 * 3. Expression, implemented by `ExpressionMachine`
 *
 * Choose based on how much you need to know about your flows. If you only
 * care whether a flow happened at all, use Boolean. If you need to know which
 * source a flow came from, use SourcedBoolean. If you need detailed
 * information about every line of code a value passed through on its way to
 * a sink, use Expression.
 */
export type TrackingType =
    | "Boolean"
    | "SourcedBoolean"
    | "Expression";
/**
 * Describes one test run. Only `main` is required; every other field may be
 * left out.
 */
export interface RunSpecification extends Object {
    /** The program to instrument. */
    main: string;

    /** Which kind of taint tracking this analysis uses. Defaults to Boolean. */
    tracking?: TrackingType;

    /** Whether verbose logging should be enabled. */
    verbose?: boolean;

    /** The sources of taint. */
    sources?: StaticDescription[];

    /** The sinks for taint. */
    sinks?: StaticDescription[];

    /** The sinks that are expected to be flowed into. */
    expectedFlows?: StaticDescription[];

    /**
     * NOTE(review): undocumented in the original; presumably descriptions of
     * sanitizers. Confirm against the code that reads this field.
     */
    sanitizers?: StaticDescription[];
}
/**
 * The field names a spec file is allowed to contain. Used to check that a
 * loaded spec has the correct shape.
 *
 * Keep this list in step with the fields of `RunSpecification`.
 */
export const VALID_SPEC_FIELDS: string[] = [
    "main", "tracking", "verbose",
    "sources", "sinks", "expectedFlows", "sanitizers",
];
/**
 * A boolean paired with a set of `StaticDescription`s.
 *
 * NOTE(review): presumably `value` is the taint flag and `source` the taint
 * sources it came from (compare the "SourcedBoolean" tracking type); confirm
 * against the machine that uses this class. The original carried an
 * unexplained remark, "Int 0000000000000000001000010", on `source`.
 */
export class SourcedBoolean {
    /**
     * Store both arguments unchanged on the new instance. The set is kept by
     * reference, not copied.
     *
     * @param value the boolean to store
     * @param source the set of descriptions to store alongside it
     */
    constructor(
        public value: boolean,
        public source: Set<StaticDescription>,
    ) {}
}