-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathsimrelaytrace.py
executable file
·160 lines (131 loc) · 6.15 KB
/
simrelaytrace.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
#!/usr/bin/env python3
import argparse
import common
import sys
import os
import numpy as np
# Command-line interface. NOTE: the arguments are parsed when this module is
# loaded, and the resulting dict `args` is read as a module-level global by the
# functions defined below.
ap = argparse.ArgumentParser()
ap.add_argument(
    "-i",
    help="input folder of client circpadtrace files",
    required=True,
)
ap.add_argument(
    "-o",
    help="output folder to store simulated relay circpadtrace files in",
    required=True,
)
# store_true flags are False unless given on the command line.
ap.add_argument(
    "--guard",
    action="store_true",
    help="simulate a trace at a guard relay, not middle relay",
)
args = vars(ap.parse_args())

# Noise parameter of the normal distribution that the simulated client-relay
# latency is sampled from (consumed by estimate_latency_in_us).
LATENCY_NOISE_VARIANCE = 0.1
def _sample_relay_send_time(timestamp, latency, earliest, max_tries=1000):
    '''Sample when the relay sent a cell the client received at timestamp.

    Draws timestamp - latency() until the result is not before earliest (the
    last simulated relay send time), so that simulated relay sends stay in
    chronological order. The first acceptable draw is returned.

    If no draw is acceptable within max_tries attempts, earliest is returned
    instead. Without this bound the loop would never terminate whenever
    timestamp minus the (almost noise-free) latency is always before earliest,
    e.g., for a received cell with a timestamp smaller than the latency.
    '''
    for _ in range(max_tries):
        t = timestamp - latency()
        if t >= earliest:
            return t
    return earliest


def _simulate_relay_lines(trace, latency, guard):
    '''Translate client trace lines into the relay's view of the same cells.

    trace is an iterable of client circpadtrace lines, latency a zero-argument
    callable returning a sampled one-way latency, and guard a bool selecting a
    guard trace instead of a middle trace. Returns the list of output lines
    (without trailing newline).
    '''
    # By default, middle relays do not see the first onionskin handshake, so
    # the first sent and first received nonpadding cells are stripped. A guard
    # trace exactly mirrors the client trace: mark both as already skipped.
    skipped_sent = skipped_recv = bool(guard)

    # Since we probabilistically simulate the time for events before our
    # recorded timestamps, we keep tabs of the last (simulated) time the relay
    # sent a cell, such that relay sends keep chronological order.
    last_relay_sent = 0

    # NOTE(review): lines are emitted in client-trace order. Relay sends are
    # shifted back in time and relay receives forward, so timestamps in the
    # output are not guaranteed to be monotonic across the two event types.
    output = []
    for line in trace:
        event, timestamp = common.circpad_parse_line(line)

        if common.CIRCPAD_EVENT_NONPADDING_RECV in event:
            # the relay sent this cell roughly timestamp - latency ago
            t = _sample_relay_send_time(timestamp, latency, last_relay_sent)
            last_relay_sent = t
            if skipped_recv:
                output.append(
                    f"{t:016d} {common.CIRCPAD_EVENT_NONPADDING_SENT}"
                )
            else:
                skipped_recv = True
        elif common.CIRCPAD_EVENT_NONPADDING_SENT in event:
            # the relay will receive this cell at timestamp + latency
            t = timestamp + latency()
            if skipped_sent:
                output.append(
                    f"{t:016d} {common.CIRCPAD_EVENT_NONPADDING_RECV}"
                )
            else:
                skipped_sent = True
    return output


def main():
    '''Given an input folder of circpadtrace files generated by
    torlog2circpadtrace.py using a client torlog, create a simulated trace for
    a guard relay or a middle relay (middle by default).

    Note that this simulation only works for nonpadding traffic. If a client
    receives padding, we can't tell from the client's trace if it's from the
    relay or not. We assume that the negotiation of the relay's padding machine
    to be later simulated is taken care of by the simulator.

    The simulation estimates the latency between client-relay based on the
    timestamps in the log. This will at best be an approximation. Only use
    these generated traces if you account for the impact of this approximation.

    Exits via sys.exit if the input or output folder is not a directory, or if
    an output file with the same name as an input file already exists.
    '''
    indir, outdir = args["i"], args["o"]
    if not os.path.isdir(indir):
        sys.exit(f"{indir} is not a directory")
    if not os.path.isdir(outdir):
        sys.exit(f"{outdir} is not a directory")

    for fname in os.listdir(indir):
        infname = os.path.join(indir, fname)
        outfname = os.path.join(outdir, fname)

        # never overwrite earlier results
        if os.path.exists(outfname):
            sys.exit(f"output file {outfname} already exists")

        # estimate latency distribution between client-relay
        latency = estimate_latency_in_us(infname)

        with open(infname, 'r') as trace:
            output = _simulate_relay_lines(trace, latency, args["guard"])

        # save results
        with open(outfname, 'w') as f:
            f.writelines(f"{l}\n" for l in output)
def estimate_latency_in_us(infname):
    ''' Builds a function to sample the latency from.

    The way we instrument tor to collect circpadtraces from torlogs records the
    circuit creation. Consider the following first lines of a trace connecting
    to eff.org:

    0000000000000000 circpad_machine_event_circ_added_hop
    0000000000084278 circpad_cell_event_nonpadding_sent
    0000000080279314 circpad_cell_event_nonpadding_received
    0000000080577754 circpad_machine_event_circ_added_hop
    0000000080635312 circpad_cell_event_nonpadding_sent
    0000000228976335 circpad_cell_event_nonpadding_received
    0000000229672419 circpad_machine_event_circ_added_hop
    0000000229680174 circpad_machine_event_circ_built
    0000003116580616 circpad_machine_event_circ_has_streams
    0000003116598049 connection_ap_handshake_send_begin eff.org
    0000003116607777 circpad_cell_event_nonpadding_sent

    The first line we get is when the connection to the guard is completed, due
    to how events are hooked in tor. On lines 2-4 the client extends the circuit
    to the middle relay, followed by completing the circuit to the exit on lines
    5-8. Slightly less than three seconds later, on lines 9-11, the circuit is
    used to instruct the exit to connect to eff.org.

    This gives us that the duration between lines 2-3 is a RTT between client
    and middle relay. We assume that half of it (the one-way latency) is the
    mean of a normal distribution with variance LATENCY_NOISE_VARIANCE. More
    realistic network measurements show a multimodal distribution, a truncated
    normal distribution, or a pareto distribution depending on who you ask.

    Returns a zero-argument function; each call returns one latency sample,
    truncated to an int, in the same unit as the trace timestamps. If the trace
    contains no nonpadding-sent event followed by a nonpadding-received event,
    the RTT (and thus the mean latency) is taken to be 0.
    '''
    CONST_NO_SENT = -1
    latest_sent = CONST_NO_SENT
    rtt = 0
    with open(infname, 'r') as trace:
        for line in trace:
            event, timestamp = common.circpad_parse_line(line)
            if common.CIRCPAD_EVENT_NONPADDING_SENT in event:
                latest_sent = timestamp
            elif common.CIRCPAD_EVENT_NONPADDING_RECV in event:
                # the first received cell after a sent cell completes the RTT
                if latest_sent != CONST_NO_SENT:
                    rtt = timestamp - latest_sent
                    break

    # one-way latency is half the round trip
    latency = rtt / 2
    if args["guard"]:
        # the RTT above was measured to the middle relay; halve again for the
        # guard (presumably assuming the guard sits about halfway -- an
        # approximation)
        latency /= 2

    # np.random.normal takes the standard deviation (scale), not the variance,
    # so convert the configured variance before sampling.
    stddev = LATENCY_NOISE_VARIANCE ** 0.5

    def sample():
        return int(np.random.normal(latency, stddev))

    return sample
# Run the simulation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()