1 | """ |
---|
2 | this is a load-generating client program. It does all of its work through a |
---|
3 | given tahoe node (specified by URL), and performs random reads and writes |
---|
4 | to the target. |
---|
5 | |
---|
6 | Run this in a directory with the following files: |
---|
7 | server-URLs : a list of tahoe node URLs (one per line). Each operation |
---|
8 | will use a randomly-selected server. |
---|
9 | root.cap: (string) the top-level directory rwcap to use |
---|
10 | delay: (float) seconds to delay between operations |
---|
11 | operation-mix: "R/W": two ints, relative frequency of read and write ops |
---|
12 | #size:? |
---|
13 | |
---|
14 | Set argv[1] to a per-client stats-NN.out file. This will will be updated with |
---|
15 | running totals of bytes-per-second and operations-per-second. The stats from |
---|
16 | multiple clients can be totalled together and averaged over time to compute |
---|
17 | the traffic being accepted by the grid. |
---|
18 | |
---|
19 | Each time a 'read' operation is performed, the client will begin at the root |
---|
20 | and randomly choose a child. If the child is a directory, the client will |
---|
21 | recurse. If the child is a file, the client will read the contents of the |
---|
22 | file. |
---|
23 | |
---|
24 | Each time a 'write' operation is performed, the client will generate a target |
---|
25 | filename (a random string). 90% of the time, the file will be written into |
---|
26 | the same directory that was used last time (starting at the root). 10% of the |
---|
27 | time, a new directory is created by assembling 1 to 5 pathnames chosen at |
---|
28 | random. The client then writes a certain number of zero bytes to this file. |
---|
29 | The filesize is determined with something like a power-law distribution, with |
---|
30 | a mean of 10kB and a max of 100MB, so filesize=min(int(1.0/random(.0002)),1e8) |
---|
31 | |
---|
32 | |
---|
33 | """ |
from __future__ import annotations

import os, sys, binascii
import json, random, time
import urllib.parse, urllib.request
import http.client

if sys.argv[1] == "--stats":
    statsfiles = sys.argv[2:]
    # gather stats every 10 seconds, do a moving-window average of the last
    # 60 seconds
    DELAY = 10
    MAXSAMPLES = 6
    totals = []
    last_stats: dict[str, float] = {}
    while True:
        stats: dict[str, float] = {}
        for sf in statsfiles:
            for line in open(sf, "r").readlines():
                name, str_value = line.split(":")
                value = int(str_value.strip())
                if name not in stats:
                    stats[name] = 0
                stats[name] += float(value)
            del name
        if last_stats:
            delta = dict( [ (n, stats[n]-last_stats[n])
                            for n in stats ] )
            print("THIS SAMPLE:")
            for name in sorted(delta.keys()):
                avg = float(delta[name]) / float(DELAY)
                print("%20s: %0.2f per second" % (name, avg))
            totals.append(delta)
            while len(totals) > MAXSAMPLES:
                totals.pop(0)

            # now compute average
            print()
            print("MOVING WINDOW AVERAGE:")
            for name in sorted(delta.keys()):
                avg = sum([ s[name] for s in totals ]) / (DELAY*len(totals))
                print("%20s %0.2f per second" % (name, avg))

        last_stats = stats
        print()
        print()
        time.sleep(DELAY)

stats_out = sys.argv[1]

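# Illustrative contents for the config files read below (example values only;
# the actual node URLs and root cap depend on your grid):
#   server-URLs:    http://127.0.0.1:3456/
#   root.cap:       <writecap of the top-level test directory>
#   delay:          0.5
#   operation-mix:  3/1
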
server_urls = []
for url in open("server-URLs", "r").readlines():
    url = url.strip()
    if url:
        server_urls.append(url)
root = open("root.cap", "r").read().strip()
delay = float(open("delay", "r").read().strip())
readfreq, writefreq = (
    [int(x) for x in open("operation-mix", "r").read().strip().split("/")])


files_uploaded = 0
files_downloaded = 0
bytes_uploaded = 0
bytes_downloaded = 0
directories_read = 0
directories_written = 0

def listdir(nodeurl, root, remote_pathname):
    if nodeurl[-1] != "/":
        nodeurl += "/"
    url = nodeurl + "uri/%s/" % urllib.parse.quote(root)
    if remote_pathname:
        url += urllib.parse.quote(remote_pathname)
    url += "?t=json"
    data = urllib.request.urlopen(url).read()
    try:
        parsed = json.loads(data)
    except ValueError:
        print("URL was", url)
        print("DATA was", data)
        raise
    nodetype, d = parsed
    assert nodetype == "dirnode"
    global directories_read
    directories_read += 1
    children = { str(name): value
                 for (name, value) in d["children"].items() }
    return children
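
# For reference, the ?t=json response that listdir() unpacks above has roughly
# this shape (child metadata elided):
#   ["dirnode", {"children": {"somefile": ["filenode", {...}],
#                             "somedir":  ["dirnode",  {...}]}}]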

def choose_random_descendant(server_url, root, pathname=""):
    children = listdir(server_url, root, pathname)
    name = random.choice(list(children))
    child = children[name]
    if pathname:
        new_pathname = pathname + "/" + name
    else:
        new_pathname = name
    if child[0] == "filenode":
        return new_pathname
    return choose_random_descendant(server_url, root, new_pathname)

def read_and_discard(nodeurl, root, pathname):
    if nodeurl[-1] != "/":
        nodeurl += "/"
    url = nodeurl + "uri/%s/" % urllib.parse.quote(root)
    if pathname:
        url += urllib.parse.quote(pathname)
    f = urllib.request.urlopen(url)
    global bytes_downloaded
    while True:
        data = f.read(4096)
        if not data:
            break
        bytes_downloaded += len(data)


directories = [
    "dreamland/disengaging/hucksters",
    "dreamland/disengaging/klondikes",
    "dreamland/disengaging/neatly",
    "dreamland/cottages/richmond",
    "dreamland/cottages/perhaps",
    "dreamland/cottages/spies",
    "dreamland/finder/diversion",
    "dreamland/finder/cigarette",
    "dreamland/finder/album",
    "hazing/licences/comedian",
    "hazing/licences/goat",
    "hazing/licences/shopkeeper",
    "hazing/regiment/frigate",
    "hazing/regiment/quackery",
    "hazing/regiment/centerpiece",
    "hazing/disassociate/mob",
    "hazing/disassociate/nihilistic",
    "hazing/disassociate/bilbo",
    ]

def create_random_directory():
    d = random.choice(directories)
    pieces = d.split("/")
    numsegs = random.randint(1, len(pieces))
    return "/".join(pieces[0:numsegs])

def generate_filename():
    # hexlify() returns bytes, so decode to get a usable str filename
    fn = binascii.hexlify(os.urandom(4)).decode("ascii")
    return fn

def choose_size():
    # size distribution described in the module docstring: mean 10kB,
    # capped at 100MB
    mean = 10e3
    size = random.expovariate(1.0 / mean)
    return int(min(size, 100e6))

# copied from twisted/web/client.py
def parse_url(url, defaultPort=None):
    url = url.strip()
    parsed = urllib.parse.urlparse(url)
    scheme = parsed[0]
    path = urllib.parse.urlunparse(('','')+parsed[2:])
    if defaultPort is None:
        if scheme == 'https':
            defaultPort = 443
        else:
            defaultPort = 80
    host, port = parsed[1], defaultPort
    if ':' in host:
        host, port = host.split(':')
        port = int(port)
    if path == "":
        path = "/"
    return scheme, host, port, path

def generate_and_put(nodeurl, root, remote_filename, size):
    if nodeurl[-1] != "/":
        nodeurl += "/"
    url = nodeurl + "uri/%s/" % urllib.parse.quote(root)
    url += urllib.parse.quote(remote_filename)

    scheme, host, port, path = parse_url(url)
    if scheme == "http":
        c = http.client.HTTPConnection(host, port)
    elif scheme == "https":
        c = http.client.HTTPSConnection(host, port)
    else:
        raise ValueError("unknown scheme '%s', need http or https" % scheme)
    c.putrequest("PUT", path)
    c.putheader("Hostname", host)
    c.putheader("User-Agent", "tahoe-check-load")
    c.putheader("Connection", "close")
    c.putheader("Content-Length", "%d" % size)
    c.endheaders()
    global bytes_uploaded
    while size:
        chunksize = min(size, 4096)
        size -= chunksize
        c.send(b"\x00" * chunksize)
        bytes_uploaded += chunksize
    return c.getresponse()


current_writedir = ""

while True:
    time.sleep(delay)
    if random.uniform(0, readfreq+writefreq) < readfreq:
        op = "read"
    else:
        op = "write"
    print("OP:", op)
    server = random.choice(server_urls)
    if op == "read":
        pathname = choose_random_descendant(server, root)
        print(" reading", pathname)
        read_and_discard(server, root, pathname)
        files_downloaded += 1
    elif op == "write":
        if random.uniform(0, 100) < 10:
            current_writedir = create_random_directory()
        filename = generate_filename()
        if current_writedir:
            pathname = current_writedir + "/" + filename
        else:
            pathname = filename
        print(" writing", pathname)
        size = choose_size()
        print(" size", size)
        generate_and_put(server, root, pathname, size)
        files_uploaded += 1

    f = open(stats_out+".tmp", "w")
    f.write("files-uploaded: %d\n" % files_uploaded)
    f.write("files-downloaded: %d\n" % files_downloaded)
    f.write("bytes-uploaded: %d\n" % bytes_uploaded)
    f.write("bytes-downloaded: %d\n" % bytes_downloaded)
    f.write("directories-read: %d\n" % directories_read)
    f.write("directories-written: %d\n" % directories_written)
    f.close()
    os.rename(stats_out+".tmp", stats_out)