source: trunk/misc/checkers/check_load.py

Last change on this file was 6517cd4, checked in by Itamar Turner-Trauring <itamar@…>, at 2023-04-17T14:22:27Z

Fix lint found by ruff.

  • Property mode set to 100644
File size: 8.6 KB
Line 
1"""
2this is a load-generating client program. It does all of its work through a
3given tahoe node (specified by URL), and performs random reads and writes
4to the target.
5
6Run this in a directory with the following files:
7 server-URLs : a list of tahoe node URLs (one per line). Each operation
8               will use a randomly-selected server.
9 root.cap: (string) the top-level directory rwcap to use
10 delay: (float) seconds to delay between operations
11 operation-mix: "R/W": two ints, relative frequency of read and write ops
12 #size:?
13
14Set argv[1] to a per-client stats-NN.out file. This will be updated with
15running totals of bytes-per-second and operations-per-second. The stats from
16multiple clients can be totalled together and averaged over time to compute
17the traffic being accepted by the grid.
18
19Each time a 'read' operation is performed, the client will begin at the root
20and randomly choose a child. If the child is a directory, the client will
21recurse. If the child is a file, the client will read the contents of the
22file.
23
24Each time a 'write' operation is performed, the client will generate a target
25filename (a random string). 90% of the time, the file will be written into
26the same directory that was used last time (starting at the root). 10% of the
27time, a new directory is created by assembling 1 to 5 pathnames chosen at
28random. The client then writes a certain number of zero bytes to this file.
29The filesize is determined with something like a power-law distribution, with
30a mean of 10kB and a max of 100MB, so filesize=min(int(1.0/random(.0002)),1e8)
31
32
33"""
34from __future__ import annotations
35
36import os, sys, httplib, binascii
37import urllib, json, random, time, urlparse
38
if sys.argv[1] == "--stats":
    # Stats-aggregation mode: read every per-client stats file named on the
    # command line, diff successive samples, and print per-second rates.
    statsfiles = sys.argv[2:]
    # gather stats every 10 seconds, do a moving-window average of the last
    # 60 seconds
    DELAY = 10
    MAXSAMPLES = 6
    totals = []
    last_stats : dict[str, float] = {}
    while True:
        stats : dict[str, float] = {}
        for sf in statsfiles:
            # use a context manager so handles are closed each cycle instead
            # of leaking one per file per iteration
            with open(sf, "r") as f:
                for line in f:
                    name, str_value = line.split(":")
                    value = int(str_value.strip())
                    stats[name] = stats.get(name, 0) + float(value)
        if last_stats:
            # per-counter delta since the previous sample
            delta = {n: stats[n] - last_stats[n] for n in stats}
            print("THIS SAMPLE:")
            for name in sorted(delta.keys()):
                avg = float(delta[name]) / float(DELAY)
                print("%20s: %0.2f per second" % (name, avg))
            totals.append(delta)
            # keep only the most recent MAXSAMPLES deltas (the moving window)
            while len(totals) > MAXSAMPLES:
                totals.pop(0)

            # now compute average
            print()
            print("MOVING WINDOW AVERAGE:")
            for name in sorted(delta.keys()):
                avg = sum(s[name] for s in totals) / (DELAY * len(totals))
                print("%20s %0.2f per second" % (name, avg))

        last_stats = stats
        print()
        print()
        time.sleep(DELAY)
# Load-generation mode: argv[1] names the stats file this client keeps updated.
stats_out = sys.argv[1]

# One tahoe node URL per non-blank line; each operation picks one at random.
with open("server-URLs", "r") as f:
    server_urls = [line.strip() for line in f if line.strip()]
with open("root.cap", "r") as f:
    root = f.read().strip()          # rwcap of the top-level directory
with open("delay", "r") as f:
    delay = float(f.read().strip())  # seconds to sleep between operations
with open("operation-mix", "r") as f:
    # "R/W": two ints giving the relative frequency of read vs. write ops
    readfreq, writefreq = [int(x) for x in f.read().strip().split("/")]


# Running totals, rewritten to `stats_out` after every operation.
files_uploaded = 0
files_downloaded = 0
bytes_uploaded = 0
bytes_downloaded = 0
directories_read = 0
directories_written = 0
99
def listdir(nodeurl, root, remote_pathname):
    """Fetch the t=json listing of a directory and return its children.

    Returns a dict mapping child name (str) -> [nodetype, metadata] as
    produced by the tahoe webapi. Increments the global directories_read
    counter. Raises ValueError (re-raised, after printing the URL and body)
    if the response is not valid JSON.
    """
    # py2's flat `urllib` module is gone on py3; use the py3 equivalents
    from urllib.parse import quote
    from urllib.request import urlopen

    if nodeurl[-1] != "/":
        nodeurl += "/"
    url = nodeurl + "uri/%s/" % quote(root)
    if remote_pathname:
        url += quote(remote_pathname)
    url += "?t=json"
    with urlopen(url) as f:
        data = f.read()
    try:
        parsed = json.loads(data)
    except ValueError:
        print("URL was", url)
        print("DATA was", data)
        raise
    nodetype, d = parsed
    assert nodetype == "dirnode"
    global directories_read
    directories_read += 1
    # dict.iteritems() does not exist on py3; .items() is the equivalent here
    children = dict([(str(name), value)
                     for (name, value)
                     in d["children"].items()])
    return children
122
123
def choose_random_descendant(server_url, root, pathname=""):
    """Walk downward from `pathname`, choosing a random child at each level,
    until a filenode is reached; return its path relative to the root.

    Recurses into directory children; assumes every directory eventually
    leads to at least one file.
    """
    children = listdir(server_url, root, pathname)
    # random.choice() needs a sequence; on py3 dict.keys() is only a view,
    # so materialize the names into a list first
    name = random.choice(list(children))
    child = children[name]
    if pathname:
        new_pathname = pathname + "/" + name
    else:
        new_pathname = name
    if child[0] == "filenode":
        return new_pathname
    return choose_random_descendant(server_url, root, new_pathname)
135
def read_and_discard(nodeurl, root, pathname):
    """Download the file at `pathname` in 4 kB chunks, discarding the data
    but adding its length to the global bytes_downloaded counter."""
    # py2's flat `urllib` module is gone on py3; use the py3 equivalents
    from urllib.parse import quote
    from urllib.request import urlopen

    if nodeurl[-1] != "/":
        nodeurl += "/"
    url = nodeurl + "uri/%s/" % quote(root)
    if pathname:
        url += quote(pathname)
    global bytes_downloaded
    # context manager guarantees the HTTP response is closed when done
    with urlopen(url) as f:
        while True:
            data = f.read(4096)
            if not data:
                break
            bytes_downloaded += len(data)
149
150
# The fixed universe of target directories: two top-level trees, each with
# three subtrees of three leaves apiece (18 three-segment paths in total).
_DIRECTORY_TREE = {
    "dreamland": {
        "disengaging": ("hucksters", "klondikes", "neatly"),
        "cottages": ("richmond", "perhaps", "spies"),
        "finder": ("diversion", "cigarette", "album"),
    },
    "hazing": {
        "licences": ("comedian", "goat", "shopkeeper"),
        "regiment": ("frigate", "quackery", "centerpiece"),
        "disassociate": ("mob", "nihilistic", "bilbo"),
    },
}

directories = [
    "%s/%s/%s" % (top, mid, leaf)
    for top, subtrees in _DIRECTORY_TREE.items()
    for mid, leaves in subtrees.items()
    for leaf in leaves
]
171
def create_random_directory():
    """Return a random ancestor path (1..3 segments) of a random entry
    from the module-level `directories` list."""
    segments = random.choice(directories).split("/")
    cut = random.randint(1, len(segments))
    return "/".join(segments[:cut])
177
def generate_filename():
    """Return a random 8-character hex filename as text.

    bytes.hex() yields a str; the old binascii.hexlify(os.urandom(4)) form
    returned bytes on py3, which broke the later str concatenation when the
    main loop builds `current_writedir + "/" + filename`.
    """
    return os.urandom(4).hex()
181
182def choose_size():
183    mean = 10e3
184    size = random.expovariate(1.0 / mean)
185    return int(min(size, 100e6))
186
# copied from twisted/web/client.py
def parse_url(url, defaultPort=None):
    """Split *url* into a (scheme, host, port, path) tuple.

    The port comes from an explicit host:port suffix, else `defaultPort`,
    else 443 for https / 80 for anything else. The returned path keeps the
    params/query/fragment components and defaults to "/" when empty.
    (Ported from the py2 `urlparse` module; IPv6 literals are not handled.)
    """
    from urllib.parse import urlparse, urlunparse

    url = url.strip()
    parsed = urlparse(url)
    scheme = parsed[0]
    # drop scheme and netloc, keep everything after the authority
    path = urlunparse(('', '') + tuple(parsed[2:]))
    if defaultPort is None:
        if scheme == 'https':
            defaultPort = 443
        else:
            defaultPort = 80
    host, port = parsed[1], defaultPort
    if ':' in host:
        # an explicit port in the URL overrides the scheme default
        host, port = host.split(':')
        port = int(port)
    if path == "":
        path = "/"
    return scheme, host, port, path
205
def generate_and_put(nodeurl, root, remote_filename, size):
    """PUT `size` zero bytes to `remote_filename` under the root directory,
    streaming in 4 kB chunks and counting them into bytes_uploaded.

    Returns the HTTPResponse object. Raises ValueError for a scheme other
    than http/https.
    """
    # py2's `httplib`/`urllib` are gone on py3; use the py3 equivalents
    from http.client import HTTPConnection, HTTPSConnection
    from urllib.parse import quote

    if nodeurl[-1] != "/":
        nodeurl += "/"
    url = nodeurl + "uri/%s/" % quote(root)
    url += quote(remote_filename)

    scheme, host, port, path = parse_url(url)
    if scheme == "http":
        c = HTTPConnection(host, port)
    elif scheme == "https":
        c = HTTPSConnection(host, port)
    else:
        raise ValueError("unknown scheme '%s', need http or https" % scheme)
    c.putrequest("PUT", path)
    c.putheader("Hostname", host)
    c.putheader("User-Agent", "tahoe-check-load")
    c.putheader("Connection", "close")
    c.putheader("Content-Length", "%d" % size)
    c.endheaders()
    global bytes_uploaded
    while size:
        chunksize = min(size, 4096)
        size -= chunksize
        # send() requires bytes on py3; the old "\x00" str literal broke here
        c.send(b"\x00" * chunksize)
        bytes_uploaded += chunksize
    return c.getresponse()
232
233
# 90% of writes reuse the directory of the previous write; see the write
# branch below, which re-rolls this 10% of the time.
current_writedir = ""

# Main load-generation loop: sleep, perform one randomly-chosen read or
# write, then atomically rewrite the running totals to the stats file.
while True:
    time.sleep(delay)
    # pick read vs. write with probability readfreq : writefreq
    if random.uniform(0, readfreq+writefreq) < readfreq:
        op = "read"
    else:
        op = "write"
    print("OP:", op)
    server = random.choice(server_urls)
    if op == "read":
        pathname = choose_random_descendant(server, root)
        print("  reading", pathname)
        read_and_discard(server, root, pathname)
        files_downloaded += 1
    elif op == "write":
        # 10% of the time, switch to a freshly-chosen random directory
        if random.uniform(0, 100) < 10:
            current_writedir = create_random_directory()
        filename = generate_filename()
        if current_writedir:
            pathname = current_writedir + "/" + filename
        else:
            pathname = filename
        print("  writing", pathname)
        size = choose_size()
        print("   size", size)
        generate_and_put(server, root, pathname, size)
        files_uploaded += 1

    # write to a temp file then rename, so readers never see a partial file;
    # `with` closes the handle even if a write fails
    with open(stats_out+".tmp", "w") as f:
        f.write("files-uploaded: %d\n" % files_uploaded)
        f.write("files-downloaded: %d\n" % files_downloaded)
        f.write("bytes-uploaded: %d\n" % bytes_uploaded)
        f.write("bytes-downloaded: %d\n" % bytes_downloaded)
        f.write("directories-read: %d\n" % directories_read)
        f.write("directories-written: %d\n" % directories_written)
    os.rename(stats_out+".tmp", stats_out)
Note: See TracBrowser for help on using the repository browser.