Ticket #302: ringsim.py

File ringsim.py, 10.0 KB (added by warner, at 2009-12-26T04:37:16Z)

Brian's v1 simulator

Line 
1#! /usr/bin/python
2
3import time
4import random, math
5from hashlib import sha1, md5, sha256
6sha1 = md5
7# md5: 1520 "uploads" per second
8# sha1: 1350 ups
9# sha256: 930 ups
10from itertools import count
11from twisted.python import usage
12
13def abbreviate_space(s, SI=True):
14    if s is None:
15        return "unknown"
16    if SI:
17        U = 1000.0
18        isuffix = "B"
19    else:
20        U = 1024.0
21        isuffix = "iB"
22    def r(count, suffix):
23        return "%.2f %s%s" % (count, suffix, isuffix)
24
25    if s < 1024: # 1000-1023 get emitted as bytes, even in SI mode
26        return "%d B" % s
27    if s < U*U:
28        return r(s/U, "k")
29    if s < U*U*U:
30        return r(s/(U*U), "M")
31    if s < U*U*U*U:
32        return r(s/(U*U*U), "G")
33    if s < U*U*U*U*U:
34        return r(s/(U*U*U*U), "T")
35    return r(s/(U*U*U*U*U), "P")
36
37def make_up_a_file_size(max=2**31):
38    #return (2 ** random.randrange(8, 31)) # avg=??
39    return random.randrange(max) # avg 1GB
40sizes = [make_up_a_file_size() for i in range(10000)]
41avg_filesize = sum(sizes)/len(sizes)
42print "average file size:", abbreviate_space(avg_filesize)
43
44SERVER_CAPACITY = 10**12 * 1000
45
46class Server:
47    def __init__(self, nodeid, capacity):
48        self.nodeid = nodeid
49        self.used = 0
50        self.capacity = capacity
51        self.numshares = 0
52        self.full_at_tick = None
53
54    def upload(self, sharesize):
55        if self.used + sharesize < self.capacity:
56            self.used += sharesize
57            self.numshares += 1
58            return True
59        return False
60
61    def __repr__(self):
62        if self.full_at_tick is not None:
63            return "<%s %s full at %d>" % (self.__class__.__name__, self.nodeid, self.full_at_tick)
64        else:
65            return "<%s %s>" % (self.__class__.__name__, self.nodeid)
66
67class Ring:
68    def __init__(self, numservers, seed, permute):
69        self.servers = []
70        for i in range(numservers):
71            nodeid = sha1(str(seed)+str(i)).hexdigest()
72            capacity = SERVER_CAPACITY
73            s = Server(nodeid, capacity)
74            self.servers.append(s)
75        self.servers.sort(key=lambda s: s.nodeid)
76
77        self.permute = permute
78
79    def servers_for_si(self, si):
80        if self.permute:
81            def sortkey(s):
82                return sha1(s.nodeid+si).digest()
83            return sorted(self.servers, key=sortkey)
84        for i in range(len(self.servers)):
85            if self.servers[i].nodeid >= si:
86                return self.servers[i:] + self.servers[:i]
87        return list(self.servers)
88
89    def show_servers(self, picked):
90        bits = []
91        for s in self.servers:
92            if s in picked:
93                bits.append("1")
94            else:
95                bits.append("0")
96        #d = [s in picked and "1" or "0" for s in self.servers]
97        return "".join(bits)
98
99    def dump_usage(self, numfiles, avg_space_per_file):
100        print "uploaded", numfiles
101        # avg_space_per_file measures expected grid-wide ciphertext per file
102        used = list(reversed(sorted([s.used for s in self.servers])))
103        # used is actual per-server ciphertext
104        usedpf = [1.0*u/numfiles for u in used]
105        # usedpf is actual per-server-per-file ciphertext
106        #print "min/max usage: %s/%s" % (abbreviate_space(used[-1]),
107        #                                abbreviate_space(used[0]))
108        avg_usage_per_file = avg_space_per_file/len(self.servers)
109        # avg_usage_per_file is expected per-server-per-file ciphertext
110        spreadpf = usedpf[0] - usedpf[-1]
111        average_usagepf = sum(usedpf) / len(usedpf)
112        variance = sum([(u-average_usagepf)**2 for u in usedpf])/(len(usedpf)-1)
113        std_deviation = math.sqrt(variance)
114        sd_of_total = std_deviation / avg_usage_per_file
115
116        print "min/max/(exp) usage-pf-ps %s/%s/(%s):" % (
117            abbreviate_space(usedpf[-1]),
118            abbreviate_space(usedpf[0]),
119            abbreviate_space(avg_usage_per_file) ),
120        print "spread-pf: %s (%.2f%%)" % (
121            abbreviate_space(spreadpf), 100.0*spreadpf/avg_usage_per_file),
122        #print "average_usage:", abbreviate_space(average_usagepf)
123        print "stddev: %s (%.2f%%)" % (abbreviate_space(std_deviation),
124                                       100.0*sd_of_total)
125
126
127class Options(usage.Options):
128    optParameters = [
129        ("k", "k", 3, "required shares", int),
130        ("N", "N", 10, "total shares", int),
131        ("servers", None, 100, "number of servers", int),
132        ("seed", None, None, "seed to use for creating ring"),
133        ("permute", "p", 1, "1 to permute, 0 to use flat ring", int),
134        ]
135    def postOptions(self):
136        assert self["seed"]
137
138
139def do_run(ring, opts):
140    avg_space_per_file = avg_filesize * opts["N"] / opts["k"]
141    start = time.time()
142    for filenum in count(0):
143        #used = list(reversed(sorted([s.used for s in ring.servers])))
144        #used = [s.used for s in ring.servers]
145        #print used
146        filesize = make_up_a_file_size()
147        sharesize = filesize / opts["k"]
148        si = sha1(str(random.randrange(2**40))).hexdigest()
149        if filenum%4000==0 and filenum > 1:
150            ring.dump_usage(filenum, avg_space_per_file)
151        servers = ring.servers_for_si(si)
152        #print ring.show_servers(servers[:opts["N"]])
153        remaining_shares = opts["N"]
154        index = 0
155        while remaining_shares:
156            s = servers[index]
157            accepted = s.upload(sharesize)
158            if not accepted:
159                return filenum # number of files successfully uploaded
160            remaining_shares -= 1
161            index += 1
162
163
164def do_ring(opts):
165    #seed = str(random.randrange(2**31))
166    total_capacity = opts["servers"]*SERVER_CAPACITY
167    avg_space_per_file = avg_filesize * opts["N"] / opts["k"]
168    avg_files = total_capacity / avg_space_per_file
169    print "expected number of uploads:", avg_files
170    if opts["permute"]:
171        print " PERMUTED"
172    else:
173        print " LINEAR"
174    seed = opts["seed"]
175
176    ring = Ring(opts["servers"], seed, opts["permute"])
177    num_files = do_run(ring, opts)
178
179def run(opts):
180    do_ring(opts)
181
182if __name__ == "__main__":
183    opts = Options()
184    opts.parseOptions()
185    run(opts)
186
187
188def go(opts):
189    servers = [ Server() for x in range(SERVERS) ]
190    servers.sort(cmp=lambda x,y: cmp(x.si, y.si))
191
192    doubled_up_shares = 0
193    doubled_up_at_tick = None
194    tick = 0
195    fullservers = 0
196    while True:
197        nextsharesize = make_up_a_file_size() / K
198        if permutedpeerlist:
199            random.shuffle(servers)
200        else:
201            # rotate a random number
202            rot = random.randrange(0, len(servers))
203            servers = servers[rot:] + servers[:rot]
204
205        i = 0
206        wrapped = False
207        sharestoput = N
208        while sharestoput:
209            server = servers[i]
210            if server.used + nextsharesize < server.max:
211                server.used += nextsharesize
212                sharestoput -= 1
213                if wrapped:
214                    doubled_up_shares += 1
215                    if doubled_up_at_tick is None:
216                        doubled_up_at_tick = tick
217            else:
218                if server.full_at_tick is None:
219                    server.full_at_tick = tick
220                    fullservers += 1
221                    if fullservers == len(servers):
222                        # print "Couldn't place share -- all servers full.  Stopping."
223                        return (servers, doubled_up_shares, doubled_up_at_tick)
224
225            i += 1
226            if i == len(servers):
227                wrapped = True
228                i = 0
229
230        tick += 1
231
232def div_ceil(n, d):
233    """
234    The smallest integer k such that k*d >= n.
235    """
236    return (n/d) + (n%d != 0)
237
238DESIRED_COLUMNS = 70
239
240START_FILES = 137000
241STOP_FILES = 144000
242
243def test(permutedpeerlist, iters):
244    # The i'th element of the filledat list is how many servers got full when the i'th file was uploaded.
245    filledat = []
246    for test in range(iters):
247        (servers, doubled_up_shares, doubled_up_at_tick) = go(permutedpeerlist)
248        print "doubled_up_shares: ", doubled_up_shares
249        print "doubled_up_at_tick: ", doubled_up_at_tick
250        #full_at = [server.full_at_tick for server in servers]
251        #full_at.sort()
252        #print full_at
253        #return
254        for server in servers:
255            fidx = server.full_at_tick
256            filledat.extend([0]*(fidx-len(filledat)+1))
257            filledat[fidx] += 1
258
259    startfiles = 0
260    while filledat[startfiles] == 0:
261        startfiles += 1
262    filespercolumn = div_ceil(len(filledat) - startfiles, (DESIRED_COLUMNS - 3))
263
264    # to make comparisons between runs line up:
265    # startfiles = START_FILES
266    # filespercolumn = div_ceil(STOP_FILES - startfiles, (DESIRED_COLUMNS - 3))
267
268    # The i'th element of the compressedfilledat list is how many servers got full when the filespercolumn files starting at startfiles + i were uploaded.
269    compressedfilledat = []
270    idx = startfiles
271    while idx < len(filledat):
272        compressedfilledat.append(0)
273        for i in range(filespercolumn):
274            compressedfilledat[-1] += filledat[idx]
275            idx += 1
276            if idx >= len(filledat):
277                break
278
279    # The i'th element of the fullat list is how many servers were full by the tick numbered startfiles + i * filespercolumn (on average).
280    fullat = [0] * len(compressedfilledat)
281    for idx, num in enumerate(compressedfilledat):
282        for fidx in range(idx, len(fullat)):
283            fullat[fidx] += num
284
285    for idx in range(len(fullat)):
286        fullat[idx]  = fullat[idx] / float(iters)
287
288    # Now print it out as an ascii art graph.
289    import sys
290    for serversfull in range(40, 0, -1):
291        sys.stdout.write("%2d " % serversfull)
292        for numfull in fullat:
293            if int(numfull) == serversfull:
294                sys.stdout.write("*")
295            else:
296                sys.stdout.write(" ")
297        sys.stdout.write("\n")
298
299    sys.stdout.write(" ^-- servers full\n")
300    idx = 0
301    while idx < len(fullat):
302        nextmark  = "%d--^ " % (startfiles + idx * filespercolumn)
303        sys.stdout.write(nextmark)
304        idx += len(nextmark)
305
306    sys.stdout.write("\nfiles uploaded --> \n")
307
308