Ticket #302: ringsim.3.py

File ringsim.3.py, 8.0 KB (added by warner, at 2009-12-27T02:53:46Z)

v3 of brian's simulator

Line 
1#! /usr/bin/python
2
3# used to discuss ticket #302: "stop permuting peerlist?"
4
5import time
6import math
7from hashlib import sha1, md5, sha256
8myhash = md5
9# md5: 1520 "uploads" per second
10# sha1: 1350 ups
11# sha256: 930 ups
12from itertools import count
13from twisted.python import usage
14
15def abbreviate_space(s, SI=True):
16    if s is None:
17        return "unknown"
18    if SI:
19        U = 1000.0
20        isuffix = "B"
21    else:
22        U = 1024.0
23        isuffix = "iB"
24    def r(count, suffix):
25        return "%.2f %s%s" % (count, suffix, isuffix)
26
27    if s < 1024: # 1000-1023 get emitted as bytes, even in SI mode
28        return "%d B" % s
29    if s < U*U:
30        return r(s/U, "k")
31    if s < U*U*U:
32        return r(s/(U*U), "M")
33    if s < U*U*U*U:
34        return r(s/(U*U*U), "G")
35    if s < U*U*U*U*U:
36        return r(s/(U*U*U*U), "T")
37    return r(s/(U*U*U*U*U), "P")
38
39def make_up_a_file_size(seed):
40    h = int(myhash(seed).hexdigest(),16)
41    max=2**31
42    if 1: # exponential distribution
43        e = 8 + (h % (31-8))
44        return 2 ** e
45    # uniform distribution
46    return h % max # avg 1GB
47
48sizes = [make_up_a_file_size(str(i)) for i in range(10000)]
49avg_filesize = sum(sizes)/len(sizes)
50print "average file size:", abbreviate_space(avg_filesize)
51
52SERVER_CAPACITY = 10**12
53
54class Server:
55    def __init__(self, nodeid, capacity):
56        self.nodeid = nodeid
57        self.used = 0
58        self.capacity = capacity
59        self.numshares = 0
60        self.full_at_tick = None
61
62    def upload(self, sharesize):
63        if self.used + sharesize < self.capacity:
64            self.used += sharesize
65            self.numshares += 1
66            return True
67        return False
68
69    def __repr__(self):
70        if self.full_at_tick is not None:
71            return "<%s %s full at %d>" % (self.__class__.__name__, self.nodeid, self.full_at_tick)
72        else:
73            return "<%s %s>" % (self.__class__.__name__, self.nodeid)
74
75class Ring:
76    SHOW_MINMAX = False
77    def __init__(self, numservers, seed, permute):
78        self.servers = []
79        for i in range(numservers):
80            nodeid = myhash(str(seed)+str(i)).hexdigest()
81            capacity = SERVER_CAPACITY
82            s = Server(nodeid, capacity)
83            self.servers.append(s)
84        self.servers.sort(key=lambda s: s.nodeid)
85        self.permute = permute
86        #self.list_servers()
87
88    def list_servers(self):
89        for i in range(len(self.servers)):
90            s = self.servers[i]
91            next_s = self.servers[(i+1)%len(self.servers)]
92            diff = "%032x" % (int(next_s.nodeid,16) - int(s.nodeid,16))
93            s.next_diff = diff
94            prev_s = self.servers[(i-1)%len(self.servers)]
95            diff = "%032x" % (int(s.nodeid,16) - int(prev_s.nodeid,16))
96            s.prev_diff = diff
97            print s, s.prev_diff
98
99        print "sorted by delta"
100        for s in sorted(self.servers, key=lambda s:s.prev_diff):
101            print s, s.prev_diff
102
103    def servers_for_si(self, si):
104        if self.permute:
105            def sortkey(s):
106                return myhash(s.nodeid+si).digest()
107            return sorted(self.servers, key=sortkey)
108        for i in range(len(self.servers)):
109            if self.servers[i].nodeid >= si:
110                return self.servers[i:] + self.servers[:i]
111        return list(self.servers)
112
113    def show_servers(self, picked):
114        bits = []
115        for s in self.servers:
116            if s in picked:
117                bits.append("1")
118            else:
119                bits.append("0")
120        #d = [s in picked and "1" or "0" for s in self.servers]
121        return "".join(bits)
122
123    def dump_usage(self, numfiles, avg_space_per_file):
124        print "uploaded", numfiles
125        # avg_space_per_file measures expected grid-wide ciphertext per file
126        used = list(reversed(sorted([s.used for s in self.servers])))
127        # used is actual per-server ciphertext
128        usedpf = [1.0*u/numfiles for u in used]
129        # usedpf is actual per-server-per-file ciphertext
130        #print "min/max usage: %s/%s" % (abbreviate_space(used[-1]),
131        #                                abbreviate_space(used[0]))
132        avg_usage_per_file = avg_space_per_file/len(self.servers)
133        # avg_usage_per_file is expected per-server-per-file ciphertext
134        spreadpf = usedpf[0] - usedpf[-1]
135        average_usagepf = sum(usedpf) / len(usedpf)
136        variance = sum([(u-average_usagepf)**2 for u in usedpf])/(len(usedpf)-1)
137        std_deviation = math.sqrt(variance)
138        sd_of_total = std_deviation / avg_usage_per_file
139
140        print "min/max/(exp) usage-pf-ps %s/%s/(%s):" % (
141            abbreviate_space(usedpf[-1]),
142            abbreviate_space(usedpf[0]),
143            abbreviate_space(avg_usage_per_file) ),
144        print "spread-pf: %s (%.2f%%)" % (
145            abbreviate_space(spreadpf), 100.0*spreadpf/avg_usage_per_file),
146        #print "average_usage:", abbreviate_space(average_usagepf)
147        print "stddev: %s (%.2f%%)" % (abbreviate_space(std_deviation),
148                                       100.0*sd_of_total)
149        if self.SHOW_MINMAX:
150            s2 = sorted(self.servers, key=lambda s: s.used)
151            print "least:", s2[0].nodeid
152            print "most:", s2[-1].nodeid
153
154
155class Options(usage.Options):
156    optParameters = [
157        ("k", "k", 3, "required shares", int),
158        ("N", "N", 10, "total shares", int),
159        ("servers", None, 100, "number of servers", int),
160        ("seed", None, None, "seed to use for creating ring"),
161        ("fileseed", None, "blah", "seed to use for creating files"),
162        ("permute", "p", 1, "1 to permute, 0 to use flat ring", int),
163        ]
164    def postOptions(self):
165        assert self["seed"]
166
167
168def do_run(ring, opts):
169    avg_space_per_file = avg_filesize * opts["N"] / opts["k"]
170    fileseed = opts["fileseed"]
171    start = time.time()
172    all_servers_have_room = True
173    no_files_have_wrapped = True
174    for filenum in count(0):
175        #used = list(reversed(sorted([s.used for s in ring.servers])))
176        #used = [s.used for s in ring.servers]
177        #print used
178        si = myhash(fileseed+str(filenum)).hexdigest()
179        filesize = make_up_a_file_size(si)
180        sharesize = filesize / opts["k"]
181        if filenum%4000==0 and filenum > 1:
182            ring.dump_usage(filenum, avg_space_per_file)
183        servers = ring.servers_for_si(si)
184        #print ring.show_servers(servers[:opts["N"]])
185        remaining_shares = opts["N"]
186        index = 0
187        server_was_full = False
188        file_was_wrapped = False
189        remaining_servers = set(servers)
190        while remaining_shares:
191            if index >= len(servers):
192                index = 0
193                file_was_wrapped = True
194            s = servers[index]
195            accepted = s.upload(sharesize)
196            if not accepted:
197                server_was_full = True
198                remaining_servers.discard(s)
199                if not remaining_servers:
200                    print "-- GRID IS FULL"
201                    ring.dump_usage(filenum, avg_space_per_file)
202                    return filenum
203                index += 1
204                continue
205            remaining_shares -= 1
206            index += 1
207        # file is done being uploaded
208
209        if server_was_full and all_servers_have_room:
210            all_servers_have_room = False
211            print "-- FIRST SERVER FULL"
212            ring.dump_usage(filenum, avg_space_per_file)
213        if file_was_wrapped and no_files_have_wrapped:
214            no_files_have_wrapped = False
215            print "-- FIRST FILE WRAPPED"
216            ring.dump_usage(filenum, avg_space_per_file)
217
218
219def do_ring(opts):
220    total_capacity = opts["servers"]*SERVER_CAPACITY
221    avg_space_per_file = avg_filesize * opts["N"] / opts["k"]
222    avg_files = total_capacity / avg_space_per_file
223    print "expected number of uploads:", avg_files
224    if opts["permute"]:
225        print " PERMUTED"
226    else:
227        print " LINEAR"
228    seed = opts["seed"]
229
230    ring = Ring(opts["servers"], seed, opts["permute"])
231    num_files = do_run(ring, opts)
232
233def run(opts):
234    do_ring(opts)
235
236if __name__ == "__main__":
237    opts = Options()
238    opts.parseOptions()
239    run(opts)