"""
I contain utilities useful for calculating servers_of_happiness, and for
reporting it in messages.

Ported to Python 3.
"""

from copy import deepcopy
from allmydata.immutable.happiness_upload import residual_network
from allmydata.immutable.happiness_upload import augmenting_path_for


def failure_message(peer_count, k, happy, effective_happy):
    """
    Build a human-readable explanation of why a share placement did not
    achieve the requested servers-of-happiness level, given the number of
    servers shares could be placed or found on (peer_count), the number of
    shares needed to recover the file (k), the requested happiness level
    (happy), and the happiness level actually achieved (effective_happy).
    """
    # If peer_count < needed_shares, this error message makes more
    # sense than any of the others, so use it.
    if peer_count < k:
        msg = ("shares could be placed or found on only %d "
               "server(s). "
               "We were asked to place shares on at least %d "
               "server(s) such that any %d of them have "
               "enough shares to recover the file." %
               (peer_count, happy, k))
    # Otherwise, if we've placed on at least needed_shares
    # peers, but there isn't an x-happy subset of those peers
    # for x >= needed_shares, we use this error message.
    elif effective_happy < k:
        msg = ("shares could be placed or found on %d "
               "server(s), but they are not spread out evenly "
               "enough to ensure that any %d of these servers "
               "would have enough shares to recover the file. "
               "We were asked to place "
               "shares on at least %d servers such that any "
               "%d of them have enough shares to recover the "
               "file." %
               (peer_count, k, happy, k))
    # Otherwise, if there is an x-happy subset of peers where
    # x >= needed_shares, but x < servers_of_happiness, then
    # we use this message.
    else:
        msg = ("shares could be placed on only %d server(s) "
               "such that any %d of them have enough shares "
               "to recover the file, but we were asked to "
               "place shares on at least %d such servers." %
               (effective_happy, k, happy))
    return msg
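
# Illustrative example (values chosen only for this comment, not taken from
# real upload code): with peer_count=2, k=3, happy=7, effective_happy=2 the
# first branch applies, because shares were found on fewer servers than the
# number needed to recover the file; with peer_count=4, k=3, happy=7,
# effective_happy=2 the second branch applies; and with peer_count=4, k=3,
# happy=7, effective_happy=3 the third branch applies.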


def shares_by_server(servermap):
    """
    I accept a dict of shareid -> set(peerid) mappings, and return a
    dict of peerid -> set(shareid) mappings. My argument is a dictionary
    with sets of peers, indexed by shares, and I transform that into a
    dictionary of sets of shares, indexed by peerids.
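
    For example, a doctest-style sketch (the share and peer ids here are
    illustrative; sorting is only used to keep the output deterministic):

        >>> result = shares_by_server({1: set(["peer_a"]),
        ...                            2: set(["peer_a", "peer_b"])})
        >>> sorted(result["peer_a"])
        [1, 2]
        >>> sorted(result["peer_b"])
        [2]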
    """
    ret = {}
    for shareid, peers in servermap.items():
        assert isinstance(peers, set)
        for peerid in peers:
            ret.setdefault(peerid, set()).add(shareid)
    return ret

def merge_servers(servermap, upload_trackers=None):
    """
    I accept a dict of shareid -> set(serverid) mappings, and optionally a
    set of ServerTrackers. If no set of ServerTrackers is provided, I return
    my first argument unmodified. Otherwise, I update a copy of my first
    argument to include the shareid -> serverid mappings implied in the
    set of ServerTrackers, returning the resulting dict.
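
    For example, a doctest-style sketch (FakeTracker is a hypothetical
    stand-in defined only for this example; a real ServerTracker supplies
    the same 'buckets' attribute and 'get_serverid' method used below):

        >>> class FakeTracker(object):
        ...     def __init__(self, serverid, buckets):
        ...         self.buckets = buckets
        ...         self._serverid = serverid
        ...     def get_serverid(self):
        ...         return self._serverid
        >>> merged = merge_servers({0: set(["peer_a"])},
        ...                        set([FakeTracker("peer_b", {1: None})]))
        >>> sorted(merged.keys())
        [0, 1]
        >>> sorted(merged[0]), sorted(merged[1])
        (['peer_a'], ['peer_b'])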
    """
    # Since we mutate servermap, and are called outside of a
    # context where it is okay to do that, make a copy of servermap and
    # work with it.
    servermap = deepcopy(servermap)
    if not upload_trackers:
        return servermap

    assert(isinstance(servermap, dict))
    assert(isinstance(upload_trackers, set))

    for tracker in upload_trackers:
        for shnum in tracker.buckets:
            servermap.setdefault(shnum, set()).add(tracker.get_serverid())
    return servermap


def servers_of_happiness(sharemap):
    """
    I accept 'sharemap', a dict of shareid -> set(peerid) mappings. I
    return the 'servers_of_happiness' number that sharemap results in.

    To calculate the 'servers_of_happiness' number for the sharemap, I
    construct a bipartite graph with servers in one partition of vertices
    and shares in the other, and with an edge between a server s and a share t
    if s is to store t. I then compute the size of a maximum matching in
    the resulting graph; this is then returned as the 'servers_of_happiness'
    for my arguments.

    For example, consider the following layout:

      server 1: shares 1, 2, 3, 4
      server 2: share 6
      server 3: share 3
      server 4: share 4
      server 5: share 2

    From this, we can construct the following graph:

      L = {server 1, server 2, server 3, server 4, server 5}
      R = {share 1, share 2, share 3, share 4, share 6}
      V = L U R
      E = {(server 1, share 1), (server 1, share 2), (server 1, share 3),
           (server 1, share 4), (server 2, share 6), (server 3, share 3),
           (server 4, share 4), (server 5, share 2)}
      G = (V, E)

    Note that G is bipartite since every edge in E has one endpoint in L
    and one endpoint in R.

    A matching in a graph G is a subset M of E such that, for any vertex
    v in V, v is incident to at most one edge of M. A maximum matching
    in G is a matching that is no smaller than any other matching. For
    this graph, a matching of cardinality 5 is:

      M = {(server 1, share 1), (server 2, share 6),
           (server 3, share 3), (server 4, share 4),
           (server 5, share 2)}

    Since G is bipartite and |L| = 5, we cannot have an M' such that
    |M'| > |M|, so M is a maximum matching in G. Intuitively, as long as
    k <= 5, the layout above therefore has servers_of_happiness = 5, which
    matches the result computed here.
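
    A doctest-style restatement of that layout, with the sharemap keyed by
    share number (the string server names are illustrative):

        >>> servers_of_happiness({
        ...     1: set(["server1"]),
        ...     2: set(["server1", "server5"]),
        ...     3: set(["server1", "server3"]),
        ...     4: set(["server1", "server4"]),
        ...     6: set(["server2"]),
        ... })
        5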
    """
    if sharemap == {}:
        return 0
    servermap = shares_by_server(sharemap)
    graph = _flow_network_for(servermap)

    # XXX this core stuff is identical to
    # happiness_upload._compute_maximum_graph and we should find a way
    # to share the code.

    # This is an implementation of the Ford-Fulkerson method for finding
    # a maximum flow in a flow network applied to a bipartite graph.
    # Specifically, it is the Edmonds-Karp algorithm, since it uses a
    # BFS to find the shortest augmenting path at each iteration, if one
    # exists.
    #
    # The implementation here is an adaptation of an algorithm described in
    # "Introduction to Algorithms", Cormen et al, 2nd ed., pp 658-662.
    dim = len(graph)
    flow_function = [[0 for sh in range(dim)] for s in range(dim)]
    residual_graph, residual_function = residual_network(graph, flow_function)
    while augmenting_path_for(residual_graph):
        path = augmenting_path_for(residual_graph)
        # Delta is the largest amount that we can increase flow across
        # all of the edges in path. Because of the way that the residual
        # function is constructed, f[u][v] for a particular edge (u, v)
        # is the amount of unused capacity on that edge. Taking the
        # minimum of a list of those values for each edge in the
        # augmenting path gives us our delta.
        delta = min(residual_function[u][v] for (u, v) in path)
        for (u, v) in path:
            flow_function[u][v] += delta
            flow_function[v][u] -= delta
        residual_graph, residual_function = residual_network(graph,
                                                             flow_function)
    num_servers = len(servermap)
    # The value of a flow is the total flow out of the source vertex
    # (vertex 0, in our graph). We could just as well sum across all of
    # f[0], but we know that vertex 0 only has edges to the servers in
    # our graph, so we can stop after summing flow across those. The
    # value of a flow computed in this way is the size of a maximum
    # matching on the bipartite graph described above.
    return sum([flow_function[0][v] for v in range(1, num_servers+1)])

def _flow_network_for(servermap):
    """
    I take my argument, a dict of peerid -> set(shareid) mappings, and
    turn it into a flow network suitable for use with Edmonds-Karp. I
    then return the adjacency list representation of that network.

    Specifically, I build G = (V, E), where:
      V = { peerid in servermap } U { shareid in servermap } U {s, t}
      E = {(s, peerid) for each peerid}
          U {(peerid, shareid) if peerid is to store shareid }
          U {(shareid, t) for each shareid}

    s and t will be source and sink nodes when my caller starts treating
    the graph I return like a flow network. Without s and t, the
    returned graph is bipartite.
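
    A doctest-style sketch (peer names and share numbers are illustrative;
    the exact ordering shown assumes Python 3's insertion-ordered dicts):

        >>> _flow_network_for({"peer_a": set([0]), "peer_b": set([1])})
        [[1, 2], [3], [4], [5], [5], []]

    Here vertex 0 is the source s, vertices 1 and 2 are the two servers,
    vertices 3 and 4 are their shares, and vertex 5 is the sink t.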
    """
    # Servers don't have integral identifiers, and we can't make any
    # assumptions about the way shares are indexed -- it's possible that
    # there are missing shares, for example. So before making a graph,
    # we re-index so that all of our vertices have integral indices, and
    # that there aren't any holes. We start indexing at 1, so that we
    # can add a source node at index 0.
    servermap, num_shares = _reindex(servermap, base_index=1)
    num_servers = len(servermap)
    graph = []  # index -> [index], an adjacency list
    # Add an entry at the top (index 0) that has an edge to every server
    # in servermap
    graph.append(list(servermap.keys()))
    # For each server, add an entry that has an edge to every share that it
    # contains (or will contain).
    for k in servermap:
        graph.append(servermap[k])
    # For each share, add an entry that has an edge to the sink.
    sink_num = num_servers + num_shares + 1
    for i in range(num_shares):
        graph.append([sink_num])
    # Add an empty entry for the sink, which has no outbound edges.
    graph.append([])
    return graph


# XXX warning: this is different from happiness_upload's _reindex!
def _reindex(servermap, base_index):
    """
    Given servermap, I map peerids and shareids to integers that don't
    conflict with each other, so they're useful as indices in a graph. I
    return a servermap that is reindexed appropriately, and also the
    number of distinct shares in the resulting servermap as a convenience
    for my caller. base_index tells me where to start indexing.
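
    A doctest-style sketch (peer names and share numbers are illustrative;
    the exact ordering shown assumes Python 3's insertion-ordered dicts):

        >>> _reindex({"peer_a": set([4]), "peer_b": set([7])}, base_index=1)
        ({1: [3], 2: [4]}, 2)

    The two servers become vertices 1 and 2, their shares become vertices
    3 and 4, and the second element of the tuple is the share count.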
    """
    shares = {}  # shareid -> vertex index
    num = base_index
    ret = {}  # peerid -> [shareid], a reindexed servermap.
    # Number the servers first
    for k in servermap:
        ret[num] = servermap[k]
        num += 1
    # Number the shares
    for k in ret:
        for shnum in ret[k]:
            if shnum not in shares:
                shares[shnum] = num
                num += 1
        ret[k] = [shares[x] for x in ret[k]]
    return (ret, len(shares))