"""
I contain utilities useful for calculating servers_of_happiness, and for
reporting it in messages.

Ported to Python 3.
"""

from copy import deepcopy
from allmydata.immutable.happiness_upload import residual_network
from allmydata.immutable.happiness_upload import augmenting_path_for


def failure_message(peer_count, k, happy, effective_happy):
    """
    Build a human-readable explanation of why a share placement did not
    achieve the requested servers-of-happiness level, given the number of
    servers shares could be placed or found on (peer_count), the number of
    shares needed to recover the file (k), the requested happiness level
    (happy), and the happiness level actually achieved (effective_happy).
    """
    # If peer_count < needed_shares, this error message makes more
    # sense than any of the others, so use it.
    if peer_count < k:
        msg = ("shares could be placed or found on only %d "
               "server(s). "
               "We were asked to place shares on at least %d "
               "server(s) such that any %d of them have "
               "enough shares to recover the file." %
               (peer_count, happy, k))
    # Otherwise, if we've placed on at least needed_shares
    # peers, but there isn't an x-happy subset of those peers
    # for x >= needed_shares, we use this error message.
    elif effective_happy < k:
        msg = ("shares could be placed or found on %d "
               "server(s), but they are not spread out evenly "
               "enough to ensure that any %d of these servers "
               "would have enough shares to recover the file. "
               "We were asked to place "
               "shares on at least %d servers such that any "
               "%d of them have enough shares to recover the "
               "file." %
               (peer_count, k, happy, k))
    # Otherwise, if there is an x-happy subset of peers where
    # x >= needed_shares, but x < servers_of_happiness, then
    # we use this message.
    else:
        msg = ("shares could be placed on only %d server(s) "
               "such that any %d of them have enough shares "
               "to recover the file, but we were asked to "
               "place shares on at least %d such servers." %
               (effective_happy, k, happy))
    return msg
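
# Illustrative example (values chosen only for this comment, not taken from
# real upload code): with peer_count=2, k=3, happy=7, effective_happy=2 the
# first branch applies, because shares were found on fewer servers than the
# number needed to recover the file; with peer_count=4, k=3, happy=7,
# effective_happy=2 the second branch applies; and with peer_count=4, k=3,
# happy=7, effective_happy=3 the third branch applies.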


def shares_by_server(servermap):
    """
    I accept a dict of shareid -> set(peerid) mappings, and return a
    dict of peerid -> set(shareid) mappings. My argument is a dictionary
    with sets of peers, indexed by shares, and I transform that into a
    dictionary of sets of shares, indexed by peerids.
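
    For example, a doctest-style sketch (the share and peer ids here are
    illustrative; sorting is only used to keep the output deterministic):

        >>> result = shares_by_server({1: set(["peer_a"]),
        ...                            2: set(["peer_a", "peer_b"])})
        >>> sorted(result["peer_a"])
        [1, 2]
        >>> sorted(result["peer_b"])
        [2]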
    """
    ret = {}
    for shareid, peers in servermap.items():
        assert isinstance(peers, set)
        for peerid in peers:
            ret.setdefault(peerid, set()).add(shareid)
    return ret

def merge_servers(servermap, upload_trackers=None):
    """
    I accept a dict of shareid -> set(serverid) mappings, and optionally a
    set of ServerTrackers. If no set of ServerTrackers is provided, I return
    my first argument unmodified. Otherwise, I update a copy of my first
    argument to include the shareid -> serverid mappings implied in the
    set of ServerTrackers, returning the resulting dict.
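
    For example, a doctest-style sketch (FakeTracker is a hypothetical
    stand-in defined only for this example; a real ServerTracker supplies
    the same 'buckets' attribute and 'get_serverid' method used below):

        >>> class FakeTracker(object):
        ...     def __init__(self, serverid, buckets):
        ...         self.buckets = buckets
        ...         self._serverid = serverid
        ...     def get_serverid(self):
        ...         return self._serverid
        >>> merged = merge_servers({0: set(["peer_a"])},
        ...                        set([FakeTracker("peer_b", {1: None})]))
        >>> sorted(merged.keys())
        [0, 1]
        >>> sorted(merged[0]), sorted(merged[1])
        (['peer_a'], ['peer_b'])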
    """
    # Since we mutate servermap, and are called outside of a
    # context where it is okay to do that, make a copy of servermap and
    # work with it.
    servermap = deepcopy(servermap)
    if not upload_trackers:
        return servermap

    assert(isinstance(servermap, dict))
    assert(isinstance(upload_trackers, set))

    for tracker in upload_trackers:
        for shnum in tracker.buckets:
            servermap.setdefault(shnum, set()).add(tracker.get_serverid())
    return servermap


def servers_of_happiness(sharemap):
    """
    I accept 'sharemap', a dict of shareid -> set(peerid) mappings. I
    return the 'servers_of_happiness' number that sharemap results in.

    To calculate the 'servers_of_happiness' number for the sharemap, I
    construct a bipartite graph with servers in one partition of vertices
    and shares in the other, and with an edge between a server s and a share t
    if s is to store t. I then compute the size of a maximum matching in
    the resulting graph; this is then returned as the 'servers_of_happiness'
    for my arguments.

    For example, consider the following layout:

      server 1: shares 1, 2, 3, 4
      server 2: share 6
      server 3: share 3
      server 4: share 4
      server 5: share 2

    From this, we can construct the following graph:

      L = {server 1, server 2, server 3, server 4, server 5}
      R = {share 1, share 2, share 3, share 4, share 6}
      V = L U R
      E = {(server 1, share 1), (server 1, share 2), (server 1, share 3),
           (server 1, share 4), (server 2, share 6), (server 3, share 3),
           (server 4, share 4), (server 5, share 2)}
      G = (V, E)

    Note that G is bipartite since every edge in E has one endpoint in L
    and one endpoint in R.

    A matching in a graph G is a subset M of E such that, for any vertex
    v in V, v is incident to at most one edge of M. A maximum matching
    in G is a matching that is no smaller than any other matching. For
    this graph, a matching of cardinality 5 is:

      M = {(server 1, share 1), (server 2, share 6),
           (server 3, share 3), (server 4, share 4),
           (server 5, share 2)}

    Since G is bipartite and |L| = 5, we cannot have an M' such that
    |M'| > |M|, so M is a maximum matching in G. Intuitively, as long as
    k <= 5, the layout above therefore has servers_of_happiness = 5, which
    matches the result computed here.
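
    A doctest-style restatement of that layout, with the sharemap keyed by
    share number (the string server names are illustrative):

        >>> servers_of_happiness({
        ...     1: set(["server1"]),
        ...     2: set(["server1", "server5"]),
        ...     3: set(["server1", "server3"]),
        ...     4: set(["server1", "server4"]),
        ...     6: set(["server2"]),
        ... })
        5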
    """
    if sharemap == {}:
        return 0
    servermap = shares_by_server(sharemap)
    graph = _flow_network_for(servermap)

    # XXX this core stuff is identical to
    # happiness_upload._compute_maximum_graph and we should find a way
    # to share the code.

    # This is an implementation of the Ford-Fulkerson method for finding
    # a maximum flow in a flow network applied to a bipartite graph.
    # Specifically, it is the Edmonds-Karp algorithm, since it uses a
    # BFS to find the shortest augmenting path at each iteration, if one
    # exists.
    #
    # The implementation here is an adaptation of an algorithm described in
    # "Introduction to Algorithms", Cormen et al, 2nd ed., pp 658-662.
    dim = len(graph)
    flow_function = [[0 for sh in range(dim)] for s in range(dim)]
    residual_graph, residual_function = residual_network(graph, flow_function)
    while augmenting_path_for(residual_graph):
        path = augmenting_path_for(residual_graph)
        # Delta is the largest amount that we can increase flow across
        # all of the edges in path. Because of the way that the residual
        # function is constructed, f[u][v] for a particular edge (u, v)
        # is the amount of unused capacity on that edge. Taking the
        # minimum of a list of those values for each edge in the
        # augmenting path gives us our delta.
        delta = min(residual_function[u][v] for (u, v) in path)
        for (u, v) in path:
            flow_function[u][v] += delta
            flow_function[v][u] -= delta
        residual_graph, residual_function = residual_network(graph,
                                                             flow_function)
    num_servers = len(servermap)
    # The value of a flow is the total flow out of the source vertex
    # (vertex 0, in our graph). We could just as well sum across all of
    # f[0], but we know that vertex 0 only has edges to the servers in
    # our graph, so we can stop after summing flow across those. The
    # value of a flow computed in this way is the size of a maximum
    # matching on the bipartite graph described above.
    return sum([flow_function[0][v] for v in range(1, num_servers+1)])

def _flow_network_for(servermap):
    """
    I take my argument, a dict of peerid -> set(shareid) mappings, and
    turn it into a flow network suitable for use with Edmonds-Karp. I
    then return the adjacency list representation of that network.

    Specifically, I build G = (V, E), where:
      V = { peerid in servermap } U { shareid in servermap } U {s, t}
      E = {(s, peerid) for each peerid}
          U {(peerid, shareid) if peerid is to store shareid }
          U {(shareid, t) for each shareid}

    s and t will be source and sink nodes when my caller starts treating
    the graph I return like a flow network. Without s and t, the
    returned graph is bipartite.
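
    A doctest-style sketch (peer names and share numbers are illustrative;
    the exact ordering shown assumes Python 3's insertion-ordered dicts):

        >>> _flow_network_for({"peer_a": set([0]), "peer_b": set([1])})
        [[1, 2], [3], [4], [5], [5], []]

    Here vertex 0 is the source s, vertices 1 and 2 are the two servers,
    vertices 3 and 4 are their shares, and vertex 5 is the sink t.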
    """
    # Servers don't have integral identifiers, and we can't make any
    # assumptions about the way shares are indexed -- it's possible that
    # there are missing shares, for example. So before making a graph,
    # we re-index so that all of our vertices have integral indices, and
    # that there aren't any holes. We start indexing at 1, so that we
    # can add a source node at index 0.
    servermap, num_shares = _reindex(servermap, base_index=1)
    num_servers = len(servermap)
    graph = []  # index -> [index], an adjacency list
    # Add an entry at the top (index 0) that has an edge to every server
    # in servermap
    graph.append(list(servermap.keys()))
    # For each server, add an entry that has an edge to every share that it
    # contains (or will contain).
    for k in servermap:
        graph.append(servermap[k])
    # For each share, add an entry that has an edge to the sink.
    sink_num = num_servers + num_shares + 1
    for i in range(num_shares):
        graph.append([sink_num])
    # Add an empty entry for the sink, which has no outbound edges.
    graph.append([])
    return graph


# XXX warning: this is different from happiness_upload's _reindex!
def _reindex(servermap, base_index):
    """
    Given servermap, I map peerids and shareids to integers that don't
    conflict with each other, so they're useful as indices in a graph. I
    return a servermap that is reindexed appropriately, and also the
    number of distinct shares in the resulting servermap as a convenience
    for my caller. base_index tells me where to start indexing.
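
    A doctest-style sketch (peer names and share numbers are illustrative;
    the exact ordering shown assumes Python 3's insertion-ordered dicts):

        >>> _reindex({"peer_a": set([4]), "peer_b": set([7])}, base_index=1)
        ({1: [3], 2: [4]}, 2)

    The two servers become vertices 1 and 2, their shares become vertices
    3 and 4, and the second element of the tuple is the share count.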
    """
    shares = {}  # shareid -> vertex index
    num = base_index
    ret = {}  # peerid -> [shareid], a reindexed servermap.
    # Number the servers first
    for k in servermap:
        ret[num] = servermap[k]
        num += 1
    # Number the shares
    for k in ret:
        for shnum in ret[k]:
            if shnum not in shares:
                shares[shnum] = num
                num += 1
        ret[k] = [shares[x] for x in ret[k]]
    return (ret, len(shares))