import math
from functools import reduce

def compute_fec_params(k, m, n):
    """
    Returns the FEC encoding parameters k_e and m_e, where k_e is the
    number of shares required to reconstruct the file, and m_e is the
    number of shares to be deployed to the servers. 'k' is the number
    of servers that must be available to reconstruct the file, 'm' is
    the maximum number of servers to which shares should be deployed,
    and 'n' is the number of servers available.

    In the nominal case, n >= k, and in that case this function
    returns parameters that ensure each server gets at least one share
    (to maximize the parallelism available for retrieval), that any
    k-server subset of the n servers has sufficient shares to
    reconstruct the file, and that FEC expansion is minimal,
    consistent with the retrieval performance and availability goals.

    If n < k, this function ensures that enough shares are deployed
    that the file can be retrieved, providing as much redundancy as
    possible, but without much more FEC expansion than the user's
    choice of k and m allows.
    """
    assert k <= m
    assert n <= m

    if n <= k:
        return k, min(k * n, k * int(math.ceil(float(m) / k)))

    if n % k == 0:
        return n, n * n // k
    else:
        return n, n - k + 1 + n * (n // k)

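# Illustrative checks of the three branches above (this helper is an
# addition for exposition, not part of the original algorithm); call it
# by hand to verify the arithmetic.
def _example_fec_params():
    # Fewer servers than k: k_e stays at k and
    # m_e = min(k*n, k*ceil(m/k)) = min(6, 12) = 6.
    assert compute_fec_params(3, 10, 2) == (3, 6)
    # n a multiple of k: each of the 6 servers gets n/k = 2 shares.
    assert compute_fec_params(3, 10, 6) == (6, 12)
    # n > k but not a multiple of k: 19 shares across 7 servers, an
    # expansion of 19/7, with every server holding at least two shares.
    assert compute_fec_params(3, 10, 7) == (7, 19)
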
def compute_p_failure(k_e, m_e, n, p):
    """
    Compute the probability that the file is lost given that 'm_e'
    shares are deployed to 'n' servers, that 'k_e' shares are required
    for recovery and that any given server fails with probability 'p'.
    """
    # Allocate shares to servers.
    share_lst = [ m_e // n ] * n
    m_e -= m_e // n * n
    for i in range(0, n):
        if m_e == 0:
            break
        share_lst[i] += 1
        m_e -= 1

    # Construct per-server share survival probability mass functions
    server_pmfs = []
    for shares in share_lst:
        server_pmf = [ p ] + [ 0 ] * (shares - 1) + [ 1 - p ]
        server_pmfs.append(server_pmf)

    # Convolve per-server PMFs to produce aggregate share survival PMF
    pmf = reduce(convolve, server_pmfs)

    # Probability of file survival is the probability that at least
    # k_e shares survive.
    return 1 - sum(pmf[k_e:])

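# A hand-checkable case (again an illustrative addition, not from the
# original module): two servers holding one share each, each failing
# independently with p = 0.1, and only one share needed for recovery.
# The per-server PMFs are both [0.1, 0.9]; their convolution is
# [0.01, 0.18, 0.81], and the file is lost only when both servers fail.
def _example_p_failure():
    p_fail = compute_p_failure(1, 2, 2, 0.1)
    assert abs(p_fail - 0.01) < 1e-12
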
def convolve(list_a, list_b):
    """
    Returns the discrete convolution of two lists.

    Given two random variables X and Y, the convolution of their
    probability mass functions Pr(X) and Pr(Y) is equal to
    Pr(X+Y).
    """
    n = len(list_a)
    m = len(list_b)

    result = []
    for i in range(n + m - 1):
        total = 0.0

        lower = max(0, i - n + 1)
        upper = min(m - 1, i)

        for j in range(lower, upper + 1):
            total += list_a[i - j] * list_b[j]

        result.append(total)

    return result

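# Convolution in one line (illustrative addition): the PMF of the number
# of heads in two fair coin flips is the convolution of [0.5, 0.5] with
# itself.  These values are exact in binary floating point, so direct
# comparison is safe here.
def _example_convolve():
    assert convolve([0.5, 0.5], [0.5, 0.5]) == [0.25, 0.5, 0.25]
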
if __name__ == "__main__":

    for n in range(1, 11):
        k_e, m_e = compute_fec_params(3, 10, n)
        p_fail = compute_p_failure(k_e, m_e, n, 0.001)
        s = "%2d server(s), %2d shares, %2d needed, %2.2f expansion, %0.2e p_fail"
        print(s % (n, m_e, k_e, float(m_e) / float(k_e), p_fail))
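
# With a per-server failure probability of 0.001, the first line of the
# report should read (roughly, allowing for formatting details we have
# not re-verified):
#
#      1 server(s),  3 shares,  3 needed, 1.00 expansion, 1.00e-03 p_fail
#
# since a single server holding all three shares loses the file exactly
# when that server fails, i.e. with probability p = 0.001.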