import math
from functools import reduce

def compute_fec_params(k, m, n):
    """
    Returns the FEC encoding parameters k_e and m_e, where k_e is the
    number of shares required to reconstruct the file, and m_e is the
    number of shares to be deployed to the servers. 'k' is the number
    of servers that must be available to reconstruct the file, 'm' is
    the maximum number of servers to which shares should be deployed,
    and 'n' is the number of servers available.

    In the nominal case, n >= k, and in that case this function
    returns parameters that ensure each server gets at least one share
    (to maximize the parallelism available for retrieval), that any
    k-server subset of the n servers has sufficient shares to
    reconstruct the file, and that FEC expansion is minimal,
    consistent with the retrieval performance and availability goals.

    If n < k, this function ensures that enough shares are deployed
    that the file can be retrieved, providing as much redundancy as
    possible, but without much more FEC expansion than the user's
    choice of k and m allows.
    """
    assert k <= m
    assert n <= m

    if n <= k:
        return k, min(k * n, k * int(math.ceil(float(m) / k)))

    if n % k == 0:
        return n, n * n // k
    else:
        return n, n - k + 1 + n * (n // k)

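# Illustrative checks of the three branches above (this helper is an
# addition for exposition, not part of the original algorithm); call it
# by hand to verify the arithmetic.
def _example_fec_params():
    # Fewer servers than k: k_e stays at k and
    # m_e = min(k*n, k*ceil(m/k)) = min(6, 12) = 6.
    assert compute_fec_params(3, 10, 2) == (3, 6)
    # n a multiple of k: each of the 6 servers gets n/k = 2 shares.
    assert compute_fec_params(3, 10, 6) == (6, 12)
    # n > k but not a multiple of k: 19 shares across 7 servers, an
    # expansion of 19/7, with every server holding at least two shares.
    assert compute_fec_params(3, 10, 7) == (7, 19)
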
def compute_p_failure(k_e, m_e, n, p):
    """
    Compute the probability that the file is lost given that 'm_e'
    shares are deployed to 'n' servers, that 'k_e' shares are required
    for recovery and that any given server fails with probability 'p'.
    """
    # Allocate shares to servers.
    share_lst = [ m_e // n ] * n
    m_e -= m_e // n * n
    for i in range(0, n):
        if m_e == 0:
            break
        share_lst[i] += 1
        m_e -= 1

    # Construct per-server share survival probability mass functions
    server_pmfs = []
    for shares in share_lst:
        server_pmf = [ p ] + [ 0 ] * (shares - 1) + [ 1 - p ]
        server_pmfs.append(server_pmf)

    # Convolve per-server PMFs to produce aggregate share survival PMF
    pmf = reduce(convolve, server_pmfs)

    # Probability of file survival is the probability that at least
    # k_e shares survive.
    return 1 - sum(pmf[k_e:])

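# A hand-checkable case (again an illustrative addition, not from the
# original module): two servers holding one share each, each failing
# independently with p = 0.1, and only one share needed for recovery.
# The per-server PMFs are both [0.1, 0.9]; their convolution is
# [0.01, 0.18, 0.81], and the file is lost only when both servers fail.
def _example_p_failure():
    p_fail = compute_p_failure(1, 2, 2, 0.1)
    assert abs(p_fail - 0.01) < 1e-12
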
def convolve(list_a, list_b):
    """
    Returns the discrete convolution of two lists.

    Given two random variables X and Y, the convolution of their
    probability mass functions Pr(X) and Pr(Y) is equal to
    Pr(X+Y).
    """
    n = len(list_a)
    m = len(list_b)

    result = []
    for i in range(n + m - 1):
        total = 0.0

        lower = max(0, i - n + 1)
        upper = min(m - 1, i)

        for j in range(lower, upper + 1):
            total += list_a[i - j] * list_b[j]

        result.append(total)

    return result

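# Convolution in one line (illustrative addition): the PMF of the number
# of heads in two fair coin flips is the convolution of [0.5, 0.5] with
# itself.  These values are exact in binary floating point, so direct
# comparison is safe here.
def _example_convolve():
    assert convolve([0.5, 0.5], [0.5, 0.5]) == [0.25, 0.5, 0.25]
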
if __name__ == "__main__":

    for n in range(1, 11):
        k_e, m_e = compute_fec_params(3, 10, n)
        p_fail = compute_p_failure(k_e, m_e, n, 0.001)
        s = "%2d server(s), %2d shares, %2d needed, %2.2f expansion, %0.2e p_fail"
        print(s % (n, m_e, k_e, float(m_e) / float(k_e), p_fail))
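
# With a per-server failure probability of 0.001, the first line of the
# report should read (roughly, allowing for formatting details we have
# not re-verified):
#
#      1 server(s),  3 shares,  3 needed, 1.00 expansion, 1.00e-03 p_fail
#
# since a single server holding all three shares loses the file exactly
# when that server fails, i.e. with probability p = 0.001.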