source: trunk/misc/coding_tools/make-canary-files.py

#!/usr/bin/env python


"""
Given a list of nodeids and a 'convergence' file, create a bunch of files
that, when encoded at k=1,N=1 and uploaded with the given convergence
secret, will each have their first share placed on a specific nodeid.

Run this as follows:

 make-canary-files.py -c PATH/TO/convergence -n PATH/TO/nodeids -k 1 -N 1

It will create a directory named 'canaries', with one file per nodeid named
'$NODEID-$NICKNAME.txt', each containing some random text.

The 'nodeids' file should contain one base32 nodeid per line, followed by an
optional nickname, like:

---
5yyqu2hbvbh3rgtsgxrmmg4g77b6p3yo  server12
vb7vm2mneyid5jbyvcbk2wb5icdhwtun  server13
...
---

The resulting 'canaries/5yyqu2hbvbh3rgtsgxrmmg4g77b6p3yo-server12.txt' file
will, when uploaded with the given (convergence,k,N) pair, have its first
share placed on the 5yyq/server12 storage server. If N>1, the other shares
will be placed elsewhere, of course.

This tool can be useful to construct a set of 'canary' files, which can then
be uploaded to storage servers, and later downloaded to test a grid's health.
If you are able to download the canary for server12 via some tahoe node X,
then the following properties are known to be true:

 node X is running, and has established a connection to server12
 server12 is running, and returning data for at least the given file

Using k=1/N=1 creates a separate test for each server. The test process is
then to download the whole directory of files (perhaps with a t=deep-check
operation).
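
For example, if the canary files have been linked into a directory reachable
through an alias (the 'canaries:' alias below is illustrative, not something
this tool creates), the CLI form of that check would be something like:

 tahoe deep-check canaries: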

Alternatively, you could upload with the usual k=3/N=10 and then move/delete
shares to put all N shares on a single server.

Note that any changes to the nodeid list will affect the placement of shares:
the files should be uploaded to a grid containing the same set of nodeids
that this tool used when constructing them.

Also note that this tool uses the Tahoe codebase, so it should be run on a
system where Tahoe is installed, or in a source tree with setup.py like this:

 setup.py run_with_pythonpath -p -c 'misc/make-canary-files.py ARGS..'
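
(The run_with_pythonpath command comes from the old Tahoe setup.py; with a
modern checkout, installing Tahoe into a virtualenv, e.g. with
'pip install tahoe-lafs', and running this script directly should work too.)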
52"""

import os, sys, hashlib
from twisted.python import usage
from allmydata.immutable import upload
from allmydata.util import base32

class Options(usage.Options):
    optParameters = [
        ("convergence", "c", None, "path to NODEDIR/private/convergence"),
        ("nodeids", "n", None, "path to file with one base32 nodeid per line"),
        ("k", "k", 1, "number of necessary shares, defaults to 1", int),
        ("N", "N", 1, "number of total shares, defaults to 1", int),
        ]
    optFlags = [
        ("verbose", "v", "Be noisy"),
        ]

opts = Options()
opts.parseOptions()
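
# Both path options are required below; fail early with a clear message
# instead of a TypeError from os.path.expanduser(None).
if not opts["convergence"] or not opts["nodeids"]:
    print("error: both --convergence (-c) and --nodeids (-n) are required")
    sys.exit(1)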

verbose = bool(opts["verbose"])

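# map each binary nodeid to its (optional) nickname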
nodes = {}
with open(opts["nodeids"], "r") as f:
    for line in f:
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        pieces = line.split(None, 1)
        if len(pieces) == 2:
            nodeid_s, nickname = pieces
        else:
            nodeid_s = pieces[0]
            nickname = None
        # base32.a2b() takes bytes in the Python 3 Tahoe codebase
        nodeid = base32.a2b(nodeid_s.encode("ascii"))
        nodes[nodeid] = nickname

if opts["k"] != 3 or opts["N"] != 10:
    print("note: using non-default k/N requires patching the Tahoe code")
    print("src/allmydata/client.py line 55, DEFAULT_ENCODING_PARAMETERS")

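# The convergence secret, together with the file contents, determines the
# encryption key and thus the storage index, which is what makes share
# placement predictable ahead of the actual upload.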
convergence_file = os.path.expanduser(opts["convergence"])
with open(convergence_file, "rb") as f:
    convergence_s = f.read().strip()
convergence = base32.a2b(convergence_s)

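# Tahoe ranks the candidate servers for a file by hashing the file's storage
# index together with each server's nodeid and sorting the digests; this
# reproduces that permuted ordering.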
def get_permuted_peers(key):
    results = []
    for nodeid in nodes:
        permuted = hashlib.sha1(key + nodeid).digest()
        results.append((permuted, nodeid))
    # plain tuple sort orders by the permuted hash; Python 3's list.sort()
    # no longer takes a cmp function, and none is needed here
    results.sort()
    return [ r[1] for r in results ]

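# Rejection-sample random contents until the resulting storage index permutes
# the target server into first place; with len(nodes) servers, each file is
# expected to take about len(nodes) attempts.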
def find_share_for_target(target):
    # base32.b2a() returns bytes in the Python 3 Tahoe codebase
    target_s = base32.b2a(target).decode("ascii")
    prefix = "The first share of this file will be placed on " + target_s + "\n"
    prefix += "This data is random: "
    attempts = 0
    while True:
        attempts += 1
        suffix = base32.b2a(os.urandom(10)).decode("ascii")
        if verbose: print(" trying", suffix, end=' ')
        # upload.Data() wants bytes
        data = (prefix + suffix + "\n").encode("ascii")
        # files of 55 bytes or less would be stored as LIT (literal) caps
        # and never reach a storage server
        assert len(data) > 55
        # now, what storage index will this get?
        u = upload.Data(data, convergence)
        eu = upload.EncryptAnUploadable(u)
        d = eu.get_storage_index() # this happens to run synchronously
        def _got_si(si, data=data):
            if verbose: print("SI", base32.b2a(si).decode("ascii"), end=' ')
            peerlist = get_permuted_peers(si)
            if peerlist[0] == target:
                # great!
                if verbose: print("  yay!")
                fn = target_s
                if nodes[target]:
                    nickname = nodes[target].replace("/", "_")
                    fn += "-" + nickname
                fn += ".txt"
                fn = os.path.join("canaries", fn)
                with open(fn, "wb") as f:
                    f.write(data)
                return True
            # nope, must try again
            if verbose: print("  boo")
            return False
        d.addCallback(_got_si)
        # get sneaky and look inside the Deferred for the synchronous result
        if d.result:
            return attempts

os.mkdir("canaries")
attempts = []
for target in nodes:
    target_s = base32.b2a(target).decode("ascii")
    print("working on", target_s)
    attempts.append(find_share_for_target(target))
print("done")
print("%d attempts total, avg %.1f per target, max %d" %
      (sum(attempts), sum(attempts) / len(nodes), max(attempts)))