Ticket #798: new-downloader-v10.diff
File new-downloader-v10.diff, 379.2 KB (added by warner, at 2010-07-26T02:26:52Z) |
---|
-
Makefile
diff --git a/Makefile b/Makefile index 3e4be60..723d656 100644
a b quicktest: 125 125 # quicktest-coverage" to do a unit test run with coverage-gathering enabled, 126 126 # then use "make coverate-output-text" for a brief report, or "make 127 127 # coverage-output" for a pretty HTML report. Also see "make .coverage.el" and 128 # misc/coding_ helpers/coverage.el for emacs integration.128 # misc/coding_tools/coverage.el for emacs integration. 129 129 130 130 quicktest-coverage: 131 131 rm -f .coverage … … quicktest-coverage: 134 134 135 135 coverage-output: 136 136 rm -rf coverage-html 137 coverage html - d coverage-html137 coverage html -i -d coverage-html $(COVERAGE_OMIT) 138 138 cp .coverage coverage-html/coverage.data 139 139 @echo "now point your browser at coverage-html/index.html" 140 140 … … coverage-output: 154 154 .PHONY: repl test-darcs-boringfile test-clean clean find-trailing-spaces 155 155 156 156 .coverage.el: .coverage 157 $(PYTHON) misc/coding_ helpers/coverage2el.py157 $(PYTHON) misc/coding_tools/coverage2el.py 158 158 159 159 # 'upload-coverage' is meant to be run with an UPLOAD_TARGET=host:/dir setting 160 160 ifdef UPLOAD_TARGET … … endif 178 178 179 179 pyflakes: 180 180 $(PYTHON) -OOu `which pyflakes` src/allmydata |sort |uniq 181 check-umids: 182 $(PYTHON) misc/coding_tools/check-umids.py `find src/allmydata -name '*.py'` 181 183 182 184 count-lines: 183 185 @echo -n "files: " -
new file misc/coding_tools/check-umids.py
diff --git a/misc/coding_tools/check-umids.py b/misc/coding_tools/check-umids.py new file mode 100755 index 0000000..05e8825
- + 1 #! /usr/bin/python 2 3 # ./rumid.py foo.py 4 5 import sys, re, os 6 7 ok = True 8 umids = {} 9 10 for fn in sys.argv[1:]: 11 fn = os.path.abspath(fn) 12 for lineno,line in enumerate(open(fn, "r").readlines()): 13 lineno = lineno+1 14 if "umid" not in line: 15 continue 16 mo = re.search("umid=[\"\']([^\"\']+)[\"\']", line) 17 if mo: 18 umid = mo.group(1) 19 if umid in umids: 20 oldfn, oldlineno = umids[umid] 21 print "%s:%d: duplicate umid '%s'" % (fn, lineno, umid) 22 print "%s:%d: first used here" % (oldfn, oldlineno) 23 ok = False 24 umids[umid] = (fn,lineno) 25 26 if ok: 27 print "all umids are unique" 28 else: 29 print "some umids were duplicates" 30 sys.exit(1) -
misc/coding_tools/coverage.el
diff --git a/misc/coding_tools/coverage.el b/misc/coding_tools/coverage.el index bad490f..8d69d5d 100644
a b 84 84 'face '(:box "red") 85 85 ) 86 86 ) 87 (message "Added annotations") 87 (message (format "Added annotations: %d uncovered lines" 88 (safe-length uncovered-code-lines))) 88 89 ) 89 90 ) 90 91 (message "unable to find coverage for this file")) -
misc/coding_tools/coverage2el.py
diff --git a/misc/coding_tools/coverage2el.py b/misc/coding_tools/coverage2el.py index ed94bd0..7d03a27 100644
a b 1 1 2 from coverage import coverage, summary 2 from coverage import coverage, summary, misc 3 3 4 4 class ElispReporter(summary.SummaryReporter): 5 5 def report(self): … … class ElispReporter(summary.SummaryReporter): 21 21 out.write("(let ((results (make-hash-table :test 'equal)))\n") 22 22 for cu in self.code_units: 23 23 f = cu.filename 24 (fn, executable, missing, mf) = self.coverage.analysis(cu) 24 try: 25 (fn, executable, missing, mf) = self.coverage.analysis(cu) 26 except misc.NoSource: 27 continue 25 28 code_linenumbers = executable 26 29 uncovered_code = missing 27 30 covered_linenumbers = sorted(set(executable) - set(missing)) -
misc/simulators/sizes.py
diff --git a/misc/simulators/sizes.py b/misc/simulators/sizes.py index d9c230a..7910946 100644
a b class Sizes: 60 60 self.block_arity = 0 61 61 self.block_tree_depth = 0 62 62 self.block_overhead = 0 63 self.bytes_until_some_data = 20+ share_size63 self.bytes_until_some_data = 32 + share_size 64 64 self.share_storage_overhead = 0 65 65 self.share_transmission_overhead = 0 66 66 67 67 elif mode == "beta": 68 68 # k=num_blocks, d=1 69 # each block has a 20-byte hash69 # each block has a 32-byte hash 70 70 self.block_arity = num_blocks 71 71 self.block_tree_depth = 1 72 self.block_overhead = 2072 self.block_overhead = 32 73 73 # the share has a list of hashes, one for each block 74 74 self.share_storage_overhead = (self.block_overhead * 75 75 num_blocks) 76 76 # we can get away with not sending the hash of the share that 77 77 # we're sending in full, once 78 self.share_transmission_overhead = self.share_storage_overhead - 2078 self.share_transmission_overhead = self.share_storage_overhead - 32 79 79 # we must get the whole list (so it can be validated) before 80 80 # any data can be validated 81 81 self.bytes_until_some_data = (self.share_transmission_overhead + … … class Sizes: 89 89 # to make things easier, we make the pessimistic assumption that 90 90 # we have to store hashes for all the empty places in the tree 91 91 # (when the number of shares is not an exact exponent of k) 92 self.block_overhead = 2092 self.block_overhead = 32 93 93 # the block hashes are organized into a k-ary tree, which 94 94 # means storing (and eventually transmitting) more hashes. This 95 95 # count includes all the low-level share hashes and the root. 
… … class Sizes: 98 98 #print "num_leaves", num_leaves 99 99 #print "hash_nodes", hash_nodes 100 100 # the storage overhead is this 101 self.share_storage_overhead = 20* (hash_nodes - 1)101 self.share_storage_overhead = 32 * (hash_nodes - 1) 102 102 # the transmission overhead is smaller: if we actually transmit 103 103 # every block, we don't have to transmit 1/k of the 104 104 # lowest-level block hashes, and we don't have to transmit the 105 105 # root because it was already sent with the share-level hash tree 106 self.share_transmission_overhead = 20* (hash_nodes106 self.share_transmission_overhead = 32 * (hash_nodes 107 107 - 1 # the root 108 108 - num_leaves / k) 109 109 # we must get a full sibling hash chain before we can validate 110 110 # any data 111 111 sibling_length = d * (k-1) 112 self.bytes_until_some_data = 20* sibling_length + block_size112 self.bytes_until_some_data = 32 * sibling_length + block_size 113 113 114 114 115 115 -
misc/simulators/storage-overhead.py
diff --git a/misc/simulators/storage-overhead.py b/misc/simulators/storage-overhead.py index 75a0bf6..a294b8d 100644
a b 1 1 #!/usr/bin/env python 2 2 3 3 import sys, math 4 from allmydata import upload, uri, encode, storage 4 from allmydata import uri, storage 5 from allmydata.immutable import upload 6 from allmydata.interfaces import DEFAULT_MAX_SEGMENT_SIZE 5 7 from allmydata.util import mathutil 6 8 7 9 def roundup(size, blocksize=4096): … … class BigFakeString: 22 24 def tell(self): 23 25 return self.fp 24 26 25 def calc(filesize, params=(3,7,10), segsize= encode.Encoder.MAX_SEGMENT_SIZE):27 def calc(filesize, params=(3,7,10), segsize=DEFAULT_MAX_SEGMENT_SIZE): 26 28 num_shares = params[2] 27 29 if filesize <= upload.Uploader.URI_LIT_SIZE_THRESHOLD: 28 urisize = len(uri. pack_lit("A"*filesize))30 urisize = len(uri.LiteralFileURI("A"*filesize).to_string()) 29 31 sharesize = 0 30 32 sharespace = 0 31 33 else: 32 u = upload.FileUploader(None) 34 u = upload.FileUploader(None) # XXX changed 33 35 u.set_params(params) 34 36 # unfortunately, Encoder doesn't currently lend itself to answering 35 37 # this question without measuring a filesize, so we have to give it a -
src/allmydata/client.py
diff --git a/src/allmydata/client.py b/src/allmydata/client.py index 12e7473..c914ec4 100644
a b 1 import os, stat, time 1 import os, stat, time, weakref 2 2 from allmydata.interfaces import RIStorageServer 3 3 from allmydata import node 4 4 5 5 from zope.interface import implements 6 6 from twisted.internet import reactor, defer 7 from twisted.application import service 7 8 from twisted.application.internet import TimerService 8 9 from foolscap.api import Referenceable 9 10 from pycryptopp.publickey import rsa … … import allmydata 12 13 from allmydata.storage.server import StorageServer 13 14 from allmydata import storage_client 14 15 from allmydata.immutable.upload import Uploader 15 from allmydata.immutable.download import Downloader16 16 from allmydata.immutable.offloaded import Helper 17 17 from allmydata.control import ControlServer 18 18 from allmydata.introducer.client import IntroducerClient 19 from allmydata.util import hashutil, base32, pollmixin, cachedir,log19 from allmydata.util import hashutil, base32, pollmixin, log 20 20 from allmydata.util.abbreviate import parse_abbreviated_size 21 21 from allmydata.util.time_format import parse_duration, parse_date 22 22 from allmydata.stats import StatsProvider … … class KeyGenerator: 94 94 verifier = signer.get_verifying_key() 95 95 return defer.succeed( (verifier, signer) ) 96 96 97 class Terminator(service.Service): 98 def __init__(self): 99 self._clients = weakref.WeakKeyDictionary() 100 def register(self, c): 101 self._clients[c] = None 102 def stopService(self): 103 for c in self._clients: 104 c.stop() 105 return service.Service.stopService(self) 106 97 107 98 108 class Client(node.Node, pollmixin.PollMixin): 99 109 implements(IStatsProducer) … … class Client(node.Node, pollmixin.PollMixin): 278 288 279 289 self.init_client_storage_broker() 280 290 self.history = History(self.stats_provider) 291 self.terminator = Terminator() 292 self.terminator.setServiceParent(self) 281 293 self.add_service(Uploader(helper_furl, self.stats_provider)) 282 download_cachedir = os.path.join(self.basedir,283 
"private", "cache", "download")284 self.download_cache_dirman = cachedir.CacheDirectoryManager(download_cachedir)285 self.download_cache_dirman.setServiceParent(self)286 self.downloader = Downloader(self.storage_broker, self.stats_provider)287 294 self.init_stub_client() 288 295 self.init_nodemaker() 289 296 … … class Client(node.Node, pollmixin.PollMixin): 342 349 self._secret_holder, 343 350 self.get_history(), 344 351 self.getServiceNamed("uploader"), 345 self.downloader, 346 self.download_cache_dirman, 352 self.terminator, 347 353 self.get_encoding_parameters(), 348 354 self._key_generator) 349 355 -
src/allmydata/immutable/checker.py
diff --git a/src/allmydata/immutable/checker.py b/src/allmydata/immutable/checker.py index 2f2d8f1..cd5c556 100644
a b 1 from zope.interface import implements 2 from twisted.internet import defer 1 3 from foolscap.api import DeadReferenceError, RemoteException 4 from allmydata import hashtree, codec, uri 5 from allmydata.interfaces import IValidatedThingProxy, IVerifierURI 2 6 from allmydata.hashtree import IncompleteHashTree 3 7 from allmydata.check_results import CheckResults 4 from allmydata.immutable import download5 8 from allmydata.uri import CHKFileVerifierURI 6 9 from allmydata.util.assertutil import precondition 7 from allmydata.util import base32, idlib, deferredutil, dictutil, log 10 from allmydata.util import base32, idlib, deferredutil, dictutil, log, mathutil 8 11 from allmydata.util.hashutil import file_renewal_secret_hash, \ 9 12 file_cancel_secret_hash, bucket_renewal_secret_hash, \ 10 bucket_cancel_secret_hash 13 bucket_cancel_secret_hash, uri_extension_hash, CRYPTO_VAL_SIZE, \ 14 block_hash 11 15 12 16 from allmydata.immutable import layout 13 17 18 class IntegrityCheckReject(Exception): 19 pass 20 class BadURIExtension(IntegrityCheckReject): 21 pass 22 class BadURIExtensionHashValue(IntegrityCheckReject): 23 pass 24 class BadOrMissingHash(IntegrityCheckReject): 25 pass 26 class UnsupportedErasureCodec(BadURIExtension): 27 pass 28 29 class ValidatedExtendedURIProxy: 30 implements(IValidatedThingProxy) 31 """ I am a front-end for a remote UEB (using a local ReadBucketProxy), 32 responsible for retrieving and validating the elements from the UEB.""" 33 34 def __init__(self, readbucketproxy, verifycap, fetch_failures=None): 35 # fetch_failures is for debugging -- see test_encode.py 36 self._fetch_failures = fetch_failures 37 self._readbucketproxy = readbucketproxy 38 precondition(IVerifierURI.providedBy(verifycap), verifycap) 39 self._verifycap = verifycap 40 41 # required 42 self.segment_size = None 43 self.crypttext_root_hash = None 44 self.share_root_hash = None 45 46 # computed 47 self.block_size = None 48 self.share_size = None 49 self.num_segments = None 
50 self.tail_data_size = None 51 self.tail_segment_size = None 52 53 # optional 54 self.crypttext_hash = None 55 56 def __str__(self): 57 return "<%s %s>" % (self.__class__.__name__, self._verifycap.to_string()) 58 59 def _check_integrity(self, data): 60 h = uri_extension_hash(data) 61 if h != self._verifycap.uri_extension_hash: 62 msg = ("The copy of uri_extension we received from %s was bad: wanted %s, got %s" % 63 (self._readbucketproxy, 64 base32.b2a(self._verifycap.uri_extension_hash), 65 base32.b2a(h))) 66 if self._fetch_failures is not None: 67 self._fetch_failures["uri_extension"] += 1 68 raise BadURIExtensionHashValue(msg) 69 else: 70 return data 71 72 def _parse_and_validate(self, data): 73 self.share_size = mathutil.div_ceil(self._verifycap.size, 74 self._verifycap.needed_shares) 75 76 d = uri.unpack_extension(data) 77 78 # There are several kinds of things that can be found in a UEB. 79 # First, things that we really need to learn from the UEB in order to 80 # do this download. Next: things which are optional but not redundant 81 # -- if they are present in the UEB they will get used. Next, things 82 # that are optional and redundant. These things are required to be 83 # consistent: they don't have to be in the UEB, but if they are in 84 # the UEB then they will be checked for consistency with the 85 # already-known facts, and if they are inconsistent then an exception 86 # will be raised. These things aren't actually used -- they are just 87 # tested for consistency and ignored. Finally: things which are 88 # deprecated -- they ought not be in the UEB at all, and if they are 89 # present then a warning will be logged but they are otherwise 90 # ignored. 91 92 # First, things that we really need to learn from the UEB: 93 # segment_size, crypttext_root_hash, and share_root_hash. 
94 self.segment_size = d['segment_size'] 95 96 self.block_size = mathutil.div_ceil(self.segment_size, 97 self._verifycap.needed_shares) 98 self.num_segments = mathutil.div_ceil(self._verifycap.size, 99 self.segment_size) 100 101 self.tail_data_size = self._verifycap.size % self.segment_size 102 if not self.tail_data_size: 103 self.tail_data_size = self.segment_size 104 # padding for erasure code 105 self.tail_segment_size = mathutil.next_multiple(self.tail_data_size, 106 self._verifycap.needed_shares) 107 108 # Ciphertext hash tree root is mandatory, so that there is at most 109 # one ciphertext that matches this read-cap or verify-cap. The 110 # integrity check on the shares is not sufficient to prevent the 111 # original encoder from creating some shares of file A and other 112 # shares of file B. 113 self.crypttext_root_hash = d['crypttext_root_hash'] 114 115 self.share_root_hash = d['share_root_hash'] 116 117 118 # Next: things that are optional and not redundant: crypttext_hash 119 if d.has_key('crypttext_hash'): 120 self.crypttext_hash = d['crypttext_hash'] 121 if len(self.crypttext_hash) != CRYPTO_VAL_SIZE: 122 raise BadURIExtension('crypttext_hash is required to be hashutil.CRYPTO_VAL_SIZE bytes, not %s bytes' % (len(self.crypttext_hash),)) 123 124 125 # Next: things that are optional, redundant, and required to be 126 # consistent: codec_name, codec_params, tail_codec_params, 127 # num_segments, size, needed_shares, total_shares 128 if d.has_key('codec_name'): 129 if d['codec_name'] != "crs": 130 raise UnsupportedErasureCodec(d['codec_name']) 131 132 if d.has_key('codec_params'): 133 ucpss, ucpns, ucpts = codec.parse_params(d['codec_params']) 134 if ucpss != self.segment_size: 135 raise BadURIExtension("inconsistent erasure code params: " 136 "ucpss: %s != self.segment_size: %s" % 137 (ucpss, self.segment_size)) 138 if ucpns != self._verifycap.needed_shares: 139 raise BadURIExtension("inconsistent erasure code params: ucpns: %s != " 140 
"self._verifycap.needed_shares: %s" % 141 (ucpns, self._verifycap.needed_shares)) 142 if ucpts != self._verifycap.total_shares: 143 raise BadURIExtension("inconsistent erasure code params: ucpts: %s != " 144 "self._verifycap.total_shares: %s" % 145 (ucpts, self._verifycap.total_shares)) 146 147 if d.has_key('tail_codec_params'): 148 utcpss, utcpns, utcpts = codec.parse_params(d['tail_codec_params']) 149 if utcpss != self.tail_segment_size: 150 raise BadURIExtension("inconsistent erasure code params: utcpss: %s != " 151 "self.tail_segment_size: %s, self._verifycap.size: %s, " 152 "self.segment_size: %s, self._verifycap.needed_shares: %s" 153 % (utcpss, self.tail_segment_size, self._verifycap.size, 154 self.segment_size, self._verifycap.needed_shares)) 155 if utcpns != self._verifycap.needed_shares: 156 raise BadURIExtension("inconsistent erasure code params: utcpns: %s != " 157 "self._verifycap.needed_shares: %s" % (utcpns, 158 self._verifycap.needed_shares)) 159 if utcpts != self._verifycap.total_shares: 160 raise BadURIExtension("inconsistent erasure code params: utcpts: %s != " 161 "self._verifycap.total_shares: %s" % (utcpts, 162 self._verifycap.total_shares)) 163 164 if d.has_key('num_segments'): 165 if d['num_segments'] != self.num_segments: 166 raise BadURIExtension("inconsistent num_segments: size: %s, " 167 "segment_size: %s, computed_num_segments: %s, " 168 "ueb_num_segments: %s" % (self._verifycap.size, 169 self.segment_size, 170 self.num_segments, d['num_segments'])) 171 172 if d.has_key('size'): 173 if d['size'] != self._verifycap.size: 174 raise BadURIExtension("inconsistent size: URI size: %s, UEB size: %s" % 175 (self._verifycap.size, d['size'])) 176 177 if d.has_key('needed_shares'): 178 if d['needed_shares'] != self._verifycap.needed_shares: 179 raise BadURIExtension("inconsistent needed shares: URI needed shares: %s, UEB " 180 "needed shares: %s" % (self._verifycap.total_shares, 181 d['needed_shares'])) 182 183 if d.has_key('total_shares'): 184 if 
d['total_shares'] != self._verifycap.total_shares: 185 raise BadURIExtension("inconsistent total shares: URI total shares: %s, UEB " 186 "total shares: %s" % (self._verifycap.total_shares, 187 d['total_shares'])) 188 189 # Finally, things that are deprecated and ignored: plaintext_hash, 190 # plaintext_root_hash 191 if d.get('plaintext_hash'): 192 log.msg("Found plaintext_hash in UEB. This field is deprecated for security reasons " 193 "and is no longer used. Ignoring. %s" % (self,)) 194 if d.get('plaintext_root_hash'): 195 log.msg("Found plaintext_root_hash in UEB. This field is deprecated for security " 196 "reasons and is no longer used. Ignoring. %s" % (self,)) 197 198 return self 199 200 def start(self): 201 """Fetch the UEB from bucket, compare its hash to the hash from 202 verifycap, then parse it. Returns a deferred which is called back 203 with self once the fetch is successful, or is erred back if it 204 fails.""" 205 d = self._readbucketproxy.get_uri_extension() 206 d.addCallback(self._check_integrity) 207 d.addCallback(self._parse_and_validate) 208 return d 209 210 class ValidatedReadBucketProxy(log.PrefixingLogMixin): 211 """I am a front-end for a remote storage bucket, responsible for 212 retrieving and validating data from that bucket. 213 214 My get_block() method is used by BlockDownloaders. 
215 """ 216 217 def __init__(self, sharenum, bucket, share_hash_tree, num_blocks, 218 block_size, share_size): 219 """ share_hash_tree is required to have already been initialized with 220 the root hash (the number-0 hash), using the share_root_hash from the 221 UEB""" 222 precondition(share_hash_tree[0] is not None, share_hash_tree) 223 prefix = "%d-%s-%s" % (sharenum, bucket, 224 base32.b2a_l(share_hash_tree[0][:8], 60)) 225 log.PrefixingLogMixin.__init__(self, 226 facility="tahoe.immutable.download", 227 prefix=prefix) 228 self.sharenum = sharenum 229 self.bucket = bucket 230 self.share_hash_tree = share_hash_tree 231 self.num_blocks = num_blocks 232 self.block_size = block_size 233 self.share_size = share_size 234 self.block_hash_tree = hashtree.IncompleteHashTree(self.num_blocks) 235 236 def get_all_sharehashes(self): 237 """Retrieve and validate all the share-hash-tree nodes that are 238 included in this share, regardless of whether we need them to 239 validate the share or not. Each share contains a minimal Merkle tree 240 chain, but there is lots of overlap, so usually we'll be using hashes 241 from other shares and not reading every single hash from this share. 242 The Verifier uses this function to read and validate every single 243 hash from this share. 244 245 Call this (and wait for the Deferred it returns to fire) before 246 calling get_block() for the first time: this lets us check that the 247 share share contains enough hashes to validate its own data, and 248 avoids downloading any share hash twice. 
249 250 I return a Deferred which errbacks upon failure, probably with 251 BadOrMissingHash.""" 252 253 d = self.bucket.get_share_hashes() 254 def _got_share_hashes(sh): 255 sharehashes = dict(sh) 256 try: 257 self.share_hash_tree.set_hashes(sharehashes) 258 except IndexError, le: 259 raise BadOrMissingHash(le) 260 except (hashtree.BadHashError, hashtree.NotEnoughHashesError), le: 261 raise BadOrMissingHash(le) 262 d.addCallback(_got_share_hashes) 263 return d 264 265 def get_all_blockhashes(self): 266 """Retrieve and validate all the block-hash-tree nodes that are 267 included in this share. Each share contains a full Merkle tree, but 268 we usually only fetch the minimal subset necessary for any particular 269 block. This function fetches everything at once. The Verifier uses 270 this function to validate the block hash tree. 271 272 Call this (and wait for the Deferred it returns to fire) after 273 calling get_all_sharehashes() and before calling get_block() for the 274 first time: this lets us check that the share contains all block 275 hashes and avoids downloading them multiple times. 276 277 I return a Deferred which errbacks upon failure, probably with 278 BadOrMissingHash. 279 """ 280 281 # get_block_hashes(anything) currently always returns everything 282 needed = list(range(len(self.block_hash_tree))) 283 d = self.bucket.get_block_hashes(needed) 284 def _got_block_hashes(blockhashes): 285 if len(blockhashes) < len(self.block_hash_tree): 286 raise BadOrMissingHash() 287 bh = dict(enumerate(blockhashes)) 288 289 try: 290 self.block_hash_tree.set_hashes(bh) 291 except IndexError, le: 292 raise BadOrMissingHash(le) 293 except (hashtree.BadHashError, hashtree.NotEnoughHashesError), le: 294 raise BadOrMissingHash(le) 295 d.addCallback(_got_block_hashes) 296 return d 297 298 def get_all_crypttext_hashes(self, crypttext_hash_tree): 299 """Retrieve and validate all the crypttext-hash-tree nodes that are 300 in this share. 
Normally we don't look at these at all: the download 301 process fetches them incrementally as needed to validate each segment 302 of ciphertext. But this is a convenient place to give the Verifier a 303 function to validate all of these at once. 304 305 Call this with a new hashtree object for each share, initialized with 306 the crypttext hash tree root. I return a Deferred which errbacks upon 307 failure, probably with BadOrMissingHash. 308 """ 309 310 # get_crypttext_hashes() always returns everything 311 d = self.bucket.get_crypttext_hashes() 312 def _got_crypttext_hashes(hashes): 313 if len(hashes) < len(crypttext_hash_tree): 314 raise BadOrMissingHash() 315 ct_hashes = dict(enumerate(hashes)) 316 try: 317 crypttext_hash_tree.set_hashes(ct_hashes) 318 except IndexError, le: 319 raise BadOrMissingHash(le) 320 except (hashtree.BadHashError, hashtree.NotEnoughHashesError), le: 321 raise BadOrMissingHash(le) 322 d.addCallback(_got_crypttext_hashes) 323 return d 324 325 def get_block(self, blocknum): 326 # the first time we use this bucket, we need to fetch enough elements 327 # of the share hash tree to validate it from our share hash up to the 328 # hashroot. 329 if self.share_hash_tree.needed_hashes(self.sharenum): 330 d1 = self.bucket.get_share_hashes() 331 else: 332 d1 = defer.succeed([]) 333 334 # We might need to grab some elements of our block hash tree, to 335 # validate the requested block up to the share hash. 336 blockhashesneeded = self.block_hash_tree.needed_hashes(blocknum, include_leaf=True) 337 # We don't need the root of the block hash tree, as that comes in the 338 # share tree. 
339 blockhashesneeded.discard(0) 340 d2 = self.bucket.get_block_hashes(blockhashesneeded) 341 342 if blocknum < self.num_blocks-1: 343 thisblocksize = self.block_size 344 else: 345 thisblocksize = self.share_size % self.block_size 346 if thisblocksize == 0: 347 thisblocksize = self.block_size 348 d3 = self.bucket.get_block_data(blocknum, 349 self.block_size, thisblocksize) 350 351 dl = deferredutil.gatherResults([d1, d2, d3]) 352 dl.addCallback(self._got_data, blocknum) 353 return dl 354 355 def _got_data(self, results, blocknum): 356 precondition(blocknum < self.num_blocks, 357 self, blocknum, self.num_blocks) 358 sharehashes, blockhashes, blockdata = results 359 try: 360 sharehashes = dict(sharehashes) 361 except ValueError, le: 362 le.args = tuple(le.args + (sharehashes,)) 363 raise 364 blockhashes = dict(enumerate(blockhashes)) 365 366 candidate_share_hash = None # in case we log it in the except block below 367 blockhash = None # in case we log it in the except block below 368 369 try: 370 if self.share_hash_tree.needed_hashes(self.sharenum): 371 # This will raise exception if the values being passed do not 372 # match the root node of self.share_hash_tree. 373 try: 374 self.share_hash_tree.set_hashes(sharehashes) 375 except IndexError, le: 376 # Weird -- sharehashes contained index numbers outside of 377 # the range that fit into this hash tree. 378 raise BadOrMissingHash(le) 379 380 # To validate a block we need the root of the block hash tree, 381 # which is also one of the leafs of the share hash tree, and is 382 # called "the share hash". 383 if not self.block_hash_tree[0]: # empty -- no root node yet 384 # Get the share hash from the share hash tree. 385 share_hash = self.share_hash_tree.get_leaf(self.sharenum) 386 if not share_hash: 387 # No root node in block_hash_tree and also the share hash 388 # wasn't sent by the server. 
389 raise hashtree.NotEnoughHashesError 390 self.block_hash_tree.set_hashes({0: share_hash}) 391 392 if self.block_hash_tree.needed_hashes(blocknum): 393 self.block_hash_tree.set_hashes(blockhashes) 394 395 blockhash = block_hash(blockdata) 396 self.block_hash_tree.set_hashes(leaves={blocknum: blockhash}) 397 #self.log("checking block_hash(shareid=%d, blocknum=%d) len=%d " 398 # "%r .. %r: %s" % 399 # (self.sharenum, blocknum, len(blockdata), 400 # blockdata[:50], blockdata[-50:], base32.b2a(blockhash))) 401 402 except (hashtree.BadHashError, hashtree.NotEnoughHashesError), le: 403 # log.WEIRD: indicates undetected disk/network error, or more 404 # likely a programming error 405 self.log("hash failure in block=%d, shnum=%d on %s" % 406 (blocknum, self.sharenum, self.bucket)) 407 if self.block_hash_tree.needed_hashes(blocknum): 408 self.log(""" failure occurred when checking the block_hash_tree. 409 This suggests that either the block data was bad, or that the 410 block hashes we received along with it were bad.""") 411 else: 412 self.log(""" the failure probably occurred when checking the 413 share_hash_tree, which suggests that the share hashes we 414 received from the remote peer were bad.""") 415 self.log(" have candidate_share_hash: %s" % bool(candidate_share_hash)) 416 self.log(" block length: %d" % len(blockdata)) 417 self.log(" block hash: %s" % base32.b2a_or_none(blockhash)) 418 if len(blockdata) < 100: 419 self.log(" block data: %r" % (blockdata,)) 420 else: 421 self.log(" block data start/end: %r .. 
%r" % 422 (blockdata[:50], blockdata[-50:])) 423 self.log(" share hash tree:\n" + self.share_hash_tree.dump()) 424 self.log(" block hash tree:\n" + self.block_hash_tree.dump()) 425 lines = [] 426 for i,h in sorted(sharehashes.items()): 427 lines.append("%3d: %s" % (i, base32.b2a_or_none(h))) 428 self.log(" sharehashes:\n" + "\n".join(lines) + "\n") 429 lines = [] 430 for i,h in blockhashes.items(): 431 lines.append("%3d: %s" % (i, base32.b2a_or_none(h))) 432 log.msg(" blockhashes:\n" + "\n".join(lines) + "\n") 433 raise BadOrMissingHash(le) 434 435 # If we made it here, the block is good. If the hash trees didn't 436 # like what they saw, they would have raised a BadHashError, causing 437 # our caller to see a Failure and thus ignore this block (as well as 438 # dropping this bucket). 439 return blockdata 440 441 14 442 class Checker(log.PrefixingLogMixin): 15 443 """I query all servers to see if M uniquely-numbered shares are 16 444 available. … … class Checker(log.PrefixingLogMixin): 85 513 level = log.WEIRD 86 514 if f.check(DeadReferenceError): 87 515 level = log.UNUSUAL 88 self.log("failure from server on 'get_buckets' the REMOTE failure was:", facility="tahoe.immutable.checker", failure=f, level=level, umid="3uuBUQ") 516 self.log("failure from server on 'get_buckets' the REMOTE failure was:", 517 facility="tahoe.immutable.checker", 518 failure=f, level=level, umid="AX7wZQ") 89 519 return ({}, serverid, False) 90 520 91 521 d.addCallbacks(_wrap_results, _trap_errs) … … class Checker(log.PrefixingLogMixin): 146 576 147 577 vcap = self._verifycap 148 578 b = layout.ReadBucketProxy(bucket, serverid, vcap.get_storage_index()) 149 veup = download.ValidatedExtendedURIProxy(b, vcap)579 veup = ValidatedExtendedURIProxy(b, vcap) 150 580 d = veup.start() 151 581 152 582 def _got_ueb(vup): 153 583 share_hash_tree = IncompleteHashTree(vcap.total_shares) 154 584 share_hash_tree.set_hashes({0: vup.share_root_hash}) 155 585 156 vrbp = 
download.ValidatedReadBucketProxy(sharenum, b,157 158 159 160 586 vrbp = ValidatedReadBucketProxy(sharenum, b, 587 share_hash_tree, 588 vup.num_segments, 589 vup.block_size, 590 vup.share_size) 161 591 162 592 # note: normal download doesn't use get_all_sharehashes(), 163 593 # because it gets more data than necessary. We've discussed the … … class Checker(log.PrefixingLogMixin): 216 646 return (False, sharenum, 'incompatible') 217 647 elif f.check(layout.LayoutInvalid, 218 648 layout.RidiculouslyLargeURIExtensionBlock, 219 download.BadOrMissingHash,220 download.BadURIExtensionHashValue):649 BadOrMissingHash, 650 BadURIExtensionHashValue): 221 651 return (False, sharenum, 'corrupt') 222 652 223 653 # if it wasn't one of those reasons, re-raise the error -
deleted file src/allmydata/immutable/download.py
diff --git a/src/allmydata/immutable/download.py b/src/allmydata/immutable/download.py deleted file mode 100644 index eb02c6a..0000000
+ - 1 import random, weakref, itertools, time2 from zope.interface import implements3 from twisted.internet import defer, reactor4 from twisted.internet.interfaces import IPushProducer, IConsumer5 from foolscap.api import DeadReferenceError, RemoteException, eventually6 7 from allmydata.util import base32, deferredutil, hashutil, log, mathutil, idlib8 from allmydata.util.assertutil import _assert, precondition9 from allmydata import codec, hashtree, uri10 from allmydata.interfaces import IDownloadTarget, IDownloader, IVerifierURI, \11 IDownloadStatus, IDownloadResults, IValidatedThingProxy, \12 IStorageBroker, NotEnoughSharesError, NoSharesError, NoServersError, \13 UnableToFetchCriticalDownloadDataError14 from allmydata.immutable import layout15 from allmydata.monitor import Monitor16 from pycryptopp.cipher.aes import AES17 18 class IntegrityCheckReject(Exception):19 pass20 21 class BadURIExtensionHashValue(IntegrityCheckReject):22 pass23 class BadURIExtension(IntegrityCheckReject):24 pass25 class UnsupportedErasureCodec(BadURIExtension):26 pass27 class BadCrypttextHashValue(IntegrityCheckReject):28 pass29 class BadOrMissingHash(IntegrityCheckReject):30 pass31 32 class DownloadStopped(Exception):33 pass34 35 class DownloadResults:36 implements(IDownloadResults)37 38 def __init__(self):39 self.servers_used = set()40 self.server_problems = {}41 self.servermap = {}42 self.timings = {}43 self.file_size = None44 45 class DecryptingTarget(log.PrefixingLogMixin):46 implements(IDownloadTarget, IConsumer)47 def __init__(self, target, key, _log_msg_id=None):48 precondition(IDownloadTarget.providedBy(target), target)49 self.target = target50 self._decryptor = AES(key)51 prefix = str(target)52 log.PrefixingLogMixin.__init__(self, "allmydata.immutable.download", _log_msg_id, prefix=prefix)53 # methods to satisfy the IConsumer interface54 def registerProducer(self, producer, streaming):55 if IConsumer.providedBy(self.target):56 self.target.registerProducer(producer, 
streaming)57 def unregisterProducer(self):58 if IConsumer.providedBy(self.target):59 self.target.unregisterProducer()60 def write(self, ciphertext):61 plaintext = self._decryptor.process(ciphertext)62 self.target.write(plaintext)63 def open(self, size):64 self.target.open(size)65 def close(self):66 self.target.close()67 def finish(self):68 return self.target.finish()69 # The following methods is just to pass through to the next target, and70 # just because that target might be a repairer.DownUpConnector, and just71 # because the current CHKUpload object expects to find the storage index72 # in its Uploadable.73 def set_storageindex(self, storageindex):74 self.target.set_storageindex(storageindex)75 def set_encodingparams(self, encodingparams):76 self.target.set_encodingparams(encodingparams)77 78 class ValidatedThingObtainer:79 def __init__(self, validatedthingproxies, debugname, log_id):80 self._validatedthingproxies = validatedthingproxies81 self._debugname = debugname82 self._log_id = log_id83 84 def _bad(self, f, validatedthingproxy):85 f.trap(RemoteException, DeadReferenceError,86 IntegrityCheckReject, layout.LayoutInvalid,87 layout.ShareVersionIncompatible)88 level = log.WEIRD89 if f.check(DeadReferenceError):90 level = log.UNUSUAL91 elif f.check(RemoteException):92 level = log.WEIRD93 else:94 level = log.SCARY95 log.msg(parent=self._log_id, facility="tahoe.immutable.download",96 format="operation %(op)s from validatedthingproxy %(validatedthingproxy)s failed",97 op=self._debugname, validatedthingproxy=str(validatedthingproxy),98 failure=f, level=level, umid="JGXxBA")99 if not self._validatedthingproxies:100 raise UnableToFetchCriticalDownloadDataError("ran out of peers, last error was %s" % (f,))101 # try again with a different one102 d = self._try_the_next_one()103 return d104 105 def _try_the_next_one(self):106 vtp = self._validatedthingproxies.pop(0)107 # start() obtains, validates, and callsback-with the thing or else108 # errbacks109 d = vtp.start()110 
d.addErrback(self._bad, vtp)111 return d112 113 def start(self):114 return self._try_the_next_one()115 116 class ValidatedCrypttextHashTreeProxy:117 implements(IValidatedThingProxy)118 """ I am a front-end for a remote crypttext hash tree using a local119 ReadBucketProxy -- I use its get_crypttext_hashes() method and offer the120 Validated Thing protocol (i.e., I have a start() method that fires with121 self once I get a valid one)."""122 def __init__(self, readbucketproxy, crypttext_hash_tree, num_segments,123 fetch_failures=None):124 # fetch_failures is for debugging -- see test_encode.py125 self._readbucketproxy = readbucketproxy126 self._num_segments = num_segments127 self._fetch_failures = fetch_failures128 self._crypttext_hash_tree = crypttext_hash_tree129 130 def _validate(self, proposal):131 ct_hashes = dict(list(enumerate(proposal)))132 try:133 self._crypttext_hash_tree.set_hashes(ct_hashes)134 except (hashtree.BadHashError, hashtree.NotEnoughHashesError), le:135 if self._fetch_failures is not None:136 self._fetch_failures["crypttext_hash_tree"] += 1137 raise BadOrMissingHash(le)138 # If we now have enough of the crypttext hash tree to integrity-check139 # *any* segment of ciphertext, then we are done. 
class ValidatedExtendedURIProxy:
    """ I am a front-end for a remote UEB (using a local ReadBucketProxy),
    responsible for retrieving and validating the elements from the UEB."""
    implements(IValidatedThingProxy)

    def __init__(self, readbucketproxy, verifycap, fetch_failures=None):
        # fetch_failures is for debugging -- see test_encode.py
        self._fetch_failures = fetch_failures
        self._readbucketproxy = readbucketproxy
        precondition(IVerifierURI.providedBy(verifycap), verifycap)
        self._verifycap = verifycap

        # required
        self.segment_size = None
        self.crypttext_root_hash = None
        self.share_root_hash = None

        # computed
        self.block_size = None
        self.share_size = None
        self.num_segments = None
        self.tail_data_size = None
        self.tail_segment_size = None

        # optional
        self.crypttext_hash = None

    def __str__(self):
        return "<%s %s>" % (self.__class__.__name__, self._verifycap.to_string())

    def _check_integrity(self, data):
        h = hashutil.uri_extension_hash(data)
        if h != self._verifycap.uri_extension_hash:
            msg = ("The copy of uri_extension we received from %s was bad: wanted %s, got %s" %
                   (self._readbucketproxy,
                    base32.b2a(self._verifycap.uri_extension_hash),
                    base32.b2a(h)))
            if self._fetch_failures is not None:
                self._fetch_failures["uri_extension"] += 1
            raise BadURIExtensionHashValue(msg)
        else:
            return data

    def _parse_and_validate(self, data):
        self.share_size = mathutil.div_ceil(self._verifycap.size,
                                            self._verifycap.needed_shares)

        d = uri.unpack_extension(data)

        # There are several kinds of things that can be found in a UEB.
        # First, things that we really need to learn from the UEB in order to
        # do this download. Next: things which are optional but not redundant
        # -- if they are present in the UEB they will get used. Next, things
        # that are optional and redundant. These things are required to be
        # consistent: they don't have to be in the UEB, but if they are in
        # the UEB then they will be checked for consistency with the
        # already-known facts, and if they are inconsistent then an exception
        # will be raised. These things aren't actually used -- they are just
        # tested for consistency and ignored. Finally: things which are
        # deprecated -- they ought not be in the UEB at all, and if they are
        # present then a warning will be logged but they are otherwise
        # ignored.

        # First, things that we really need to learn from the UEB:
        # segment_size, crypttext_root_hash, and share_root_hash.
        self.segment_size = d['segment_size']

        self.block_size = mathutil.div_ceil(self.segment_size,
                                            self._verifycap.needed_shares)
        self.num_segments = mathutil.div_ceil(self._verifycap.size,
                                              self.segment_size)

        self.tail_data_size = self._verifycap.size % self.segment_size
        if not self.tail_data_size:
            self.tail_data_size = self.segment_size
        # padding for erasure code
        self.tail_segment_size = mathutil.next_multiple(self.tail_data_size,
                                                        self._verifycap.needed_shares)

        # Ciphertext hash tree root is mandatory, so that there is at most
        # one ciphertext that matches this read-cap or verify-cap. The
        # integrity check on the shares is not sufficient to prevent the
        # original encoder from creating some shares of file A and other
        # shares of file B.
        self.crypttext_root_hash = d['crypttext_root_hash']

        self.share_root_hash = d['share_root_hash']


        # Next: things that are optional and not redundant: crypttext_hash
        if d.has_key('crypttext_hash'):
            self.crypttext_hash = d['crypttext_hash']
            if len(self.crypttext_hash) != hashutil.CRYPTO_VAL_SIZE:
                raise BadURIExtension('crypttext_hash is required to be hashutil.CRYPTO_VAL_SIZE bytes, not %s bytes' % (len(self.crypttext_hash),))


        # Next: things that are optional, redundant, and required to be
        # consistent: codec_name, codec_params, tail_codec_params,
        # num_segments, size, needed_shares, total_shares
        if d.has_key('codec_name'):
            if d['codec_name'] != "crs":
                raise UnsupportedErasureCodec(d['codec_name'])

        if d.has_key('codec_params'):
            ucpss, ucpns, ucpts = codec.parse_params(d['codec_params'])
            if ucpss != self.segment_size:
                raise BadURIExtension("inconsistent erasure code params: "
                                      "ucpss: %s != self.segment_size: %s" %
                                      (ucpss, self.segment_size))
            if ucpns != self._verifycap.needed_shares:
                raise BadURIExtension("inconsistent erasure code params: ucpns: %s != "
                                      "self._verifycap.needed_shares: %s" %
                                      (ucpns, self._verifycap.needed_shares))
            if ucpts != self._verifycap.total_shares:
                raise BadURIExtension("inconsistent erasure code params: ucpts: %s != "
                                      "self._verifycap.total_shares: %s" %
                                      (ucpts, self._verifycap.total_shares))

        if d.has_key('tail_codec_params'):
            utcpss, utcpns, utcpts = codec.parse_params(d['tail_codec_params'])
            if utcpss != self.tail_segment_size:
                raise BadURIExtension("inconsistent erasure code params: utcpss: %s != "
                                      "self.tail_segment_size: %s, self._verifycap.size: %s, "
                                      "self.segment_size: %s, self._verifycap.needed_shares: %s"
                                      % (utcpss, self.tail_segment_size, self._verifycap.size,
                                         self.segment_size, self._verifycap.needed_shares))
            if utcpns != self._verifycap.needed_shares:
                raise BadURIExtension("inconsistent erasure code params: utcpns: %s != "
                                      "self._verifycap.needed_shares: %s" % (utcpns,
                                                                            self._verifycap.needed_shares))
            if utcpts != self._verifycap.total_shares:
                raise BadURIExtension("inconsistent erasure code params: utcpts: %s != "
                                      "self._verifycap.total_shares: %s" % (utcpts,
                                                                           self._verifycap.total_shares))

        if d.has_key('num_segments'):
            if d['num_segments'] != self.num_segments:
                raise BadURIExtension("inconsistent num_segments: size: %s, "
                                      "segment_size: %s, computed_num_segments: %s, "
                                      "ueb_num_segments: %s" % (self._verifycap.size,
                                                                self.segment_size,
                                                                self.num_segments, d['num_segments']))

        if d.has_key('size'):
            if d['size'] != self._verifycap.size:
                raise BadURIExtension("inconsistent size: URI size: %s, UEB size: %s" %
                                      (self._verifycap.size, d['size']))

        if d.has_key('needed_shares'):
            if d['needed_shares'] != self._verifycap.needed_shares:
                # BUGFIX: the original message interpolated total_shares here
                # while claiming to report "URI needed shares", producing a
                # misleading diagnostic.
                raise BadURIExtension("inconsistent needed shares: URI needed shares: %s, UEB "
                                      "needed shares: %s" % (self._verifycap.needed_shares,
                                                             d['needed_shares']))

        if d.has_key('total_shares'):
            if d['total_shares'] != self._verifycap.total_shares:
                raise BadURIExtension("inconsistent total shares: URI total shares: %s, UEB "
                                      "total shares: %s" % (self._verifycap.total_shares,
                                                            d['total_shares']))

        # Finally, things that are deprecated and ignored: plaintext_hash,
        # plaintext_root_hash
        if d.get('plaintext_hash'):
            log.msg("Found plaintext_hash in UEB. This field is deprecated for security reasons "
                    "and is no longer used. Ignoring. %s" % (self,))
        if d.get('plaintext_root_hash'):
            log.msg("Found plaintext_root_hash in UEB. This field is deprecated for security "
                    "reasons and is no longer used. Ignoring. %s" % (self,))

        return self

    def start(self):
        """Fetch the UEB from bucket, compare its hash to the hash from
        verifycap, then parse it. Returns a deferred which is called back
        with self once the fetch is successful, or is erred back if it
        fails."""
        d = self._readbucketproxy.get_uri_extension()
        d.addCallback(self._check_integrity)
        d.addCallback(self._parse_and_validate)
        return d
Each share contains a minimal Merkle tree363 chain, but there is lots of overlap, so usually we'll be using hashes364 from other shares and not reading every single hash from this share.365 The Verifier uses this function to read and validate every single366 hash from this share.367 368 Call this (and wait for the Deferred it returns to fire) before369 calling get_block() for the first time: this lets us check that the370 share share contains enough hashes to validate its own data, and371 avoids downloading any share hash twice.372 373 I return a Deferred which errbacks upon failure, probably with374 BadOrMissingHash."""375 376 d = self.bucket.get_share_hashes()377 def _got_share_hashes(sh):378 sharehashes = dict(sh)379 try:380 self.share_hash_tree.set_hashes(sharehashes)381 except IndexError, le:382 raise BadOrMissingHash(le)383 except (hashtree.BadHashError, hashtree.NotEnoughHashesError), le:384 raise BadOrMissingHash(le)385 d.addCallback(_got_share_hashes)386 return d387 388 def get_all_blockhashes(self):389 """Retrieve and validate all the block-hash-tree nodes that are390 included in this share. Each share contains a full Merkle tree, but391 we usually only fetch the minimal subset necessary for any particular392 block. This function fetches everything at once. 
The Verifier uses393 this function to validate the block hash tree.394 395 Call this (and wait for the Deferred it returns to fire) after396 calling get_all_sharehashes() and before calling get_block() for the397 first time: this lets us check that the share contains all block398 hashes and avoids downloading them multiple times.399 400 I return a Deferred which errbacks upon failure, probably with401 BadOrMissingHash.402 """403 404 # get_block_hashes(anything) currently always returns everything405 needed = list(range(len(self.block_hash_tree)))406 d = self.bucket.get_block_hashes(needed)407 def _got_block_hashes(blockhashes):408 if len(blockhashes) < len(self.block_hash_tree):409 raise BadOrMissingHash()410 bh = dict(enumerate(blockhashes))411 412 try:413 self.block_hash_tree.set_hashes(bh)414 except IndexError, le:415 raise BadOrMissingHash(le)416 except (hashtree.BadHashError, hashtree.NotEnoughHashesError), le:417 raise BadOrMissingHash(le)418 d.addCallback(_got_block_hashes)419 return d420 421 def get_all_crypttext_hashes(self, crypttext_hash_tree):422 """Retrieve and validate all the crypttext-hash-tree nodes that are423 in this share. Normally we don't look at these at all: the download424 process fetches them incrementally as needed to validate each segment425 of ciphertext. But this is a convenient place to give the Verifier a426 function to validate all of these at once.427 428 Call this with a new hashtree object for each share, initialized with429 the crypttext hash tree root. 
I return a Deferred which errbacks upon430 failure, probably with BadOrMissingHash.431 """432 433 # get_crypttext_hashes() always returns everything434 d = self.bucket.get_crypttext_hashes()435 def _got_crypttext_hashes(hashes):436 if len(hashes) < len(crypttext_hash_tree):437 raise BadOrMissingHash()438 ct_hashes = dict(enumerate(hashes))439 try:440 crypttext_hash_tree.set_hashes(ct_hashes)441 except IndexError, le:442 raise BadOrMissingHash(le)443 except (hashtree.BadHashError, hashtree.NotEnoughHashesError), le:444 raise BadOrMissingHash(le)445 d.addCallback(_got_crypttext_hashes)446 return d447 448 def get_block(self, blocknum):449 # the first time we use this bucket, we need to fetch enough elements450 # of the share hash tree to validate it from our share hash up to the451 # hashroot.452 if self.share_hash_tree.needed_hashes(self.sharenum):453 d1 = self.bucket.get_share_hashes()454 else:455 d1 = defer.succeed([])456 457 # We might need to grab some elements of our block hash tree, to458 # validate the requested block up to the share hash.459 blockhashesneeded = self.block_hash_tree.needed_hashes(blocknum, include_leaf=True)460 # We don't need the root of the block hash tree, as that comes in the461 # share tree.462 blockhashesneeded.discard(0)463 d2 = self.bucket.get_block_hashes(blockhashesneeded)464 465 if blocknum < self.num_blocks-1:466 thisblocksize = self.block_size467 else:468 thisblocksize = self.share_size % self.block_size469 if thisblocksize == 0:470 thisblocksize = self.block_size471 d3 = self.bucket.get_block_data(blocknum,472 self.block_size, thisblocksize)473 474 dl = deferredutil.gatherResults([d1, d2, d3])475 dl.addCallback(self._got_data, blocknum)476 return dl477 478 def _got_data(self, results, blocknum):479 precondition(blocknum < self.num_blocks,480 self, blocknum, self.num_blocks)481 sharehashes, blockhashes, blockdata = results482 try:483 sharehashes = dict(sharehashes)484 except ValueError, le:485 le.args = tuple(le.args + 
(sharehashes,))486 raise487 blockhashes = dict(enumerate(blockhashes))488 489 candidate_share_hash = None # in case we log it in the except block below490 blockhash = None # in case we log it in the except block below491 492 try:493 if self.share_hash_tree.needed_hashes(self.sharenum):494 # This will raise exception if the values being passed do not495 # match the root node of self.share_hash_tree.496 try:497 self.share_hash_tree.set_hashes(sharehashes)498 except IndexError, le:499 # Weird -- sharehashes contained index numbers outside of500 # the range that fit into this hash tree.501 raise BadOrMissingHash(le)502 503 # To validate a block we need the root of the block hash tree,504 # which is also one of the leafs of the share hash tree, and is505 # called "the share hash".506 if not self.block_hash_tree[0]: # empty -- no root node yet507 # Get the share hash from the share hash tree.508 share_hash = self.share_hash_tree.get_leaf(self.sharenum)509 if not share_hash:510 # No root node in block_hash_tree and also the share hash511 # wasn't sent by the server.512 raise hashtree.NotEnoughHashesError513 self.block_hash_tree.set_hashes({0: share_hash})514 515 if self.block_hash_tree.needed_hashes(blocknum):516 self.block_hash_tree.set_hashes(blockhashes)517 518 blockhash = hashutil.block_hash(blockdata)519 self.block_hash_tree.set_hashes(leaves={blocknum: blockhash})520 #self.log("checking block_hash(shareid=%d, blocknum=%d) len=%d "521 # "%r .. 
%r: %s" %522 # (self.sharenum, blocknum, len(blockdata),523 # blockdata[:50], blockdata[-50:], base32.b2a(blockhash)))524 525 except (hashtree.BadHashError, hashtree.NotEnoughHashesError), le:526 # log.WEIRD: indicates undetected disk/network error, or more527 # likely a programming error528 self.log("hash failure in block=%d, shnum=%d on %s" %529 (blocknum, self.sharenum, self.bucket))530 if self.block_hash_tree.needed_hashes(blocknum):531 self.log(""" failure occurred when checking the block_hash_tree.532 This suggests that either the block data was bad, or that the533 block hashes we received along with it were bad.""")534 else:535 self.log(""" the failure probably occurred when checking the536 share_hash_tree, which suggests that the share hashes we537 received from the remote peer were bad.""")538 self.log(" have candidate_share_hash: %s" % bool(candidate_share_hash))539 self.log(" block length: %d" % len(blockdata))540 self.log(" block hash: %s" % base32.b2a_or_none(blockhash))541 if len(blockdata) < 100:542 self.log(" block data: %r" % (blockdata,))543 else:544 self.log(" block data start/end: %r .. %r" %545 (blockdata[:50], blockdata[-50:]))546 self.log(" share hash tree:\n" + self.share_hash_tree.dump())547 self.log(" block hash tree:\n" + self.block_hash_tree.dump())548 lines = []549 for i,h in sorted(sharehashes.items()):550 lines.append("%3d: %s" % (i, base32.b2a_or_none(h)))551 self.log(" sharehashes:\n" + "\n".join(lines) + "\n")552 lines = []553 for i,h in blockhashes.items():554 lines.append("%3d: %s" % (i, base32.b2a_or_none(h)))555 log.msg(" blockhashes:\n" + "\n".join(lines) + "\n")556 raise BadOrMissingHash(le)557 558 # If we made it here, the block is good. 
class BlockDownloader(log.PrefixingLogMixin):
    """I am responsible for downloading a single block (from a single bucket)
    for a single segment.

    I am a child of the SegmentDownloader.
    """

    def __init__(self, vbucket, blocknum, parent, results):
        precondition(isinstance(vbucket, ValidatedReadBucketProxy), vbucket)
        prefix = "%s-%d" % (vbucket, blocknum)
        log.PrefixingLogMixin.__init__(self, facility="tahoe.immutable.download", prefix=prefix)
        self.vbucket = vbucket
        self.blocknum = blocknum
        self.parent = parent
        self.results = results

    def start(self, segnum):
        self.log("get_block(segnum=%d)" % segnum)
        started = time.time()
        d = self.vbucket.get_block(segnum)
        d.addCallbacks(self._hold_block, self._got_block_error,
                       callbackArgs=(started,))
        return d

    def _hold_block(self, data, started):
        # record the per-server fetch latency, then hand the validated
        # block data up to our parent SegmentDownloader
        if self.results:
            elapsed = time.time() - started
            peerid = self.vbucket.bucket.get_peerid()
            if peerid not in self.results.timings["fetch_per_server"]:
                self.results.timings["fetch_per_server"][peerid] = []
            self.results.timings["fetch_per_server"][peerid].append(elapsed)
        self.log("got block")
        self.parent.hold_block(self.blocknum, data)

    def _got_block_error(self, f):
        f.trap(RemoteException, DeadReferenceError,
               IntegrityCheckReject, layout.LayoutInvalid,
               layout.ShareVersionIncompatible)
        # server connectivity failures are merely UNUSUAL; anything else
        # (e.g. validation failure) is WEIRD
        if f.check(RemoteException, DeadReferenceError):
            level = log.UNUSUAL
        else:
            level = log.WEIRD
        self.log("failure to get block", level=level, umid="5Z4uHQ")
        if self.results:
            peerid = self.vbucket.bucket.get_peerid()
            self.results.server_problems[peerid] = str(f)
        self.parent.bucket_failed(self.vbucket)

class SegmentDownloader:
    """I am responsible for downloading all the blocks for a single segment
    of data.

    I am a child of the CiphertextDownloader.
    """

    def __init__(self, parent, segmentnumber, needed_shares, results):
        self.parent = parent
        self.segmentnumber = segmentnumber
        self.needed_blocks = needed_shares
        self.blocks = {} # k: blocknum, v: data
        self.results = results
        self._log_number = self.parent.log("starting segment %d" %
                                           segmentnumber)

    def log(self, *args, **kwargs):
        if "parent" not in kwargs:
            kwargs["parent"] = self._log_number
        return self.parent.log(*args, **kwargs)

    def start(self):
        return self._download()

    def _download(self):
        d = self._try()
        def _done(res):
            if len(self.blocks) >= self.needed_blocks:
                # we only need self.needed_blocks blocks
                # we want to get the smallest blockids, because they are
                # more likely to be fast "primary blocks"
                blockids = sorted(self.blocks.keys())[:self.needed_blocks]
                blocks = []
                for blocknum in blockids:
                    blocks.append(self.blocks[blocknum])
                return (blocks, blockids)
            else:
                # not enough yet: activate more buckets and retry
                return self._download()
        d.addCallback(_done)
        return d

    def _try(self):
        # fill our set of active buckets, maybe raising NotEnoughSharesError
        active_buckets = self.parent._activate_enough_buckets()
        # Now we have enough buckets, in self.parent.active_buckets.

        # in test cases, bd.start might mutate active_buckets right away, so
        # we need to put off calling start() until we've iterated all the way
        # through it.
        downloaders = []
        for blocknum, vbucket in active_buckets.iteritems():
            assert isinstance(vbucket, ValidatedReadBucketProxy), vbucket
            bd = BlockDownloader(vbucket, blocknum, self, self.results)
            downloaders.append(bd)
            if self.results:
                self.results.servers_used.add(vbucket.bucket.get_peerid())
        l = [bd.start(self.segmentnumber) for bd in downloaders]
        return defer.DeferredList(l, fireOnOneErrback=True)

    def hold_block(self, blocknum, data):
        self.blocks[blocknum] = data

    def bucket_failed(self, vbucket):
        self.parent.bucket_failed(vbucket)

class DownloadStatus:
    """Mutable status record for one download, exposed through the web
    status page."""
    implements(IDownloadStatus)
    statusid_counter = itertools.count(0)

    def __init__(self):
        self.storage_index = None
        self.size = None
        self.helper = False
        self.status = "Not started"
        self.progress = 0.0
        self.paused = False
        self.stopped = False
        self.active = True
        self.results = None
        self.counter = self.statusid_counter.next()
        self.started = time.time()

    def get_started(self):
        return self.started
    def get_storage_index(self):
        return self.storage_index
    def get_size(self):
        return self.size
    def using_helper(self):
        return self.helper
    def get_status(self):
        status = self.status
        if self.paused:
            status += " (output paused)"
        if self.stopped:
            status += " (output stopped)"
        return status
    def get_progress(self):
        return self.progress
    def get_active(self):
        return self.active
    def get_results(self):
        return self.results
    def get_counter(self):
        return self.counter

    def set_storage_index(self, si):
        self.storage_index = si
    def set_size(self, size):
        self.size = size
    def set_helper(self, helper):
        self.helper = helper
    def set_status(self, status):
        self.status = status
    def set_paused(self, paused):
        self.paused = paused
    def set_stopped(self, stopped):
        self.stopped = stopped
    def set_progress(self, value):
        self.progress = value
    def set_active(self, value):
        self.active = value
    def set_results(self, value):
        self.results = value
cancelled (by invoking its raise_if_cancelled() method)."""745 implements(IPushProducer)746 _status = None747 748 def __init__(self, storage_broker, v, target, monitor):749 750 precondition(IStorageBroker.providedBy(storage_broker), storage_broker)751 precondition(IVerifierURI.providedBy(v), v)752 precondition(IDownloadTarget.providedBy(target), target)753 754 self._storage_broker = storage_broker755 self._verifycap = v756 self._storage_index = v.get_storage_index()757 self._uri_extension_hash = v.uri_extension_hash758 759 prefix=base32.b2a_l(self._storage_index[:8], 60)760 log.PrefixingLogMixin.__init__(self, facility="tahoe.immutable.download", prefix=prefix)761 762 self._started = time.time()763 self._status = s = DownloadStatus()764 s.set_status("Starting")765 s.set_storage_index(self._storage_index)766 s.set_size(self._verifycap.size)767 s.set_helper(False)768 s.set_active(True)769 770 self._results = DownloadResults()771 s.set_results(self._results)772 self._results.file_size = self._verifycap.size773 self._results.timings["servers_peer_selection"] = {}774 self._results.timings["fetch_per_server"] = {}775 self._results.timings["cumulative_fetch"] = 0.0776 self._results.timings["cumulative_decode"] = 0.0777 self._results.timings["cumulative_decrypt"] = 0.0778 self._results.timings["paused"] = 0.0779 780 self._paused = False781 self._stopped = False782 if IConsumer.providedBy(target):783 target.registerProducer(self, True)784 self._target = target785 # Repairer (uploader) needs the storageindex.786 self._target.set_storageindex(self._storage_index)787 self._monitor = monitor788 self._opened = False789 790 self.active_buckets = {} # k: shnum, v: bucket791 self._share_buckets = {} # k: sharenum, v: list of buckets792 793 # _download_all_segments() will set this to:794 # self._share_vbuckets = {} # k: shnum, v: set of ValidatedBuckets795 self._share_vbuckets = None796 797 self._fetch_failures = {"uri_extension": 0, "crypttext_hash_tree": 0, }798 799 
self._ciphertext_hasher = hashutil.crypttext_hasher()800 801 self._bytes_done = 0802 self._status.set_progress(float(self._bytes_done)/self._verifycap.size)803 804 # _got_uri_extension() will create the following:805 # self._crypttext_hash_tree806 # self._share_hash_tree807 # self._current_segnum = 0808 # self._vup # ValidatedExtendedURIProxy809 810 # _get_all_shareholders() will create the following:811 # self._total_queries812 # self._responses_received = 0813 # self._queries_failed = 0814 815 # This is solely for the use of unit tests. It will be triggered when816 # we start downloading shares.817 self._stage_4_d = defer.Deferred()818 819 def pauseProducing(self):820 if self._paused:821 return822 self._paused = defer.Deferred()823 self._paused_at = time.time()824 if self._status:825 self._status.set_paused(True)826 827 def resumeProducing(self):828 if self._paused:829 paused_for = time.time() - self._paused_at830 self._results.timings['paused'] += paused_for831 p = self._paused832 self._paused = None833 eventually(p.callback, None)834 if self._status:835 self._status.set_paused(False)836 837 def stopProducing(self):838 self.log("Download.stopProducing")839 self._stopped = True840 self.resumeProducing()841 if self._status:842 self._status.set_stopped(True)843 self._status.set_active(False)844 845 def start(self):846 self.log("starting download")847 848 # first step: who should we download from?849 d = defer.maybeDeferred(self._get_all_shareholders)850 d.addBoth(self._got_all_shareholders)851 # now get the uri_extension block from somebody and integrity check852 # it and parse and validate its contents853 d.addCallback(self._obtain_uri_extension)854 d.addCallback(self._get_crypttext_hash_tree)855 # once we know that, we can download blocks from everybody856 d.addCallback(self._download_all_segments)857 def _finished(res):858 if self._status:859 self._status.set_status("Finished")860 self._status.set_active(False)861 self._status.set_paused(False)862 if 
IConsumer.providedBy(self._target):863 self._target.unregisterProducer()864 return res865 d.addBoth(_finished)866 def _failed(why):867 if self._status:868 self._status.set_status("Failed")869 self._status.set_active(False)870 if why.check(DownloadStopped):871 # DownloadStopped just means the consumer aborted the872 # download; not so scary.873 self.log("download stopped", level=log.UNUSUAL)874 else:875 # This is really unusual, and deserves maximum forensics.876 self.log("download failed!", failure=why, level=log.SCARY,877 umid="lp1vaQ")878 return why879 d.addErrback(_failed)880 d.addCallback(self._done)881 return d882 883 def _get_all_shareholders(self):884 """ Once the number of buckets that I know about is >= K then I885 callback the Deferred that I return.886 887 If all of the get_buckets deferreds have fired (whether callback888 or errback) and I still don't have enough buckets then I'll also889 callback -- not errback -- the Deferred that I return.890 """891 wait_for_enough_buckets_d = defer.Deferred()892 self._wait_for_enough_buckets_d = wait_for_enough_buckets_d893 894 sb = self._storage_broker895 servers = sb.get_servers_for_index(self._storage_index)896 if not servers:897 raise NoServersError("broker gave us no servers!")898 899 self._total_queries = len(servers)900 self._responses_received = 0901 self._queries_failed = 0902 for (peerid,ss) in servers:903 self.log(format="sending DYHB to [%(peerid)s]",904 peerid=idlib.shortnodeid_b2a(peerid),905 level=log.NOISY, umid="rT03hg")906 d = ss.callRemote("get_buckets", self._storage_index)907 d.addCallbacks(self._got_response, self._got_error,908 callbackArgs=(peerid,))909 d.addBoth(self._check_got_all_responses)910 911 if self._status:912 self._status.set_status("Locating Shares (%d/%d)" %913 (self._responses_received,914 self._total_queries))915 return wait_for_enough_buckets_d916 917 def _check_got_all_responses(self, ignored=None):918 assert (self._responses_received+self._queries_failed) <= 
self._total_queries919 if self._wait_for_enough_buckets_d and (self._responses_received+self._queries_failed) == self._total_queries:920 reactor.callLater(0, self._wait_for_enough_buckets_d.callback, False)921 self._wait_for_enough_buckets_d = None922 923 def _got_response(self, buckets, peerid):924 # Note that this can continue to receive responses after _wait_for_enough_buckets_d925 # has fired.926 self._responses_received += 1927 self.log(format="got results from [%(peerid)s]: shnums %(shnums)s",928 peerid=idlib.shortnodeid_b2a(peerid),929 shnums=sorted(buckets.keys()),930 level=log.NOISY, umid="o4uwFg")931 if self._results:932 elapsed = time.time() - self._started933 self._results.timings["servers_peer_selection"][peerid] = elapsed934 if self._status:935 self._status.set_status("Locating Shares (%d/%d)" %936 (self._responses_received,937 self._total_queries))938 for sharenum, bucket in buckets.iteritems():939 b = layout.ReadBucketProxy(bucket, peerid, self._storage_index)940 self.add_share_bucket(sharenum, b)941 # If we just got enough buckets for the first time, then fire the942 # deferred. 
Then remove it from self so that we don't fire it943 # again.944 if self._wait_for_enough_buckets_d and len(self._share_buckets) >= self._verifycap.needed_shares:945 reactor.callLater(0, self._wait_for_enough_buckets_d.callback, True)946 self._wait_for_enough_buckets_d = None947 948 if self._share_vbuckets is not None:949 vbucket = ValidatedReadBucketProxy(sharenum, b, self._share_hash_tree, self._vup.num_segments, self._vup.block_size, self._vup.share_size)950 self._share_vbuckets.setdefault(sharenum, set()).add(vbucket)951 952 if self._results:953 if peerid not in self._results.servermap:954 self._results.servermap[peerid] = set()955 self._results.servermap[peerid].add(sharenum)956 957 def add_share_bucket(self, sharenum, bucket):958 # this is split out for the benefit of test_encode.py959 self._share_buckets.setdefault(sharenum, []).append(bucket)960 961 def _got_error(self, f):962 self._queries_failed += 1963 level = log.WEIRD964 if f.check(DeadReferenceError):965 level = log.UNUSUAL966 self.log("Error during get_buckets", failure=f, level=level,967 umid="3uuBUQ")968 969 def bucket_failed(self, vbucket):970 shnum = vbucket.sharenum971 del self.active_buckets[shnum]972 s = self._share_vbuckets[shnum]973 # s is a set of ValidatedReadBucketProxy instances974 s.remove(vbucket)975 # ... which might now be empty976 if not s:977 # there are no more buckets which can provide this share, so978 # remove the key. 
This may prompt us to use a different share.979 del self._share_vbuckets[shnum]980 981 def _got_all_shareholders(self, res):982 if self._results:983 now = time.time()984 self._results.timings["peer_selection"] = now - self._started985 986 if len(self._share_buckets) < self._verifycap.needed_shares:987 msg = "Failed to get enough shareholders: have %d, need %d" \988 % (len(self._share_buckets), self._verifycap.needed_shares)989 if self._share_buckets:990 raise NotEnoughSharesError(msg)991 else:992 raise NoSharesError(msg)993 994 #for s in self._share_vbuckets.values():995 # for vb in s:996 # assert isinstance(vb, ValidatedReadBucketProxy), \997 # "vb is %s but should be a ValidatedReadBucketProxy" % (vb,)998 999 def _obtain_uri_extension(self, ignored):1000 # all shareholders are supposed to have a copy of uri_extension, and1001 # all are supposed to be identical. We compute the hash of the data1002 # that comes back, and compare it against the version in our URI. If1003 # they don't match, ignore their data and try someone else.1004 if self._status:1005 self._status.set_status("Obtaining URI Extension")1006 1007 uri_extension_fetch_started = time.time()1008 1009 vups = []1010 for sharenum, buckets in self._share_buckets.iteritems():1011 for bucket in buckets:1012 vups.append(ValidatedExtendedURIProxy(bucket, self._verifycap, self._fetch_failures))1013 vto = ValidatedThingObtainer(vups, debugname="vups", log_id=self._parentmsgid)1014 d = vto.start()1015 1016 def _got_uri_extension(vup):1017 precondition(isinstance(vup, ValidatedExtendedURIProxy), vup)1018 if self._results:1019 elapsed = time.time() - uri_extension_fetch_started1020 self._results.timings["uri_extension"] = elapsed1021 1022 self._vup = vup1023 self._codec = codec.CRSDecoder()1024 self._codec.set_params(self._vup.segment_size, self._verifycap.needed_shares, self._verifycap.total_shares)1025 self._tail_codec = codec.CRSDecoder()1026 self._tail_codec.set_params(self._vup.tail_segment_size, 
self._verifycap.needed_shares, self._verifycap.total_shares)1027 1028 self._current_segnum = 01029 1030 self._share_hash_tree = hashtree.IncompleteHashTree(self._verifycap.total_shares)1031 self._share_hash_tree.set_hashes({0: vup.share_root_hash})1032 1033 self._crypttext_hash_tree = hashtree.IncompleteHashTree(self._vup.num_segments)1034 self._crypttext_hash_tree.set_hashes({0: self._vup.crypttext_root_hash})1035 1036 # Repairer (uploader) needs the encodingparams.1037 self._target.set_encodingparams((1038 self._verifycap.needed_shares,1039 0, # see ticket #778 for why this is1040 self._verifycap.total_shares,1041 self._vup.segment_size1042 ))1043 d.addCallback(_got_uri_extension)1044 return d1045 1046 def _get_crypttext_hash_tree(self, res):1047 vchtps = []1048 for sharenum, buckets in self._share_buckets.iteritems():1049 for bucket in buckets:1050 vchtp = ValidatedCrypttextHashTreeProxy(bucket, self._crypttext_hash_tree, self._vup.num_segments, self._fetch_failures)1051 vchtps.append(vchtp)1052 1053 _get_crypttext_hash_tree_started = time.time()1054 if self._status:1055 self._status.set_status("Retrieving crypttext hash tree")1056 1057 vto = ValidatedThingObtainer(vchtps, debugname="vchtps",1058 log_id=self._parentmsgid)1059 d = vto.start()1060 1061 def _got_crypttext_hash_tree(res):1062 # Good -- the self._crypttext_hash_tree that we passed to vchtp1063 # is now populated with hashes.1064 if self._results:1065 elapsed = time.time() - _get_crypttext_hash_tree_started1066 self._results.timings["hashtrees"] = elapsed1067 d.addCallback(_got_crypttext_hash_tree)1068 return d1069 1070 def _activate_enough_buckets(self):1071 """either return a mapping from shnum to a ValidatedReadBucketProxy1072 that can provide data for that share, or raise NotEnoughSharesError"""1073 1074 while len(self.active_buckets) < self._verifycap.needed_shares:1075 # need some more1076 handled_shnums = set(self.active_buckets.keys())1077 available_shnums = 
set(self._share_vbuckets.keys())1078 potential_shnums = list(available_shnums - handled_shnums)1079 if len(potential_shnums) < (self._verifycap.needed_shares1080 - len(self.active_buckets)):1081 have = len(potential_shnums) + len(self.active_buckets)1082 msg = "Unable to activate enough shares: have %d, need %d" \1083 % (have, self._verifycap.needed_shares)1084 if have:1085 raise NotEnoughSharesError(msg)1086 else:1087 raise NoSharesError(msg)1088 # For the next share, choose a primary share if available, else a1089 # randomly chosen secondary share.1090 potential_shnums.sort()1091 if potential_shnums[0] < self._verifycap.needed_shares:1092 shnum = potential_shnums[0]1093 else:1094 shnum = random.choice(potential_shnums)1095 # and a random bucket that will provide it1096 validated_bucket = random.choice(list(self._share_vbuckets[shnum]))1097 self.active_buckets[shnum] = validated_bucket1098 return self.active_buckets1099 1100 1101 def _download_all_segments(self, res):1102 # From now on if new buckets are received then I will notice that1103 # self._share_vbuckets is not None and generate a vbucket for that new1104 # bucket and add it in to _share_vbuckets. (We had to wait because we1105 # didn't have self._vup and self._share_hash_tree earlier. We didn't1106 # need validated buckets until now -- now that we are ready to download1107 # shares.)1108 self._share_vbuckets = {}1109 for sharenum, buckets in self._share_buckets.iteritems():1110 for bucket in buckets:1111 vbucket = ValidatedReadBucketProxy(sharenum, bucket, self._share_hash_tree, self._vup.num_segments, self._vup.block_size, self._vup.share_size)1112 self._share_vbuckets.setdefault(sharenum, set()).add(vbucket)1113 1114 # after the above code, self._share_vbuckets contains enough1115 # buckets to complete the download, and some extra ones to1116 # tolerate some buckets dropping out or having1117 # errors. 
self._share_vbuckets is a dictionary that maps from1118 # shnum to a set of ValidatedBuckets, which themselves are1119 # wrappers around RIBucketReader references.1120 self.active_buckets = {} # k: shnum, v: ValidatedReadBucketProxy instance1121 1122 self._started_fetching = time.time()1123 1124 d = defer.succeed(None)1125 for segnum in range(self._vup.num_segments):1126 d.addCallback(self._download_segment, segnum)1127 # this pause, at the end of write, prevents pre-fetch from1128 # happening until the consumer is ready for more data.1129 d.addCallback(self._check_for_pause)1130 1131 self._stage_4_d.callback(None)1132 return d1133 1134 def _check_for_pause(self, res):1135 if self._paused:1136 d = defer.Deferred()1137 self._paused.addCallback(lambda ignored: d.callback(res))1138 return d1139 if self._stopped:1140 raise DownloadStopped("our Consumer called stopProducing()")1141 self._monitor.raise_if_cancelled()1142 return res1143 1144 def _download_segment(self, res, segnum):1145 if self._status:1146 self._status.set_status("Downloading segment %d of %d" %1147 (segnum+1, self._vup.num_segments))1148 self.log("downloading seg#%d of %d (%d%%)"1149 % (segnum, self._vup.num_segments,1150 100.0 * segnum / self._vup.num_segments))1151 # memory footprint: when the SegmentDownloader finishes pulling down1152 # all shares, we have 1*segment_size of usage.1153 segmentdler = SegmentDownloader(self, segnum,1154 self._verifycap.needed_shares,1155 self._results)1156 started = time.time()1157 d = segmentdler.start()1158 def _finished_fetching(res):1159 elapsed = time.time() - started1160 self._results.timings["cumulative_fetch"] += elapsed1161 return res1162 if self._results:1163 d.addCallback(_finished_fetching)1164 # pause before using more memory1165 d.addCallback(self._check_for_pause)1166 # while the codec does its job, we hit 2*segment_size1167 def _started_decode(res):1168 self._started_decode = time.time()1169 return res1170 if self._results:1171 
d.addCallback(_started_decode)1172 if segnum + 1 == self._vup.num_segments:1173 codec = self._tail_codec1174 else:1175 codec = self._codec1176 d.addCallback(lambda (shares, shareids): codec.decode(shares, shareids))1177 # once the codec is done, we drop back to 1*segment_size, because1178 # 'shares' goes out of scope. The memory usage is all in the1179 # plaintext now, spread out into a bunch of tiny buffers.1180 def _finished_decode(res):1181 elapsed = time.time() - self._started_decode1182 self._results.timings["cumulative_decode"] += elapsed1183 return res1184 if self._results:1185 d.addCallback(_finished_decode)1186 1187 # pause/check-for-stop just before writing, to honor stopProducing1188 d.addCallback(self._check_for_pause)1189 d.addCallback(self._got_segment)1190 return d1191 1192 def _got_segment(self, buffers):1193 precondition(self._crypttext_hash_tree)1194 started_decrypt = time.time()1195 self._status.set_progress(float(self._current_segnum)/self._verifycap.size)1196 1197 if self._current_segnum + 1 == self._vup.num_segments:1198 # This is the last segment.1199 # Trim off any padding added by the upload side. We never send1200 # empty segments. 
If the data was an exact multiple of the1201 # segment size, the last segment will be full.1202 tail_buf_size = mathutil.div_ceil(self._vup.tail_segment_size, self._verifycap.needed_shares)1203 num_buffers_used = mathutil.div_ceil(self._vup.tail_data_size, tail_buf_size)1204 # Remove buffers which don't contain any part of the tail.1205 del buffers[num_buffers_used:]1206 # Remove the past-the-tail-part of the last buffer.1207 tail_in_last_buf = self._vup.tail_data_size % tail_buf_size1208 if tail_in_last_buf == 0:1209 tail_in_last_buf = tail_buf_size1210 buffers[-1] = buffers[-1][:tail_in_last_buf]1211 1212 # First compute the hash of this segment and check that it fits.1213 ch = hashutil.crypttext_segment_hasher()1214 for buffer in buffers:1215 self._ciphertext_hasher.update(buffer)1216 ch.update(buffer)1217 self._crypttext_hash_tree.set_hashes(leaves={self._current_segnum: ch.digest()})1218 1219 # Then write this segment to the target.1220 if not self._opened:1221 self._opened = True1222 self._target.open(self._verifycap.size)1223 1224 for buffer in buffers:1225 self._target.write(buffer)1226 self._bytes_done += len(buffer)1227 1228 self._status.set_progress(float(self._bytes_done)/self._verifycap.size)1229 self._current_segnum += 11230 1231 if self._results:1232 elapsed = time.time() - started_decrypt1233 self._results.timings["cumulative_decrypt"] += elapsed1234 1235 def _done(self, res):1236 self.log("download done")1237 if self._results:1238 now = time.time()1239 self._results.timings["total"] = now - self._started1240 self._results.timings["segments"] = now - self._started_fetching1241 if self._vup.crypttext_hash:1242 _assert(self._vup.crypttext_hash == self._ciphertext_hasher.digest(),1243 "bad crypttext_hash: computed=%s, expected=%s" %1244 (base32.b2a(self._ciphertext_hasher.digest()),1245 base32.b2a(self._vup.crypttext_hash)))1246 _assert(self._bytes_done == self._verifycap.size, self._bytes_done, self._verifycap.size)1247 
self._status.set_progress(1)1248 self._target.close()1249 return self._target.finish()1250 def get_download_status(self):1251 return self._status1252 1253 1254 class ConsumerAdapter:1255 implements(IDownloadTarget, IConsumer)1256 def __init__(self, consumer):1257 self._consumer = consumer1258 1259 def registerProducer(self, producer, streaming):1260 self._consumer.registerProducer(producer, streaming)1261 def unregisterProducer(self):1262 self._consumer.unregisterProducer()1263 1264 def open(self, size):1265 pass1266 def write(self, data):1267 self._consumer.write(data)1268 def close(self):1269 pass1270 1271 def fail(self, why):1272 pass1273 def register_canceller(self, cb):1274 pass1275 def finish(self):1276 return self._consumer1277 # The following methods are just because the target might be a1278 # repairer.DownUpConnector, and just because the current CHKUpload object1279 # expects to find the storage index and encoding parameters in its1280 # Uploadable.1281 def set_storageindex(self, storageindex):1282 pass1283 def set_encodingparams(self, encodingparams):1284 pass1285 1286 1287 class Downloader:1288 """I am a service that allows file downloading.1289 """1290 # TODO: in fact, this service only downloads immutable files (URI:CHK:).1291 # It is scheduled to go away, to be replaced by filenode.download()1292 implements(IDownloader)1293 1294 def __init__(self, storage_broker, stats_provider):1295 self.storage_broker = storage_broker1296 self.stats_provider = stats_provider1297 self._all_downloads = weakref.WeakKeyDictionary() # for debugging1298 1299 def download(self, u, t, _log_msg_id=None, monitor=None, history=None):1300 assert isinstance(u, uri.CHKFileURI)1301 t = IDownloadTarget(t)1302 assert t.write1303 assert t.close1304 1305 if self.stats_provider:1306 # these counters are meant for network traffic, and don't1307 # include LIT files1308 self.stats_provider.count('downloader.files_downloaded', 1)1309 
self.stats_provider.count('downloader.bytes_downloaded', u.get_size())1310 1311 target = DecryptingTarget(t, u.key, _log_msg_id=_log_msg_id)1312 if not monitor:1313 monitor=Monitor()1314 dl = CiphertextDownloader(self.storage_broker,1315 u.get_verify_cap(), target,1316 monitor=monitor)1317 self._all_downloads[dl] = None1318 if history:1319 history.add_download(dl.get_download_status())1320 d = dl.start()1321 return d -
new file src/allmydata/immutable/downloader/common.py
diff --git a/src/allmydata/immutable/downloader/__init__.py b/src/allmydata/immutable/downloader/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/allmydata/immutable/downloader/common.py b/src/allmydata/immutable/downloader/common.py new file mode 100644 index 0000000..e9dd271
# Share/block states used by the new downloader (SegmentFetcher, Share).
# Each state is its own name so that repr() of a state dict reads well in
# log messages.
AVAILABLE = "AVAILABLE"
PENDING = "PENDING"
OVERDUE = "OVERDUE"
COMPLETE = "COMPLETE"
CORRUPT = "CORRUPT"
DEAD = "DEAD"
BADSEGNUM = "BADSEGNUM"


class BadSegmentNumberError(Exception):
    """A segment number beyond the end of the file was requested."""


class WrongSegmentError(Exception):
    # NOTE(review): raised/caught outside this file; semantics not visible
    # here -- presumably a block arrived for a different segment. Confirm
    # against the callers before documenting further.
    pass


class BadCiphertextHashError(Exception):
    # NOTE(review): semantics not visible in this chunk; presumably the
    # decoded ciphertext failed its hash check. Confirm against callers.
    pass


class DownloadStopped(Exception):
    """The consumer halted the download (e.g. via stopProducing())."""
new file src/allmydata/immutable/downloader/fetcher.py
diff --git a/src/allmydata/immutable/downloader/fetcher.py b/src/allmydata/immutable/downloader/fetcher.py new file mode 100644 index 0000000..3918f65
from twisted.python.failure import Failure
from foolscap.api import eventually
from allmydata.interfaces import NotEnoughSharesError, NoSharesError
from allmydata.util import log
from allmydata.util.dictutil import DictOfSets
from common import AVAILABLE, PENDING, OVERDUE, COMPLETE, CORRUPT, DEAD, \
     BADSEGNUM, BadSegmentNumberError

class SegmentFetcher:
    """I am responsible for acquiring blocks for a single segment. I will use
    the Share instances passed to my add_shares() method to locate, retrieve,
    and validate those blocks. I expect my parent node to call my
    no_more_shares() method when there are no more shares available. I will
    call my parent's want_more_shares() method when I want more: I expect to
    see at least one call to add_shares or no_more_shares afterwards.

    When I have enough validated blocks, I will call my parent's
    process_blocks() method with a dictionary that maps shnum to blockdata.
    If I am unable to provide enough blocks, I will call my parent's
    fetch_failed() method with (self, f). After either of these events, I
    will shut down and do no further work. My parent can also call my stop()
    method to have me shut down early."""

    def __init__(self, node, segnum, k):
        # node: the parent; I call its fetch_failed/process_blocks/
        #       want_more_shares methods
        # segnum: the segment number whose blocks I must acquire
        # k: number of distinct shares needed before decode can proceed
        self._node = node # _Node
        self.segnum = segnum
        self._k = k
        self._shares = {} # maps non-dead Share instance to a state, one of
                          # (AVAILABLE, PENDING, OVERDUE, COMPLETE, CORRUPT).
                          # State transition map is:
                          #  AVAILABLE -(send-read)-> PENDING
                          #  PENDING -(timer)-> OVERDUE
                          #  PENDING -(rx)-> COMPLETE, CORRUPT, DEAD, BADSEGNUM
                          #  OVERDUE -(rx)-> COMPLETE, CORRUPT, DEAD, BADSEGNUM
                          # If a share becomes DEAD, it is removed from the
                          # dict. If it becomes BADSEGNUM, the whole fetch is
                          # terminated.
        self._share_observers = {} # maps Share to EventStreamObserver for
                                   # active ones
        self._shnums = DictOfSets() # maps shnum to the shares that provide it
        self._blocks = {} # maps shnum to validated block data
        self._no_more_shares = False
        self._bad_segnum = False
        self._last_failure = None
        self._running = True

    def stop(self):
        """Shut down: cancel all outstanding block requests and stop
        looping. Safe to call more than once."""
        log.msg("SegmentFetcher(%s).stop" % self._node._si_prefix,
                level=log.NOISY, umid="LWyqpg")
        self._cancel_all_requests()
        self._running = False
        self._shares.clear() # let GC work # ??? XXX


    # called by our parent _Node

    def add_shares(self, shares):
        # called when ShareFinder locates a new share, and when a non-initial
        # segment fetch is started and we already know about shares from the
        # previous segment
        for s in shares:
            self._shares[s] = AVAILABLE
            self._shnums.add(s._shnum, s)
        eventually(self.loop)

    def no_more_shares(self):
        # ShareFinder tells us it's reached the end of its list
        self._no_more_shares = True
        eventually(self.loop)

    # internal methods

    def _count_shnums(self, *states):
        """shnums for which at least one state is in the following list"""
        shnums = []
        for shnum,shares in self._shnums.iteritems():
            matches = [s for s in shares if self._shares.get(s) in states]
            if matches:
                shnums.append(shnum)
        return len(shnums)

    def loop(self):
        """Run one pass of the state machine, converting any exception into
        a fetch_failed() notification (and re-raising it for the logs)."""
        try:
            # if any exception occurs here, kill the download
            self._do_loop()
        except BaseException:
            self._node.fetch_failed(self, Failure())
            raise

    def _do_loop(self):
        """Core state machine: detect completion, exhaustion, or a bad
        segment number; otherwise send more requests and/or ask the parent
        for more shares."""
        k = self._k
        if not self._running:
            return
        if self._bad_segnum:
            # oops, we were asking for a segment number beyond the end of the
            # file. This is an error.
            self.stop()
            e = BadSegmentNumberError("segnum=%d, numsegs=%d" %
                                      (self.segnum, self._node.num_segments))
            f = Failure(e)
            self._node.fetch_failed(self, f)
            return

        # are we done?
        if self._count_shnums(COMPLETE) >= k:
            # yay!
            self.stop()
            self._node.process_blocks(self.segnum, self._blocks)
            return

        # we may have exhausted everything
        if (self._no_more_shares and
            self._count_shnums(AVAILABLE, PENDING, OVERDUE, COMPLETE) < k):
            # no more new shares are coming, and the remaining hopeful shares
            # aren't going to be enough. boo!

            log.msg("share states: %r" % (self._shares,),
                    level=log.NOISY, umid="0ThykQ")
            if self._count_shnums(AVAILABLE, PENDING, OVERDUE, COMPLETE) == 0:
                format = ("no shares (need %(k)d)."
                          " Last failure: %(last_failure)s")
                args = { "k": k,
                         "last_failure": self._last_failure }
                error = NoSharesError
            else:
                format = ("ran out of shares: %(complete)d complete,"
                          " %(pending)d pending, %(overdue)d overdue,"
                          " %(unused)d unused, need %(k)d."
                          " Last failure: %(last_failure)s")
                args = {"complete": self._count_shnums(COMPLETE),
                        "pending": self._count_shnums(PENDING),
                        "overdue": self._count_shnums(OVERDUE),
                        # 'unused' should be zero
                        "unused": self._count_shnums(AVAILABLE),
                        "k": k,
                        "last_failure": self._last_failure,
                        }
                error = NotEnoughSharesError
            log.msg(format=format, level=log.UNUSUAL, umid="1DsnTg", **args)
            e = error(format % args)
            f = Failure(e)
            self.stop()
            self._node.fetch_failed(self, f)
            return

        # nope, not done. Are we "block-hungry" (i.e. do we want to send out
        # more read requests, or do we think we have enough in flight
        # already?)
        while self._count_shnums(PENDING, COMPLETE) < k:
            # we're hungry.. are there any unused shares?
            sent = self._send_new_request()
            if not sent:
                break

        # ok, now are we "share-hungry" (i.e. do we have enough known shares
        # to make us happy, or should we ask the ShareFinder to get us more?)
        if self._count_shnums(AVAILABLE, PENDING, COMPLETE) < k:
            # we're hungry for more shares
            self._node.want_more_shares()
            # that will trigger the ShareFinder to keep looking

    def _find_one(self, shares, state):
        """Return some share from 'shares' that is currently in 'state'."""
        # TODO could choose fastest
        for s in shares:
            if self._shares[s] == state:
                return s
        # can never get here, caller has assert in case of code bug

    def _send_new_request(self):
        """Send a block request for one AVAILABLE share of a shnum that has
        nothing COMPLETE or PENDING yet. Returns True if a request was sent,
        False if there was no candidate."""
        for shnum,shares in sorted(self._shnums.iteritems()):
            states = [self._shares[s] for s in shares]
            if COMPLETE in states or PENDING in states:
                # don't send redundant requests
                continue
            if AVAILABLE not in states:
                # no candidates for this shnum, move on
                continue
            # here's a candidate. Send a request.
            s = self._find_one(shares, AVAILABLE)
            assert s
            self._shares[s] = PENDING
            self._share_observers[s] = o = s.get_block(self.segnum)
            o.subscribe(self._block_request_activity, share=s, shnum=shnum)
            # TODO: build up a list of candidates, then walk through the
            # list, sending requests to the most desireable servers,
            # re-checking our block-hunger each time. For non-initial segment
            # fetches, this would let us stick with faster servers.
            return True
        # nothing was sent: don't call us again until you have more shares to
        # work with, or one of the existing shares has been declared OVERDUE
        return False

    def _cancel_all_requests(self):
        """Cancel every outstanding block-request observer."""
        for o in self._share_observers.values():
            o.cancel()
        self._share_observers = {}

    def _block_request_activity(self, share, shnum, state, block=None, f=None):
        # called by Shares, in response to our s.send_request() calls.
        # 'state' is one of the common.py state labels; 'block' accompanies
        # COMPLETE, 'f' (a Failure) accompanies DEAD.
        if not self._running:
            return
        log.msg("SegmentFetcher(%s)._block_request_activity:"
                " Share(sh%d-on-%s) -> %s" %
                (self._node._si_prefix, shnum, share._peerid_s, state),
                level=log.NOISY, umid="vilNWA")
        # COMPLETE, CORRUPT, DEAD, BADSEGNUM are terminal.
        if state in (COMPLETE, CORRUPT, DEAD, BADSEGNUM):
            self._share_observers.pop(share, None)
        if state is COMPLETE:
            # 'block' is fully validated
            self._shares[share] = COMPLETE
            self._blocks[shnum] = block
        elif state is OVERDUE:
            self._shares[share] = OVERDUE
            # OVERDUE is not terminal: it will eventually transition to
            # COMPLETE, CORRUPT, or DEAD.
        elif state is CORRUPT:
            self._shares[share] = CORRUPT
        elif state is DEAD:
            del self._shares[share]
            self._shnums[shnum].remove(share)
            self._last_failure = f
        elif state is BADSEGNUM:
            self._shares[share] = BADSEGNUM # ???
            self._bad_segnum = True
        eventually(self.loop)
new file src/allmydata/immutable/downloader/finder.py
diff --git a/src/allmydata/immutable/downloader/finder.py b/src/allmydata/immutable/downloader/finder.py new file mode 100644 index 0000000..9adee99
import time
now = time.time
from foolscap.api import eventually
from allmydata.util import base32, log, idlib
from twisted.internet import reactor

from share import Share, CommonShare

def incidentally(res, f, *args, **kwargs):
    """Add me to a Deferred chain like this:
     d.addBoth(incidentally, func, arg)
    and I'll behave as if you'd added the following function:
     def _(res):
         func(arg)
         return res
    This is useful if you want to execute an expression when the Deferred
    fires, but don't care about its value.
    """
    f(*args, **kwargs)
    return res

class RequestToken:
    """Opaque token identifying one outstanding get_buckets (DYHB) query;
    used as the key in pending_requests/overdue_requests/overdue_timers."""
    def __init__(self, peerid):
        self.peerid = peerid

class ShareFinder:
    """I locate shares for a single file by sending DYHB ('do you have
    block?') get_buckets queries to storage servers, and deliver the
    resulting Share instances to my node (the 'share consumer') via
    got_shares(), one at a time, whenever it tells me it is hungry()."""

    # how long a DYHB query may be in flight before we mark it overdue and
    # allow another query to be sent in its place
    OVERDUE_TIMEOUT = 10.0

    def __init__(self, storage_broker, verifycap, node, download_status,
                 logparent=None, max_outstanding_requests=10):
        self.running = True # stopped by Share.stop, from Terminator
        self.verifycap = verifycap
        self._started = False
        self._storage_broker = storage_broker
        self.share_consumer = self.node = node
        self.max_outstanding_requests = max_outstanding_requests

        self._hungry = False

        self._commonshares = {} # shnum to CommonShare instance
        self.undelivered_shares = [] # Shares found but not yet handed to node
        self.pending_requests = set() # RequestTokens for in-flight queries
        self.overdue_requests = set() # subset of pending_requests
        self.overdue_timers = {} # RequestToken -> IDelayedCall

        self._storage_index = verifycap.storage_index
        self._si_prefix = base32.b2a_l(self._storage_index[:8], 60)
        self._node_logparent = logparent
        self._download_status = download_status
        self._lp = log.msg(format="ShareFinder[si=%(si)s] starting",
                           si=self._si_prefix,
                           level=log.NOISY, parent=logparent, umid="2xjj2A")

    def start_finding_servers(self):
        # don't get servers until somebody uses us: creating the
        # ImmutableFileNode should not cause work to happen yet. Test case is
        # test_dirnode, which creates us with storage_broker=None
        if not self._started:
            si = self.verifycap.storage_index
            s = self._storage_broker.get_servers_for_index(si)
            self._servers = iter(s)
            self._started = True

    def log(self, *args, **kwargs):
        """log.msg() wrapper that defaults 'parent' to our own log entry."""
        if "parent" not in kwargs:
            kwargs["parent"] = self._lp
        return log.msg(*args, **kwargs)

    def stop(self):
        """Stop all work and cancel every outstanding overdue timer."""
        self.running = False
        while self.overdue_timers:
            req,t = self.overdue_timers.popitem()
            t.cancel()

    # called by our parent CiphertextDownloader
    def hungry(self):
        """The node wants (one more) share: start querying servers if we
        haven't yet, and schedule a loop() pass."""
        self.log(format="ShareFinder[si=%(si)s] hungry",
                 si=self._si_prefix, level=log.NOISY, umid="NywYaQ")
        self.start_finding_servers()
        self._hungry = True
        eventually(self.loop)

    # internal methods
    def loop(self):
        """One scheduling pass: deliver an undelivered share if we have one,
        else send a new DYHB query (bounded by max_outstanding_requests),
        else wait on in-flight queries, else declare no_more_shares."""
        undelivered_s = ",".join(["sh%d@%s" %
                                  (s._shnum, idlib.shortnodeid_b2a(s._peerid))
                                  for s in self.undelivered_shares])
        pending_s = ",".join([idlib.shortnodeid_b2a(rt.peerid)
                              for rt in self.pending_requests]) # sort?
        self.log(format="ShareFinder loop: running=%(running)s"
                 " hungry=%(hungry)s, undelivered=%(undelivered)s,"
                 " pending=%(pending)s",
                 running=self.running, hungry=self._hungry,
                 undelivered=undelivered_s, pending=pending_s,
                 level=log.NOISY, umid="kRtS4Q")
        if not self.running:
            return
        if not self._hungry:
            return
        if self.undelivered_shares:
            sh = self.undelivered_shares.pop(0)
            # they will call hungry() again if they want more
            self._hungry = False
            self.log(format="delivering Share(shnum=%(shnum)d, server=%(peerid)s)",
                     shnum=sh._shnum, peerid=sh._peerid_s,
                     level=log.NOISY, umid="2n1qQw")
            eventually(self.share_consumer.got_shares, [sh])
            return

        # overdue queries don't count against the outstanding-request limit:
        # we may send a replacement query past them
        non_overdue = self.pending_requests - self.overdue_requests
        if len(non_overdue) >= self.max_outstanding_requests:
            # cannot send more requests, must wait for some to retire
            return

        server = None
        try:
            if self._servers:
                server = self._servers.next()
        except StopIteration:
            self._servers = None

        if server:
            self.send_request(server)
            # we loop again to get parallel queries. The check above will
            # prevent us from looping forever.
            eventually(self.loop)
            return

        if self.pending_requests:
            # no server, but there are still requests in flight: maybe one of
            # them will make progress
            return

        self.log(format="ShareFinder.loop: no_more_shares, ever",
                 level=log.UNUSUAL, umid="XjQlzg")
        # we've run out of servers (so we can't send any more requests), and
        # we have nothing in flight. No further progress can be made. They
        # are destined to remain hungry.
        self.share_consumer.no_more_shares()

    def send_request(self, server):
        """Send one get_buckets query to (peerid, rref), track it with a
        RequestToken, and arm its overdue timer."""
        peerid, rref = server
        req = RequestToken(peerid)
        self.pending_requests.add(req)
        lp = self.log(format="sending DYHB to [%(peerid)s]",
                      peerid=idlib.shortnodeid_b2a(peerid),
                      level=log.NOISY, umid="Io7pyg")
        d_ev = self._download_status.add_dyhb_sent(peerid, now())
        # TODO: get the timer from a Server object, it knows best
        self.overdue_timers[req] = reactor.callLater(self.OVERDUE_TIMEOUT,
                                                     self.overdue, req)
        d = rref.callRemote("get_buckets", self._storage_index)
        # retire the request (and its timer) before processing the answer
        d.addBoth(incidentally, self._request_retired, req)
        d.addCallbacks(self._got_response, self._got_error,
                       callbackArgs=(rref.version, peerid, req, d_ev, lp),
                       errbackArgs=(peerid, req, d_ev, lp))
        d.addErrback(log.err, format="error in send_request",
                     level=log.WEIRD, parent=lp, umid="rpdV0w")
        d.addCallback(incidentally, eventually, self.loop)

    def _request_retired(self, req):
        """Forget a finished query: drop it from the pending/overdue sets
        and cancel its timer if it hasn't fired yet."""
        self.pending_requests.discard(req)
        self.overdue_requests.discard(req)
        if req in self.overdue_timers:
            self.overdue_timers[req].cancel()
            del self.overdue_timers[req]

    def overdue(self, req):
        """Timer callback: mark 'req' overdue so loop() may send another
        query past it."""
        del self.overdue_timers[req]
        assert req in self.pending_requests # paranoia, should never be false
        self.overdue_requests.add(req)
        eventually(self.loop)

    def _got_response(self, buckets, server_version, peerid, req, d_ev, lp):
        """A DYHB answer arrived: record it in the download status and wrap
        each returned bucket in a Share."""
        shnums = sorted([shnum for shnum in buckets])
        d_ev.finished(shnums, now())
        if buckets:
            shnums_s = ",".join([str(shnum) for shnum in shnums])
            self.log(format="got shnums [%(shnums)s] from [%(peerid)s]",
                     shnums=shnums_s, peerid=idlib.shortnodeid_b2a(peerid),
                     level=log.NOISY, parent=lp, umid="0fcEZw")
        else:
            self.log(format="no shares from [%(peerid)s]",
                     peerid=idlib.shortnodeid_b2a(peerid),
                     level=log.NOISY, parent=lp, umid="U7d4JA")
        if self.node.num_segments is None:
            best_numsegs = self.node.guessed_num_segments
        else:
            best_numsegs = self.node.num_segments
        for shnum, bucket in buckets.iteritems():
            self._create_share(best_numsegs, shnum, bucket, server_version,
                               peerid)

    def _create_share(self, best_numsegs, shnum, bucket, server_version,
                      peerid):
        """Build a Share for this bucket, reusing (or creating) the
        per-shnum CommonShare, and queue it for delivery to the node."""
        if shnum in self._commonshares:
            cs = self._commonshares[shnum]
        else:
            cs = CommonShare(best_numsegs, self._si_prefix, shnum,
                             self._node_logparent)
            # Share._get_satisfaction is responsible for updating
            # CommonShare.set_numsegs after we know the UEB. Alternatives:
            #  1: d = self.node.get_num_segments()
            #     d.addCallback(cs.got_numsegs)
            #   the problem is that the OneShotObserverList I was using
            #   inserts an eventual-send between _get_satisfaction's
            #   _satisfy_UEB and _satisfy_block_hash_tree, and the
            #   CommonShare didn't get the num_segs message before
            #   being asked to set block hash values. To resolve this
            #   would require an immediate ObserverList instead of
            #   an eventual-send -based one
            #  2: break _get_satisfaction into Deferred-attached pieces.
            #     Yuck.
            self._commonshares[shnum] = cs
        s = Share(bucket, server_version, self.verifycap, cs, self.node,
                  self._download_status, peerid, shnum,
                  self._node_logparent)
        self.undelivered_shares.append(s)

    def _got_error(self, f, peerid, req, d_ev, lp):
        """A DYHB query failed: record and log it (the server is simply
        not used; no retry here)."""
        d_ev.finished("error", now())
        self.log(format="got error from [%(peerid)s]",
                 peerid=idlib.shortnodeid_b2a(peerid), failure=f,
                 level=log.UNUSUAL, parent=lp, umid="zUKdCw")
new file src/allmydata/immutable/downloader/node.py
diff --git a/src/allmydata/immutable/downloader/node.py b/src/allmydata/immutable/downloader/node.py new file mode 100644 index 0000000..2991c9e
- + 1 2 import time 3 now = time.time 4 from twisted.python.failure import Failure 5 from twisted.internet import defer 6 from foolscap.api import eventually 7 from allmydata import uri 8 from allmydata.codec import CRSDecoder 9 from allmydata.util import base32, log, hashutil, mathutil, observer 10 from allmydata.interfaces import DEFAULT_MAX_SEGMENT_SIZE 11 from allmydata.hashtree import IncompleteHashTree, BadHashError, \ 12 NotEnoughHashesError 13 14 # local imports 15 from finder import ShareFinder 16 from fetcher import SegmentFetcher 17 from segmentation import Segmentation 18 from common import BadCiphertextHashError 19 20 class Cancel: 21 def __init__(self, f): 22 self._f = f 23 self.cancelled = False 24 def cancel(self): 25 if not self.cancelled: 26 self.cancelled = True 27 self._f(self) 28 29 class DownloadNode: 30 """Internal class which manages downloads and holds state. External 31 callers use CiphertextFileNode instead.""" 32 33 # Share._node points to me 34 def __init__(self, verifycap, storage_broker, secret_holder, 35 terminator, history, download_status): 36 assert isinstance(verifycap, uri.CHKFileVerifierURI) 37 self._verifycap = verifycap 38 self._storage_broker = storage_broker 39 self._si_prefix = base32.b2a_l(verifycap.storage_index[:8], 60) 40 self.running = True 41 if terminator: 42 terminator.register(self) # calls self.stop() at stopService() 43 # the rules are: 44 # 1: Only send network requests if you're active (self.running is True) 45 # 2: Use TimerService, not reactor.callLater 46 # 3: You can do eventual-sends any time. 47 # These rules should mean that once 48 # stopService()+flushEventualQueue() fires, everything will be done. 
49 self._secret_holder = secret_holder 50 self._history = history 51 self._download_status = download_status 52 53 k, N = self._verifycap.needed_shares, self._verifycap.total_shares 54 self.share_hash_tree = IncompleteHashTree(N) 55 56 # we guess the segment size, so Segmentation can pull non-initial 57 # segments in a single roundtrip. This populates 58 # .guessed_segment_size, .guessed_num_segments, and 59 # .ciphertext_hash_tree (with a dummy, to let us guess which hashes 60 # we'll need) 61 self._build_guessed_tables(DEFAULT_MAX_SEGMENT_SIZE) 62 63 # filled in when we parse a valid UEB 64 self.have_UEB = False 65 self.segment_size = None 66 self.tail_segment_size = None 67 self.tail_segment_padded = None 68 self.num_segments = None 69 self.block_size = None 70 self.tail_block_size = None 71 72 # things to track callers that want data 73 74 # _segment_requests can have duplicates 75 self._segment_requests = [] # (segnum, d, cancel_handle) 76 self._active_segment = None # a SegmentFetcher, with .segnum 77 78 self._segsize_observers = observer.OneShotObserverList() 79 80 # we create one top-level logparent for this _Node, and another one 81 # for each read() call. Segmentation and get_segment() messages are 82 # associated with the read() call, everything else is tied to the 83 # _Node's log entry. 
84 lp = log.msg(format="Immutable _Node(%(si)s) created: size=%(size)d," 85 " guessed_segsize=%(guessed_segsize)d," 86 " guessed_numsegs=%(guessed_numsegs)d", 87 si=self._si_prefix, size=verifycap.size, 88 guessed_segsize=self.guessed_segment_size, 89 guessed_numsegs=self.guessed_num_segments, 90 level=log.OPERATIONAL, umid="uJ0zAQ") 91 self._lp = lp 92 93 self._sharefinder = ShareFinder(storage_broker, verifycap, self, 94 self._download_status, lp) 95 self._shares = set() 96 97 def _build_guessed_tables(self, max_segment_size): 98 size = min(self._verifycap.size, max_segment_size) 99 s = mathutil.next_multiple(size, self._verifycap.needed_shares) 100 self.guessed_segment_size = s 101 r = self._calculate_sizes(self.guessed_segment_size) 102 self.guessed_num_segments = r["num_segments"] 103 # as with CommonShare, our ciphertext_hash_tree is a stub until we 104 # get the real num_segments 105 self.ciphertext_hash_tree = IncompleteHashTree(self.guessed_num_segments) 106 107 def __repr__(self): 108 return "Imm_Node(%s)" % (self._si_prefix,) 109 110 def stop(self): 111 # called by the Terminator at shutdown, mostly for tests 112 if self._active_segment: 113 self._active_segment.stop() 114 self._active_segment = None 115 self._sharefinder.stop() 116 117 # things called by outside callers, via CiphertextFileNode. get_segment() 118 # may also be called by Segmentation. 119 120 def read(self, consumer, offset=0, size=None, read_ev=None): 121 """I am the main entry point, from which FileNode.read() can get 122 data. I feed the consumer with the desired range of ciphertext. I 123 return a Deferred that fires (with the consumer) when the read is 124 finished. 
125 126 Note that there is no notion of a 'file pointer': each call to read() 127 uses an independent offset= value.""" 128 # for concurrent operations: each gets its own Segmentation manager 129 if size is None: 130 size = self._verifycap.size 131 # clip size so offset+size does not go past EOF 132 size = min(size, self._verifycap.size-offset) 133 if read_ev is None: 134 read_ev = self._download_status.add_read_event(offset, size, now()) 135 136 lp = log.msg(format="imm Node(%(si)s).read(%(offset)d, %(size)d)", 137 si=base32.b2a(self._verifycap.storage_index)[:8], 138 offset=offset, size=size, 139 level=log.OPERATIONAL, parent=self._lp, umid="l3j3Ww") 140 if self._history: 141 sp = self._history.stats_provider 142 sp.count("downloader.files_downloaded", 1) # really read() calls 143 sp.count("downloader.bytes_downloaded", size) 144 s = Segmentation(self, offset, size, consumer, read_ev, lp) 145 # this raises an interesting question: what segments to fetch? if 146 # offset=0, always fetch the first segment, and then allow 147 # Segmentation to be responsible for pulling the subsequent ones if 148 # the first wasn't large enough. If offset>0, we're going to need an 149 # extra roundtrip to get the UEB (and therefore the segment size) 150 # before we can figure out which segment to get. TODO: allow the 151 # offset-table-guessing code (which starts by guessing the segsize) 152 # to assist the offset>0 process. 153 d = s.start() 154 def _done(res): 155 read_ev.finished(now()) 156 return res 157 d.addBoth(_done) 158 return d 159 160 def get_segment(self, segnum, logparent=None): 161 """Begin downloading a segment. I return a tuple (d, c): 'd' is a 162 Deferred that fires with (offset,data) when the desired segment is 163 available, and c is an object on which c.cancel() can be called to 164 disavow interest in the segment (after which 'd' will never fire). 
165 166 You probably need to know the segment size before calling this, 167 unless you want the first few bytes of the file. If you ask for a 168 segment number which turns out to be too large, the Deferred will 169 errback with BadSegmentNumberError. 170 171 The Deferred fires with the offset of the first byte of the data 172 segment, so that you can call get_segment() before knowing the 173 segment size, and still know which data you received. 174 175 The Deferred can also errback with other fatal problems, such as 176 NotEnoughSharesError, NoSharesError, or BadCiphertextHashError. 177 """ 178 log.msg(format="imm Node(%(si)s).get_segment(%(segnum)d)", 179 si=base32.b2a(self._verifycap.storage_index)[:8], 180 segnum=segnum, 181 level=log.OPERATIONAL, parent=logparent, umid="UKFjDQ") 182 self._download_status.add_segment_request(segnum, now()) 183 d = defer.Deferred() 184 c = Cancel(self._cancel_request) 185 self._segment_requests.append( (segnum, d, c) ) 186 self._start_new_segment() 187 return (d, c) 188 189 def get_segsize(self): 190 """Return a Deferred that fires when we know the real segment size.""" 191 if self.segment_size: 192 return defer.succeed(self.segment_size) 193 # TODO: this downloads (and discards) the first segment of the file. 194 # We could make this more efficient by writing 195 # fetcher.SegmentSizeFetcher, with the job of finding a single valid 196 # share and extracting the UEB. We'd add Share.get_UEB() to request 197 # just the UEB. 
198 (d,c) = self.get_segment(0) 199 # this ensures that an error during get_segment() will errback the 200 # caller, so Repair won't wait forever on completely missing files 201 d.addCallback(lambda ign: self._segsize_observers.when_fired()) 202 return d 203 204 # things called by the Segmentation object used to transform 205 # arbitrary-sized read() calls into quantized segment fetches 206 207 def _start_new_segment(self): 208 if self._active_segment is None and self._segment_requests: 209 segnum = self._segment_requests[0][0] 210 k = self._verifycap.needed_shares 211 log.msg(format="%(node)s._start_new_segment: segnum=%(segnum)d", 212 node=repr(self), segnum=segnum, 213 level=log.NOISY, umid="wAlnHQ") 214 self._active_segment = fetcher = SegmentFetcher(self, segnum, k) 215 active_shares = [s for s in self._shares if s.is_alive()] 216 fetcher.add_shares(active_shares) # this triggers the loop 217 218 219 # called by our child ShareFinder 220 def got_shares(self, shares): 221 self._shares.update(shares) 222 if self._active_segment: 223 self._active_segment.add_shares(shares) 224 def no_more_shares(self): 225 self._no_more_shares = True 226 if self._active_segment: 227 self._active_segment.no_more_shares() 228 229 # things called by our Share instances 230 231 def validate_and_store_UEB(self, UEB_s): 232 log.msg("validate_and_store_UEB", 233 level=log.OPERATIONAL, parent=self._lp, umid="7sTrPw") 234 h = hashutil.uri_extension_hash(UEB_s) 235 if h != self._verifycap.uri_extension_hash: 236 raise BadHashError 237 UEB_dict = uri.unpack_extension(UEB_s) 238 self._parse_and_store_UEB(UEB_dict) # sets self._stuff 239 # TODO: a malformed (but authentic) UEB could throw an assertion in 240 # _parse_and_store_UEB, and we should abandon the download. 241 self.have_UEB = True 242 243 def _parse_and_store_UEB(self, d): 244 # Note: the UEB contains needed_shares and total_shares. These are 245 # redundant and inferior (the filecap contains the authoritative 246 # values). 
However, because it is possible to encode the same file in 247 # multiple ways, and the encoders might choose (poorly) to use the 248 # same key for both (therefore getting the same SI), we might 249 # encounter shares for both types. The UEB hashes will be different, 250 # however, and we'll disregard the "other" encoding's shares as 251 # corrupted. 252 253 # therefore, we ignore d['total_shares'] and d['needed_shares']. 254 255 log.msg(format="UEB=%(ueb)s, vcap=%(vcap)s", 256 ueb=repr(d), vcap=self._verifycap.to_string(), 257 level=log.NOISY, parent=self._lp, umid="cVqZnA") 258 259 k, N = self._verifycap.needed_shares, self._verifycap.total_shares 260 261 self.segment_size = d['segment_size'] 262 self._segsize_observers.fire(self.segment_size) 263 264 r = self._calculate_sizes(self.segment_size) 265 self.tail_segment_size = r["tail_segment_size"] 266 self.tail_segment_padded = r["tail_segment_padded"] 267 self.num_segments = r["num_segments"] 268 self.block_size = r["block_size"] 269 self.tail_block_size = r["tail_block_size"] 270 log.msg("actual sizes: %s" % (r,), 271 level=log.NOISY, parent=self._lp, umid="PY6P5Q") 272 if (self.segment_size == self.guessed_segment_size 273 and self.num_segments == self.guessed_num_segments): 274 log.msg("my guess was right!", 275 level=log.NOISY, parent=self._lp, umid="x340Ow") 276 else: 277 log.msg("my guess was wrong! Extra round trips for me.", 278 level=log.NOISY, parent=self._lp, umid="tb7RJw") 279 280 # zfec.Decode() instantiation is fast, but still, let's use the same 281 # codec instance for all but the last segment. 3-of-10 takes 15us on 282 # my laptop, 25-of-100 is 900us, 3-of-255 is 97us, 25-of-255 is 283 # 2.5ms, worst-case 254-of-255 is 9.3ms 284 self._codec = CRSDecoder() 285 self._codec.set_params(self.segment_size, k, N) 286 287 288 # Ciphertext hash tree root is mandatory, so that there is at most 289 # one ciphertext that matches this read-cap or verify-cap. 
The 290 # integrity check on the shares is not sufficient to prevent the 291 # original encoder from creating some shares of file A and other 292 # shares of file B. self.ciphertext_hash_tree was a guess before: 293 # this is where we create it for real. 294 self.ciphertext_hash_tree = IncompleteHashTree(self.num_segments) 295 self.ciphertext_hash_tree.set_hashes({0: d['crypttext_root_hash']}) 296 297 self.share_hash_tree.set_hashes({0: d['share_root_hash']}) 298 299 # Our job is a fast download, not verification, so we ignore any 300 # redundant fields. The Verifier uses a different code path which 301 # does not ignore them. 302 303 def _calculate_sizes(self, segment_size): 304 # segments of ciphertext 305 size = self._verifycap.size 306 k = self._verifycap.needed_shares 307 308 # this assert matches the one in encode.py:127 inside 309 # Encoded._got_all_encoding_parameters, where the UEB is constructed 310 assert segment_size % k == 0 311 312 # the last segment is usually short. We don't store a whole segsize, 313 # but we do pad the segment up to a multiple of k, because the 314 # encoder requires that. 315 tail_segment_size = size % segment_size 316 if tail_segment_size == 0: 317 tail_segment_size = segment_size 318 padded = mathutil.next_multiple(tail_segment_size, k) 319 tail_segment_padded = padded 320 321 num_segments = mathutil.div_ceil(size, segment_size) 322 323 # each segment is turned into N blocks. All but the last are of size 324 # block_size, and the last is of size tail_block_size 325 block_size = segment_size / k 326 tail_block_size = tail_segment_padded / k 327 328 return { "tail_segment_size": tail_segment_size, 329 "tail_segment_padded": tail_segment_padded, 330 "num_segments": num_segments, 331 "block_size": block_size, 332 "tail_block_size": tail_block_size, 333 } 334 335 336 def process_share_hashes(self, share_hashes): 337 for hashnum in share_hashes: 338 if hashnum >= len(self.share_hash_tree): 339 # "BadHashError" is normally for e.g. 
a corrupt block. We 340 # sort of abuse it here to mean a badly numbered hash (which 341 # indicates corruption in the number bytes, rather than in 342 # the data bytes). 343 raise BadHashError("hashnum %d doesn't fit in hashtree(%d)" 344 % (hashnum, len(self.share_hash_tree))) 345 self.share_hash_tree.set_hashes(share_hashes) 346 347 def get_needed_ciphertext_hashes(self, segnum): 348 cht = self.ciphertext_hash_tree 349 return cht.needed_hashes(segnum, include_leaf=True) 350 def process_ciphertext_hashes(self, hashes): 351 assert self.num_segments is not None 352 # this may raise BadHashError or NotEnoughHashesError 353 self.ciphertext_hash_tree.set_hashes(hashes) 354 355 356 # called by our child SegmentFetcher 357 358 def want_more_shares(self): 359 self._sharefinder.hungry() 360 361 def fetch_failed(self, sf, f): 362 assert sf is self._active_segment 363 self._active_segment = None 364 # deliver error upwards 365 for (d,c) in self._extract_requests(sf.segnum): 366 eventually(self._deliver, d, c, f) 367 368 def process_blocks(self, segnum, blocks): 369 d = defer.maybeDeferred(self._decode_blocks, segnum, blocks) 370 d.addCallback(self._check_ciphertext_hash, segnum) 371 def _deliver(result): 372 ds = self._download_status 373 if isinstance(result, Failure): 374 ds.add_segment_error(segnum, now()) 375 else: 376 (offset, segment, decodetime) = result 377 ds.add_segment_delivery(segnum, now(), 378 offset, len(segment), decodetime) 379 log.msg(format="delivering segment(%(segnum)d)", 380 segnum=segnum, 381 level=log.OPERATIONAL, parent=self._lp, 382 umid="j60Ojg") 383 for (d,c) in self._extract_requests(segnum): 384 eventually(self._deliver, d, c, result) 385 self._active_segment = None 386 self._start_new_segment() 387 d.addBoth(_deliver) 388 d.addErrback(lambda f: 389 log.err("unhandled error during process_blocks", 390 failure=f, level=log.WEIRD, 391 parent=self._lp, umid="MkEsCg")) 392 393 def _decode_blocks(self, segnum, blocks): 394 tail = (segnum == 
self.num_segments-1) 395 codec = self._codec 396 block_size = self.block_size 397 decoded_size = self.segment_size 398 if tail: 399 # account for the padding in the last segment 400 codec = CRSDecoder() 401 k, N = self._verifycap.needed_shares, self._verifycap.total_shares 402 codec.set_params(self.tail_segment_padded, k, N) 403 block_size = self.tail_block_size 404 decoded_size = self.tail_segment_padded 405 406 shares = [] 407 shareids = [] 408 for (shareid, share) in blocks.iteritems(): 409 assert len(share) == block_size 410 shareids.append(shareid) 411 shares.append(share) 412 del blocks 413 414 start = now() 415 d = codec.decode(shares, shareids) # segment 416 del shares 417 def _process(buffers): 418 decodetime = now() - start 419 segment = "".join(buffers) 420 assert len(segment) == decoded_size 421 del buffers 422 if tail: 423 segment = segment[:self.tail_segment_size] 424 return (segment, decodetime) 425 d.addCallback(_process) 426 return d 427 428 def _check_ciphertext_hash(self, (segment, decodetime), segnum): 429 assert self._active_segment.segnum == segnum 430 assert self.segment_size is not None 431 offset = segnum * self.segment_size 432 433 h = hashutil.crypttext_segment_hash(segment) 434 try: 435 self.ciphertext_hash_tree.set_hashes(leaves={segnum: h}) 436 return (offset, segment, decodetime) 437 except (BadHashError, NotEnoughHashesError): 438 format = ("hash failure in ciphertext_hash_tree:" 439 " segnum=%(segnum)d, SI=%(si)s") 440 log.msg(format=format, segnum=segnum, si=self._si_prefix, 441 failure=Failure(), 442 level=log.WEIRD, parent=self._lp, umid="MTwNnw") 443 # this is especially weird, because we made it past the share 444 # hash tree. It implies that we're using the wrong encoding, or 445 # that the uploader deliberately constructed a bad UEB. 
446 msg = format % {"segnum": segnum, "si": self._si_prefix} 447 raise BadCiphertextHashError(msg) 448 449 def _deliver(self, d, c, result): 450 # this method exists to handle cancel() that occurs between 451 # _got_segment and _deliver 452 if not c.cancelled: 453 d.callback(result) # might actually be an errback 454 455 def _extract_requests(self, segnum): 456 """Remove matching requests and return their (d,c) tuples so that the 457 caller can retire them.""" 458 retire = [(d,c) for (segnum0, d, c) in self._segment_requests 459 if segnum0 == segnum] 460 self._segment_requests = [t for t in self._segment_requests 461 if t[0] != segnum] 462 return retire 463 464 def _cancel_request(self, c): 465 self._segment_requests = [t for t in self._segment_requests 466 if t[2] != c] 467 segnums = [segnum for (segnum,d,c) in self._segment_requests] 468 if self._active_segment.segnum not in segnums: 469 self._active_segment.stop() 470 self._active_segment = None 471 self._start_new_segment() -
new file src/allmydata/immutable/downloader/segmentation.py
diff --git a/src/allmydata/immutable/downloader/segmentation.py b/src/allmydata/immutable/downloader/segmentation.py new file mode 100644 index 0000000..4890195
- + 1 2 import time 3 now = time.time 4 from zope.interface import implements 5 from twisted.internet import defer 6 from twisted.internet.interfaces import IPushProducer 7 from foolscap.api import eventually 8 from allmydata.util import log 9 from allmydata.util.spans import overlap 10 11 from common import BadSegmentNumberError, WrongSegmentError, DownloadStopped 12 13 class Segmentation: 14 """I am responsible for a single offset+size read of the file. I handle 15 segmentation: I figure out which segments are necessary, request them 16 (from my CiphertextDownloader) in order, and trim the segments down to 17 match the offset+size span. I use the Producer/Consumer interface to only 18 request one segment at a time. 19 """ 20 implements(IPushProducer) 21 def __init__(self, node, offset, size, consumer, read_ev, logparent=None): 22 self._node = node 23 self._hungry = True 24 self._active_segnum = None 25 self._cancel_segment_request = None 26 # these are updated as we deliver data. At any given time, we still 27 # want to download file[offset:offset+size] 28 self._offset = offset 29 self._size = size 30 assert offset+size <= node._verifycap.size 31 self._consumer = consumer 32 self._read_ev = read_ev 33 self._start_pause = None 34 self._lp = logparent 35 36 def start(self): 37 self._alive = True 38 self._deferred = defer.Deferred() 39 self._consumer.registerProducer(self, True) 40 self._maybe_fetch_next() 41 return self._deferred 42 43 def _maybe_fetch_next(self): 44 if not self._alive or not self._hungry: 45 return 46 if self._active_segnum is not None: 47 return 48 self._fetch_next() 49 50 def _fetch_next(self): 51 if self._size == 0: 52 # done! 
53 self._alive = False 54 self._hungry = False 55 self._consumer.unregisterProducer() 56 self._deferred.callback(self._consumer) 57 return 58 n = self._node 59 have_actual_segment_size = n.segment_size is not None 60 guess_s = "" 61 if not have_actual_segment_size: 62 guess_s = "probably " 63 segment_size = n.segment_size or n.guessed_segment_size 64 if self._offset == 0: 65 # great! we want segment0 for sure 66 wanted_segnum = 0 67 else: 68 # this might be a guess 69 wanted_segnum = self._offset // segment_size 70 log.msg(format="_fetch_next(offset=%(offset)d) %(guess)swants segnum=%(segnum)d", 71 offset=self._offset, guess=guess_s, segnum=wanted_segnum, 72 level=log.NOISY, parent=self._lp, umid="5WfN0w") 73 self._active_segnum = wanted_segnum 74 d,c = n.get_segment(wanted_segnum, self._lp) 75 self._cancel_segment_request = c 76 d.addBoth(self._request_retired) 77 d.addCallback(self._got_segment, wanted_segnum) 78 if not have_actual_segment_size: 79 # we can retry once 80 d.addErrback(self._retry_bad_segment) 81 d.addErrback(self._error) 82 83 def _request_retired(self, res): 84 self._active_segnum = None 85 self._cancel_segment_request = None 86 return res 87 88 def _got_segment(self, (segment_start,segment,decodetime), wanted_segnum): 89 self._cancel_segment_request = None 90 # we got file[segment_start:segment_start+len(segment)] 91 # we want file[self._offset:self._offset+self._size] 92 log.msg(format="Segmentation got data:" 93 " want [%(wantstart)d-%(wantend)d)," 94 " given [%(segstart)d-%(segend)d), for segnum=%(segnum)d", 95 wantstart=self._offset, wantend=self._offset+self._size, 96 segstart=segment_start, segend=segment_start+len(segment), 97 segnum=wanted_segnum, 98 level=log.OPERATIONAL, parent=self._lp, umid="32dHcg") 99 100 o = overlap(segment_start, len(segment), self._offset, self._size) 101 # the overlap is file[o[0]:o[0]+o[1]] 102 if not o or o[0] != self._offset: 103 # we didn't get the first byte, so we can't use this segment 104 
log.msg("Segmentation handed wrong data:" 105 " want [%d-%d), given [%d-%d), for segnum=%d," 106 " for si=%s" 107 % (self._offset, self._offset+self._size, 108 segment_start, segment_start+len(segment), 109 wanted_segnum, self._node._si_prefix), 110 level=log.UNUSUAL, parent=self._lp, umid="STlIiA") 111 # we may retry if the segnum we asked was based on a guess 112 raise WrongSegmentError("I was given the wrong data.") 113 offset_in_segment = self._offset - segment_start 114 desired_data = segment[offset_in_segment:offset_in_segment+o[1]] 115 116 self._offset += len(desired_data) 117 self._size -= len(desired_data) 118 self._consumer.write(desired_data) 119 # the consumer might call our .pauseProducing() inside that write() 120 # call, setting self._hungry=False 121 self._read_ev.update(len(desired_data), 0, 0) 122 self._maybe_fetch_next() 123 124 def _retry_bad_segment(self, f): 125 f.trap(WrongSegmentError, BadSegmentNumberError) 126 # we guessed the segnum wrong: either one that doesn't overlap with 127 # the start of our desired region, or one that's beyond the end of 128 # the world. Now that we have the right information, we're allowed to 129 # retry once. 
130 assert self._node.segment_size is not None 131 return self._maybe_fetch_next() 132 133 def _error(self, f): 134 log.msg("Error in Segmentation", failure=f, 135 level=log.WEIRD, parent=self._lp, umid="EYlXBg") 136 self._alive = False 137 self._hungry = False 138 self._consumer.unregisterProducer() 139 self._deferred.errback(f) 140 141 def stopProducing(self): 142 self._hungry = False 143 self._alive = False 144 # cancel any outstanding segment request 145 if self._cancel_segment_request: 146 self._cancel_segment_request.cancel() 147 self._cancel_segment_request = None 148 e = DownloadStopped("our Consumer called stopProducing()") 149 self._deferred.errback(e) 150 151 def pauseProducing(self): 152 self._hungry = False 153 self._start_pause = now() 154 def resumeProducing(self): 155 self._hungry = True 156 eventually(self._maybe_fetch_next) 157 if self._start_pause is not None: 158 paused = now() - self._start_pause 159 self._read_ev.update(0, 0, paused) 160 self._start_pause = None -
new file src/allmydata/immutable/downloader/share.py
diff --git a/src/allmydata/immutable/downloader/share.py b/src/allmydata/immutable/downloader/share.py new file mode 100644 index 0000000..e3c9017
- + 1 2 import struct 3 import time 4 now = time.time 5 6 from twisted.python.failure import Failure 7 from foolscap.api import eventually 8 from allmydata.util import base32, log, hashutil, mathutil 9 from allmydata.util.spans import Spans, DataSpans 10 from allmydata.interfaces import HASH_SIZE 11 from allmydata.hashtree import IncompleteHashTree, BadHashError, \ 12 NotEnoughHashesError 13 14 from allmydata.immutable.layout import make_write_bucket_proxy 15 from allmydata.util.observer import EventStreamObserver 16 from common import COMPLETE, CORRUPT, DEAD, BADSEGNUM 17 18 19 class LayoutInvalid(Exception): 20 pass 21 class DataUnavailable(Exception): 22 pass 23 24 class Share: 25 """I represent a single instance of a single share (e.g. I reference the 26 shnum2 for share SI=abcde on server xy12t, not the one on server ab45q). 27 I am associated with a CommonShare that remembers data that is held in 28 common among e.g. SI=abcde/shnum2 across all servers. I am also 29 associated with a CiphertextFileNode for e.g. SI=abcde (all shares, all 30 servers). 31 """ 32 # this is a specific implementation of IShare for tahoe's native storage 33 # servers. A different backend would use a different class. 34 35 def __init__(self, rref, server_version, verifycap, commonshare, node, 36 download_status, peerid, shnum, logparent): 37 self._rref = rref 38 self._server_version = server_version 39 self._node = node # holds share_hash_tree and UEB 40 self.actual_segment_size = node.segment_size # might still be None 41 # XXX change node.guessed_segment_size to 42 # node.best_guess_segment_size(), which should give us the real ones 43 # if known, else its guess. 
44 self._guess_offsets(verifycap, node.guessed_segment_size) 45 self.actual_offsets = None 46 self._UEB_length = None 47 self._commonshare = commonshare # holds block_hash_tree 48 self._download_status = download_status 49 self._peerid = peerid 50 self._peerid_s = base32.b2a(peerid)[:5] 51 self._storage_index = verifycap.storage_index 52 self._si_prefix = base32.b2a(verifycap.storage_index)[:8] 53 self._shnum = shnum 54 # self._alive becomes False upon fatal corruption or server error 55 self._alive = True 56 self._lp = log.msg(format="%(share)s created", share=repr(self), 57 level=log.NOISY, parent=logparent, umid="P7hv2w") 58 59 self._pending = Spans() # request sent but no response received yet 60 self._received = DataSpans() # ACK response received, with data 61 self._unavailable = Spans() # NAK response received, no data 62 63 # any given byte of the share can be in one of four states: 64 # in: _wanted, _requested, _received 65 # FALSE FALSE FALSE : don't care about it at all 66 # TRUE FALSE FALSE : want it, haven't yet asked for it 67 # TRUE TRUE FALSE : request is in-flight 68 # or didn't get it 69 # FALSE TRUE TRUE : got it, haven't used it yet 70 # FALSE TRUE FALSE : got it and used it 71 # FALSE FALSE FALSE : block consumed, ready to ask again 72 # 73 # when we request data and get a NAK, we leave it in _requested 74 # to remind ourself to not ask for it again. We don't explicitly 75 # remove it from anything (maybe this should change). 76 # 77 # We retain the hashtrees in the Node, so we leave those spans in 78 # _requested (and never ask for them again, as long as the Node is 79 # alive). But we don't retain data blocks (too big), so when we 80 # consume a data block, we remove it from _requested, so a later 81 # download can re-fetch it. 
82 83 self._requested_blocks = [] # (segnum, set(observer2..)) 84 ver = server_version["http://allmydata.org/tahoe/protocols/storage/v1"] 85 self._overrun_ok = ver["tolerates-immutable-read-overrun"] 86 # If _overrun_ok and we guess the offsets correctly, we can get 87 # everything in one RTT. If _overrun_ok and we guess wrong, we might 88 # need two RTT (but we could get lucky and do it in one). If overrun 89 # is *not* ok (tahoe-1.3.0 or earlier), we need four RTT: 1=version, 90 # 2=offset table, 3=UEB_length and everything else (hashes, block), 91 # 4=UEB. 92 93 self.had_corruption = False # for unit tests 94 95 def __repr__(self): 96 return "Share(sh%d-on-%s)" % (self._shnum, self._peerid_s) 97 98 def is_alive(self): 99 # XXX: reconsider. If the share sees a single error, should it remain 100 # dead for all time? Or should the next segment try again? This DEAD 101 # state is stored elsewhere too (SegmentFetcher per-share states?) 102 # and needs to be consistent. We clear _alive in self._fail(), which 103 # is called upon a network error, or layout failure, or hash failure 104 # in the UEB or a hash tree. We do not _fail() for a hash failure in 105 # a block, but of course we still tell our callers about 106 # state=CORRUPT so they'll find a different share. 107 return self._alive 108 109 def _guess_offsets(self, verifycap, guessed_segment_size): 110 self.guessed_segment_size = guessed_segment_size 111 size = verifycap.size 112 k = verifycap.needed_shares 113 N = verifycap.total_shares 114 r = self._node._calculate_sizes(guessed_segment_size) 115 # num_segments, block_size/tail_block_size 116 # guessed_segment_size/tail_segment_size/tail_segment_padded 117 share_size = mathutil.div_ceil(size, k) 118 # share_size is the amount of block data that will be put into each 119 # share, summed over all segments. It does not include hashes, the 120 # UEB, or other overhead. 
121 122 # use the upload-side code to get this as accurate as possible 123 ht = IncompleteHashTree(N) 124 num_share_hashes = len(ht.needed_hashes(0, include_leaf=True)) 125 wbp = make_write_bucket_proxy(None, share_size, r["block_size"], 126 r["num_segments"], num_share_hashes, 0, 127 None) 128 self._fieldsize = wbp.fieldsize 129 self._fieldstruct = wbp.fieldstruct 130 self.guessed_offsets = wbp._offsets 131 132 # called by our client, the SegmentFetcher 133 def get_block(self, segnum): 134 """Add a block number to the list of requests. This will eventually 135 result in a fetch of the data necessary to validate the block, then 136 the block itself. The fetch order is generally 137 first-come-first-served, but requests may be answered out-of-order if 138 data becomes available sooner. 139 140 I return an EventStreamObserver, which has two uses. The first is to 141 call o.subscribe(), which gives me a place to send state changes and 142 eventually the data block. The second is o.cancel(), which removes 143 the request (if it is still active). 144 145 I will distribute the following events through my EventStreamObserver: 146 - state=OVERDUE: ?? I believe I should have had an answer by now. 147 You may want to ask another share instead. 148 - state=BADSEGNUM: the segnum you asked for is too large. 
I must 149 fetch a valid UEB before I can determine this, 150 so the notification is asynchronous 151 - state=COMPLETE, block=data: here is a valid block 152 - state=CORRUPT: this share contains corrupted data 153 - state=DEAD, f=Failure: the server reported an error, this share 154 is unusable 155 """ 156 log.msg("%s.get_block(%d)" % (repr(self), segnum), 157 level=log.NOISY, parent=self._lp, umid="RTo9MQ") 158 assert segnum >= 0 159 o = EventStreamObserver() 160 o.set_canceler(self, "_cancel_block_request") 161 for i,(segnum0,observers) in enumerate(self._requested_blocks): 162 if segnum0 == segnum: 163 observers.add(o) 164 break 165 else: 166 self._requested_blocks.append( (segnum, set([o])) ) 167 eventually(self.loop) 168 return o 169 170 def _cancel_block_request(self, o): 171 new_requests = [] 172 for e in self._requested_blocks: 173 (segnum0, observers) = e 174 observers.discard(o) 175 if observers: 176 new_requests.append(e) 177 self._requested_blocks = new_requests 178 179 # internal methods 180 def _active_segnum_and_observers(self): 181 if self._requested_blocks: 182 # we only retrieve information for one segment at a time, to 183 # minimize alacrity (first come, first served) 184 return self._requested_blocks[0] 185 return None, [] 186 187 def loop(self): 188 try: 189 # if any exceptions occur here, kill the download 190 log.msg("%s.loop, reqs=[%s], pending=%s, received=%s," 191 " unavailable=%s" % 192 (repr(self), 193 ",".join([str(req[0]) for req in self._requested_blocks]), 194 self._pending.dump(), self._received.dump(), 195 self._unavailable.dump() ), 196 level=log.NOISY, parent=self._lp, umid="BaL1zw") 197 self._do_loop() 198 # all exception cases call self._fail(), which clears self._alive 199 except (BadHashError, NotEnoughHashesError, LayoutInvalid), e: 200 # Abandon this share. We do this if we see corruption in the 201 # offset table, the UEB, or a hash tree. 
We don't abandon the 202 # whole share if we see corruption in a data block (we abandon 203 # just the one block, and still try to get data from other blocks 204 # on the same server). In theory, we could get good data from a 205 # share with a corrupt UEB (by first getting the UEB from some 206 # other share), or corrupt hash trees, but the logic to decide 207 # when this is safe is non-trivial. So for now, give up at the 208 # first sign of corruption. 209 # 210 # _satisfy_*() code which detects corruption should first call 211 # self._signal_corruption(), and then raise the exception. 212 log.msg(format="corruption detected in %(share)s", 213 share=repr(self), 214 level=log.UNUSUAL, parent=self._lp, umid="gWspVw") 215 self._fail(Failure(e), log.UNUSUAL) 216 except DataUnavailable, e: 217 # Abandon this share. 218 log.msg(format="need data that will never be available" 219 " from %s: pending=%s, received=%s, unavailable=%s" % 220 (repr(self), 221 self._pending.dump(), self._received.dump(), 222 self._unavailable.dump() ), 223 level=log.UNUSUAL, parent=self._lp, umid="F7yJnQ") 224 self._fail(Failure(e), log.UNUSUAL) 225 except BaseException: 226 self._fail(Failure()) 227 raise 228 log.msg("%s.loop done, reqs=[%s], pending=%s, received=%s," 229 " unavailable=%s" % 230 (repr(self), 231 ",".join([str(req[0]) for req in self._requested_blocks]), 232 self._pending.dump(), self._received.dump(), 233 self._unavailable.dump() ), 234 level=log.NOISY, parent=self._lp, umid="9lRaRA") 235 236 def _do_loop(self): 237 # we are (eventually) called after all state transitions: 238 # new segments added to self._requested_blocks 239 # new data received from servers (responses to our read() calls) 240 # impatience timer fires (server appears slow) 241 if not self._alive: 242 return 243 244 # First, consume all of the information that we currently have, for 245 # all the segments people currently want. 
246 while self._get_satisfaction(): 247 pass 248 249 # When we get no satisfaction (from the data we've received so far), 250 # we determine what data we desire (to satisfy more requests). The 251 # number of segments is finite, so I can't get no satisfaction 252 # forever. 253 wanted, needed = self._desire() 254 255 # Finally, send out requests for whatever we need (desire minus 256 # have). You can't always get what you want, but if you try 257 # sometimes, you just might find, you get what you need. 258 self._send_requests(wanted + needed) 259 260 # and sometimes you can't even get what you need 261 disappointment = needed & self._unavailable 262 if len(disappointment): 263 self.had_corruption = True 264 raise DataUnavailable("need %s but will never get it" % 265 disappointment.dump()) 266 267 def _get_satisfaction(self): 268 # return True if we retired a data block, and should therefore be 269 # called again. Return False if we don't retire a data block (even if 270 # we do retire some other data, like hash chains). 271 272 if self.actual_offsets is None: 273 if not self._satisfy_offsets(): 274 # can't even look at anything without the offset table 275 return False 276 277 if not self._node.have_UEB: 278 if not self._satisfy_UEB(): 279 # can't check any hashes without the UEB 280 return False 281 self.actual_segment_size = self._node.segment_size # might be updated 282 assert self.actual_segment_size is not None 283 284 # knowing the UEB means knowing num_segments. Despite the redundancy, 285 # this is the best place to set this. CommonShare.set_numsegs will 286 # ignore duplicate calls. 287 assert self._node.num_segments is not None 288 cs = self._commonshare 289 cs.set_numsegs(self._node.num_segments) 290 291 segnum, observers = self._active_segnum_and_observers() 292 # if segnum is None, we don't really need to do anything (we have no 293 # outstanding readers right now), but we'll fill in the bits that 294 # aren't tied to any particular segment. 
295 296 if segnum is not None and segnum >= self._node.num_segments: 297 for o in observers: 298 o.notify(state=BADSEGNUM) 299 self._requested_blocks.pop(0) 300 return True 301 302 if self._node.share_hash_tree.needed_hashes(self._shnum): 303 if not self._satisfy_share_hash_tree(): 304 # can't check block_hash_tree without a root 305 return False 306 307 if cs.need_block_hash_root(): 308 block_hash_root = self._node.share_hash_tree.get_leaf(self._shnum) 309 cs.set_block_hash_root(block_hash_root) 310 311 if segnum is None: 312 return False # we don't want any particular segment right now 313 314 # block_hash_tree 315 needed_hashes = self._commonshare.get_needed_block_hashes(segnum) 316 if needed_hashes: 317 if not self._satisfy_block_hash_tree(needed_hashes): 318 # can't check block without block_hash_tree 319 return False 320 321 # ciphertext_hash_tree 322 needed_hashes = self._node.get_needed_ciphertext_hashes(segnum) 323 if needed_hashes: 324 if not self._satisfy_ciphertext_hash_tree(needed_hashes): 325 # can't check decoded blocks without ciphertext_hash_tree 326 return False 327 328 # data blocks 329 return self._satisfy_data_block(segnum, observers) 330 331 def _satisfy_offsets(self): 332 version_s = self._received.get(0, 4) 333 if version_s is None: 334 return False 335 (version,) = struct.unpack(">L", version_s) 336 if version == 1: 337 table_start = 0x0c 338 self._fieldsize = 0x4 339 self._fieldstruct = "L" 340 elif version == 2: 341 table_start = 0x14 342 self._fieldsize = 0x8 343 self._fieldstruct = "Q" 344 else: 345 self.had_corruption = True 346 raise LayoutInvalid("unknown version %d (I understand 1 and 2)" 347 % version) 348 offset_table_size = 6 * self._fieldsize 349 table_s = self._received.pop(table_start, offset_table_size) 350 if table_s is None: 351 return False 352 fields = struct.unpack(">"+6*self._fieldstruct, table_s) 353 offsets = {} 354 for i,field in enumerate(['data', 355 'plaintext_hash_tree', # UNUSED 356 'crypttext_hash_tree', 357 
'block_hashes', 358 'share_hashes', 359 'uri_extension', 360 ] ): 361 offsets[field] = fields[i] 362 self.actual_offsets = offsets 363 log.msg("actual offsets: data=%d, plaintext_hash_tree=%d, crypttext_hash_tree=%d, block_hashes=%d, share_hashes=%d, uri_extension=%d" % tuple(fields)) 364 self._received.remove(0, 4) # don't need this anymore 365 366 # validate the offsets a bit 367 share_hashes_size = offsets["uri_extension"] - offsets["share_hashes"] 368 if share_hashes_size < 0 or share_hashes_size % (2+HASH_SIZE) != 0: 369 # the share hash chain is stored as (hashnum,hash) pairs 370 self.had_corruption = True 371 raise LayoutInvalid("share hashes malformed -- should be a" 372 " multiple of %d bytes -- not %d" % 373 (2+HASH_SIZE, share_hashes_size)) 374 block_hashes_size = offsets["share_hashes"] - offsets["block_hashes"] 375 if block_hashes_size < 0 or block_hashes_size % (HASH_SIZE) != 0: 376 # the block hash tree is stored as a list of hashes 377 self.had_corruption = True 378 raise LayoutInvalid("block hashes malformed -- should be a" 379 " multiple of %d bytes -- not %d" % 380 (HASH_SIZE, block_hashes_size)) 381 # we only look at 'crypttext_hash_tree' if the UEB says we're 382 # actually using it. Same with 'plaintext_hash_tree'. This gives us 383 # some wiggle room: a place to stash data for later extensions. 384 385 return True 386 387 def _satisfy_UEB(self): 388 o = self.actual_offsets 389 fsize = self._fieldsize 390 UEB_length_s = self._received.get(o["uri_extension"], fsize) 391 if not UEB_length_s: 392 return False 393 (UEB_length,) = struct.unpack(">"+self._fieldstruct, UEB_length_s) 394 UEB_s = self._received.pop(o["uri_extension"]+fsize, UEB_length) 395 if not UEB_s: 396 return False 397 self._received.remove(o["uri_extension"], fsize) 398 try: 399 self._node.validate_and_store_UEB(UEB_s) 400 return True 401 except (LayoutInvalid, BadHashError), e: 402 # TODO: if this UEB was bad, we'll keep trying to validate it 403 # over and over again. 
Only log.err on the first one, or better 404 # yet skip all but the first 405 f = Failure(e) 406 self._signal_corruption(f, o["uri_extension"], fsize+UEB_length) 407 self.had_corruption = True 408 raise 409 410 def _satisfy_share_hash_tree(self): 411 # the share hash chain is stored as (hashnum,hash) tuples, so you 412 # can't fetch just the pieces you need, because you don't know 413 # exactly where they are. So fetch everything, and parse the results 414 # later. 415 o = self.actual_offsets 416 hashlen = o["uri_extension"] - o["share_hashes"] 417 assert hashlen % (2+HASH_SIZE) == 0 418 hashdata = self._received.get(o["share_hashes"], hashlen) 419 if not hashdata: 420 return False 421 share_hashes = {} 422 for i in range(0, hashlen, 2+HASH_SIZE): 423 (hashnum,) = struct.unpack(">H", hashdata[i:i+2]) 424 hashvalue = hashdata[i+2:i+2+HASH_SIZE] 425 share_hashes[hashnum] = hashvalue 426 # TODO: if they give us an empty set of hashes, 427 # process_share_hashes() won't fail. We must ensure that this 428 # situation doesn't allow unverified shares through. Manual testing 429 # shows that set_block_hash_root() throws an assert because an 430 # internal node is None instead of an actual hash, but we want 431 # something better. It's probably best to add a method to 432 # IncompleteHashTree which takes a leaf number and raises an 433 # exception unless that leaf is present and fully validated. 
434 try: 435 self._node.process_share_hashes(share_hashes) 436 # adds to self._node.share_hash_tree 437 except (BadHashError, NotEnoughHashesError), e: 438 f = Failure(e) 439 self._signal_corruption(f, o["share_hashes"], hashlen) 440 self.had_corruption = True 441 raise 442 self._received.remove(o["share_hashes"], hashlen) 443 return True 444 445 def _signal_corruption(self, f, start, offset): 446 # there was corruption somewhere in the given range 447 reason = "corruption in share[%d-%d): %s" % (start, start+offset, 448 str(f.value)) 449 self._rref.callRemoteOnly("advise_corrupt_share", reason) 450 451 def _satisfy_block_hash_tree(self, needed_hashes): 452 o_bh = self.actual_offsets["block_hashes"] 453 block_hashes = {} 454 for hashnum in needed_hashes: 455 hashdata = self._received.get(o_bh+hashnum*HASH_SIZE, HASH_SIZE) 456 if hashdata: 457 block_hashes[hashnum] = hashdata 458 else: 459 return False # missing some hashes 460 # note that we don't submit any hashes to the block_hash_tree until 461 # we've gotten them all, because the hash tree will throw an 462 # exception if we only give it a partial set (which it therefore 463 # cannot validate) 464 try: 465 self._commonshare.process_block_hashes(block_hashes) 466 except (BadHashError, NotEnoughHashesError), e: 467 f = Failure(e) 468 hashnums = ",".join([str(n) for n in sorted(block_hashes.keys())]) 469 log.msg(format="hash failure in block_hashes=(%(hashnums)s)," 470 " from %(share)s", 471 hashnums=hashnums, shnum=self._shnum, share=repr(self), 472 failure=f, level=log.WEIRD, parent=self._lp, umid="yNyFdA") 473 hsize = max(0, max(needed_hashes)) * HASH_SIZE 474 self._signal_corruption(f, o_bh, hsize) 475 self.had_corruption = True 476 raise 477 for hashnum in needed_hashes: 478 self._received.remove(o_bh+hashnum*HASH_SIZE, HASH_SIZE) 479 return True 480 481 def _satisfy_ciphertext_hash_tree(self, needed_hashes): 482 start = self.actual_offsets["crypttext_hash_tree"] 483 hashes = {} 484 for hashnum in 
needed_hashes: 485 hashdata = self._received.get(start+hashnum*HASH_SIZE, HASH_SIZE) 486 if hashdata: 487 hashes[hashnum] = hashdata 488 else: 489 return False # missing some hashes 490 # we don't submit any hashes to the ciphertext_hash_tree until we've 491 # gotten them all 492 try: 493 self._node.process_ciphertext_hashes(hashes) 494 except (BadHashError, NotEnoughHashesError), e: 495 f = Failure(e) 496 hashnums = ",".join([str(n) for n in sorted(hashes.keys())]) 497 log.msg(format="hash failure in ciphertext_hashes=(%(hashnums)s)," 498 " from %(share)s", 499 hashnums=hashnums, share=repr(self), failure=f, 500 level=log.WEIRD, parent=self._lp, umid="iZI0TA") 501 hsize = max(0, max(needed_hashes))*HASH_SIZE 502 self._signal_corruption(f, start, hsize) 503 self.had_corruption = True 504 raise 505 for hashnum in needed_hashes: 506 self._received.remove(start+hashnum*HASH_SIZE, HASH_SIZE) 507 return True 508 509 def _satisfy_data_block(self, segnum, observers): 510 tail = (segnum == self._node.num_segments-1) 511 datastart = self.actual_offsets["data"] 512 blockstart = datastart + segnum * self._node.block_size 513 blocklen = self._node.block_size 514 if tail: 515 blocklen = self._node.tail_block_size 516 517 block = self._received.pop(blockstart, blocklen) 518 if not block: 519 log.msg("no data for block %s (want [%d:+%d])" % (repr(self), 520 blockstart, blocklen)) 521 return False 522 log.msg(format="%(share)s._satisfy_data_block [%(start)d:+%(length)d]", 523 share=repr(self), start=blockstart, length=blocklen, 524 level=log.NOISY, parent=self._lp, umid="uTDNZg") 525 # this block is being retired, either as COMPLETE or CORRUPT, since 526 # no further data reads will help 527 assert self._requested_blocks[0][0] == segnum 528 try: 529 self._commonshare.check_block(segnum, block) 530 # hurrah, we have a valid block. Deliver it. 
531 for o in observers: 532 # goes to SegmentFetcher._block_request_activity 533 o.notify(state=COMPLETE, block=block) 534 except (BadHashError, NotEnoughHashesError), e: 535 # rats, we have a corrupt block. Notify our clients that they 536 # need to look elsewhere, and advise the server. Unlike 537 # corruption in other parts of the share, this doesn't cause us 538 # to abandon the whole share. 539 f = Failure(e) 540 log.msg(format="hash failure in block %(segnum)d, from %(share)s", 541 segnum=segnum, share=repr(self), failure=f, 542 level=log.WEIRD, parent=self._lp, umid="mZjkqA") 543 for o in observers: 544 o.notify(state=CORRUPT) 545 self._signal_corruption(f, blockstart, blocklen) 546 self.had_corruption = True 547 # in either case, we've retired this block 548 self._requested_blocks.pop(0) 549 # popping the request keeps us from turning around and wanting the 550 # block again right away 551 return True # got satisfaction 552 553 def _desire(self): 554 segnum, observers = self._active_segnum_and_observers() # maybe None 555 556 # 'want_it' is for data we merely want: we know that we don't really 557 # need it. This includes speculative reads, like the first 1KB of the 558 # share (for the offset table) and the first 2KB of the UEB. 559 # 560 # 'need_it' is for data that, if we have the real offset table, we'll 561 # need. If we are only guessing at the offset table, it's merely 562 # wanted. (The share is abandoned if we can't get data that we really 563 # need). 564 # 565 # 'gotta_gotta_have_it' is for data that we absolutely need, 566 # independent of whether we're still guessing about the offset table: 567 # the version number and the offset table itself. 568 # 569 # Mr. Popeil, I'm in trouble, need your assistance on the double. Aww.. 
570 571 desire = Spans(), Spans(), Spans() 572 (want_it, need_it, gotta_gotta_have_it) = desire 573 574 self.actual_segment_size = self._node.segment_size # might be updated 575 o = self.actual_offsets or self.guessed_offsets 576 segsize = self.actual_segment_size or self.guessed_segment_size 577 r = self._node._calculate_sizes(segsize) 578 579 if not self.actual_offsets: 580 # all _desire functions add bits to the three desire[] spans 581 self._desire_offsets(desire) 582 583 # we can use guessed offsets as long as this server tolerates 584 # overrun. Otherwise, we must wait for the offsets to arrive before 585 # we try to read anything else. 586 if self.actual_offsets or self._overrun_ok: 587 if not self._node.have_UEB: 588 self._desire_UEB(desire, o) 589 # They might ask for a segment that doesn't look right. 590 # _satisfy() will catch+reject bad segnums once we know the UEB 591 # (and therefore segsize and numsegs), so we'll only fail this 592 # test if we're still guessing. We want to avoid asking the 593 # hashtrees for needed_hashes() for bad segnums. So don't enter 594 # _desire_hashes or _desire_data unless the segnum looks 595 # reasonable. 596 if segnum < r["num_segments"]: 597 # XXX somehow we're getting here for sh5. we don't yet know 598 # the actual_segment_size, we're still working off the guess. 599 # the ciphertext_hash_tree has been corrected, but the 600 # commonshare._block_hash_tree is still in the guessed state. 
601 self._desire_share_hashes(desire, o) 602 if segnum is not None: 603 self._desire_block_hashes(desire, o, segnum) 604 self._desire_data(desire, o, r, segnum, segsize) 605 else: 606 log.msg("_desire: segnum(%d) looks wrong (numsegs=%d)" 607 % (segnum, r["num_segments"]), 608 level=log.UNUSUAL, parent=self._lp, umid="tuYRQQ") 609 610 log.msg("end _desire: want_it=%s need_it=%s gotta=%s" 611 % (want_it.dump(), need_it.dump(), gotta_gotta_have_it.dump())) 612 if self.actual_offsets: 613 return (want_it, need_it+gotta_gotta_have_it) 614 else: 615 return (want_it+need_it, gotta_gotta_have_it) 616 617 def _desire_offsets(self, desire): 618 (want_it, need_it, gotta_gotta_have_it) = desire 619 if self._overrun_ok: 620 # easy! this includes version number, sizes, and offsets 621 want_it.add(0, 1024) 622 return 623 624 # v1 has an offset table that lives [0x0,0x24). v2 lives [0x0,0x44). 625 # To be conservative, only request the data that we know lives there, 626 # even if that means more roundtrips. 627 628 gotta_gotta_have_it.add(0, 4) # version number, always safe 629 version_s = self._received.get(0, 4) 630 if not version_s: 631 return 632 (version,) = struct.unpack(">L", version_s) 633 # The code in _satisfy_offsets will have checked this version 634 # already. There is no code path to get this far with version>2. 635 assert 1 <= version <= 2, "can't get here, version=%d" % version 636 if version == 1: 637 table_start = 0x0c 638 fieldsize = 0x4 639 elif version == 2: 640 table_start = 0x14 641 fieldsize = 0x8 642 offset_table_size = 6 * fieldsize 643 gotta_gotta_have_it.add(table_start, offset_table_size) 644 645 def _desire_UEB(self, desire, o): 646 (want_it, need_it, gotta_gotta_have_it) = desire 647 648 # UEB data is stored as (length,data). 649 if self._overrun_ok: 650 # We can pre-fetch 2kb, which should probably cover it. If it 651 # turns out to be larger, we'll come back here later with a known 652 # length and fetch the rest. 
653 want_it.add(o["uri_extension"], 2048) 654 # now, while that is probably enough to fetch the whole UEB, it 655 # might not be, so we need to do the next few steps as well. In 656 # most cases, the following steps will not actually add anything 657 # to need_it 658 659 need_it.add(o["uri_extension"], self._fieldsize) 660 # only use a length if we're sure it's correct, otherwise we'll 661 # probably fetch a huge number 662 if not self.actual_offsets: 663 return 664 UEB_length_s = self._received.get(o["uri_extension"], self._fieldsize) 665 if UEB_length_s: 666 (UEB_length,) = struct.unpack(">"+self._fieldstruct, UEB_length_s) 667 # we know the length, so make sure we grab everything 668 need_it.add(o["uri_extension"]+self._fieldsize, UEB_length) 669 670 def _desire_share_hashes(self, desire, o): 671 (want_it, need_it, gotta_gotta_have_it) = desire 672 673 if self._node.share_hash_tree.needed_hashes(self._shnum): 674 hashlen = o["uri_extension"] - o["share_hashes"] 675 need_it.add(o["share_hashes"], hashlen) 676 677 def _desire_block_hashes(self, desire, o, segnum): 678 (want_it, need_it, gotta_gotta_have_it) = desire 679 680 # block hash chain 681 for hashnum in self._commonshare.get_needed_block_hashes(segnum): 682 need_it.add(o["block_hashes"]+hashnum*HASH_SIZE, HASH_SIZE) 683 684 # ciphertext hash chain 685 for hashnum in self._node.get_needed_ciphertext_hashes(segnum): 686 need_it.add(o["crypttext_hash_tree"]+hashnum*HASH_SIZE, HASH_SIZE) 687 688 def _desire_data(self, desire, o, r, segnum, segsize): 689 (want_it, need_it, gotta_gotta_have_it) = desire 690 tail = (segnum == r["num_segments"]-1) 691 datastart = o["data"] 692 blockstart = datastart + segnum * r["block_size"] 693 blocklen = r["block_size"] 694 if tail: 695 blocklen = r["tail_block_size"] 696 need_it.add(blockstart, blocklen) 697 698 def _send_requests(self, desired): 699 ask = desired - self._pending - self._received.get_spans() 700 log.msg("%s._send_requests, desired=%s, pending=%s, ask=%s" % 701 
(repr(self), desired.dump(), self._pending.dump(), ask.dump()), 702 level=log.NOISY, parent=self._lp, umid="E94CVA") 703 # XXX At one time, this code distinguished between data blocks and 704 # hashes, and made sure to send (small) requests for hashes before 705 # sending (big) requests for blocks. The idea was to make sure that 706 # all hashes arrive before the blocks, so the blocks can be consumed 707 # and released in a single turn. I removed this for simplicity. 708 # Reconsider the removal: maybe bring it back. 709 ds = self._download_status 710 711 for (start, length) in ask: 712 # TODO: quantize to reasonably-large blocks 713 self._pending.add(start, length) 714 lp = log.msg(format="%(share)s._send_request" 715 " [%(start)d:+%(length)d]", 716 share=repr(self), 717 start=start, length=length, 718 level=log.NOISY, parent=self._lp, umid="sgVAyA") 719 req_ev = ds.add_request_sent(self._peerid, self._shnum, 720 start, length, now()) 721 d = self._send_request(start, length) 722 d.addCallback(self._got_data, start, length, req_ev, lp) 723 d.addErrback(self._got_error, start, length, req_ev, lp) 724 d.addCallback(self._trigger_loop) 725 d.addErrback(lambda f: 726 log.err(format="unhandled error during send_request", 727 failure=f, parent=self._lp, 728 level=log.WEIRD, umid="qZu0wg")) 729 730 def _send_request(self, start, length): 731 return self._rref.callRemote("read", start, length) 732 733 def _got_data(self, data, start, length, req_ev, lp): 734 req_ev.finished(len(data), now()) 735 if not self._alive: 736 return 737 log.msg(format="%(share)s._got_data [%(start)d:+%(length)d] -> %(datalen)d", 738 share=repr(self), start=start, length=length, datalen=len(data), 739 level=log.NOISY, parent=lp, umid="5Qn6VQ") 740 self._pending.remove(start, length) 741 self._received.add(start, data) 742 743 # if we ask for [a:c], and we get back [a:b] (b<c), that means we're 744 # never going to get [b:c]. If we really need that data, this block 745 # will never complete. 
The easiest way to get into this situation is 746 # to hit a share with a corrupted offset table, or one that's somehow 747 # been truncated. On the other hand, when overrun_ok is true, we ask 748 # for data beyond the end of the share all the time (it saves some 749 # RTT when we don't know the length of the share ahead of time). So 750 # not every asked-for-but-not-received byte is fatal. 751 if len(data) < length: 752 self._unavailable.add(start+len(data), length-len(data)) 753 754 # XXX if table corruption causes our sections to overlap, then one 755 # consumer (i.e. block hash tree) will pop/remove the data that 756 # another consumer (i.e. block data) mistakenly thinks it needs. It 757 # won't ask for that data again, because the span is in 758 # self._requested. But that span won't be in self._unavailable 759 # because we got it back from the server. TODO: handle this properly 760 # (raise DataUnavailable). Then add sanity-checking 761 # no-overlaps-allowed tests to the offset-table unpacking code to 762 # catch this earlier. XXX 763 764 # accumulate a wanted/needed span (not as self._x, but passed into 765 # desire* functions). manage a pending/in-flight list. when the 766 # requests are sent out, empty/discard the wanted/needed span and 767 # populate/augment the pending list. when the responses come back, 768 # augment either received+data or unavailable. 769 770 # if a corrupt offset table results in double-usage, we'll send 771 # double requests. 772 773 # the wanted/needed span is only "wanted" for the first pass. Once 774 # the offset table arrives, it's all "needed". 
775 776 def _got_error(self, f, start, length, req_ev, lp): 777 req_ev.finished("error", now()) 778 log.msg(format="error requesting %(start)d+%(length)d" 779 " from %(server)s for si %(si)s", 780 start=start, length=length, 781 server=self._peerid_s, si=self._si_prefix, 782 failure=f, parent=lp, level=log.UNUSUAL, umid="BZgAJw") 783 # retire our observers, assuming we won't be able to make any 784 # further progress 785 self._fail(f, log.UNUSUAL) 786 787 def _trigger_loop(self, res): 788 if self._alive: 789 eventually(self.loop) 790 return res 791 792 def _fail(self, f, level=log.WEIRD): 793 log.msg(format="abandoning %(share)s", 794 share=repr(self), failure=f, 795 level=level, parent=self._lp, umid="JKM2Og") 796 self._alive = False 797 for (segnum, observers) in self._requested_blocks: 798 for o in observers: 799 o.notify(state=DEAD, f=f) 800 801 802 class CommonShare: 803 """I hold data that is common across all instances of a single share, 804 like sh2 on both servers A and B. This is just the block hash tree. 805 """ 806 def __init__(self, guessed_numsegs, si_prefix, shnum, logparent): 807 self.si_prefix = si_prefix 808 self.shnum = shnum 809 # in the beginning, before we have the real UEB, we can only guess at 810 # the number of segments. But we want to ask for block hashes early. 811 # So if we're asked for which block hashes are needed before we know 812 # numsegs for sure, we return a guess. 
813 self._block_hash_tree = IncompleteHashTree(guessed_numsegs) 814 self._know_numsegs = False 815 self._logparent = logparent 816 817 def set_numsegs(self, numsegs): 818 if self._know_numsegs: 819 return 820 self._block_hash_tree = IncompleteHashTree(numsegs) 821 self._know_numsegs = True 822 823 def need_block_hash_root(self): 824 return bool(not self._block_hash_tree[0]) 825 826 def set_block_hash_root(self, roothash): 827 assert self._know_numsegs 828 self._block_hash_tree.set_hashes({0: roothash}) 829 830 def get_needed_block_hashes(self, segnum): 831 # XXX: include_leaf=True needs thought: how did the old downloader do 832 # it? I think it grabbed *all* block hashes and set them all at once. 833 # Since we want to fetch less data, we either need to fetch the leaf 834 # too, or wait to set the block hashes until we've also received the 835 # block itself, so we can hash it too, and set the chain+leaf all at 836 # the same time. 837 return self._block_hash_tree.needed_hashes(segnum, include_leaf=True) 838 839 def process_block_hashes(self, block_hashes): 840 assert self._know_numsegs 841 # this may raise BadHashError or NotEnoughHashesError 842 self._block_hash_tree.set_hashes(block_hashes) 843 844 def check_block(self, segnum, block): 845 assert self._know_numsegs 846 h = hashutil.block_hash(block) 847 # this may raise BadHashError or NotEnoughHashesError 848 self._block_hash_tree.set_hashes(leaves={segnum: h}) -
new file src/allmydata/immutable/downloader/status.py
diff --git a/src/allmydata/immutable/downloader/status.py b/src/allmydata/immutable/downloader/status.py new file mode 100644 index 0000000..5d60db0
- + 1 2 import itertools 3 from zope.interface import implements 4 from allmydata.interfaces import IDownloadStatus 5 6 class RequestEvent: 7 def __init__(self, download_status, tag): 8 self._download_status = download_status 9 self._tag = tag 10 def finished(self, received, when): 11 self._download_status.add_request_finished(self._tag, received, when) 12 13 class DYHBEvent: 14 def __init__(self, download_status, tag): 15 self._download_status = download_status 16 self._tag = tag 17 def finished(self, shnums, when): 18 self._download_status.add_dyhb_finished(self._tag, shnums, when) 19 20 class ReadEvent: 21 def __init__(self, download_status, tag): 22 self._download_status = download_status 23 self._tag = tag 24 def update(self, bytes, decrypttime, pausetime): 25 self._download_status.update_read_event(self._tag, bytes, 26 decrypttime, pausetime) 27 def finished(self, finishtime): 28 self._download_status.finish_read_event(self._tag, finishtime) 29 30 class DownloadStatus: 31 # There is one DownloadStatus for each CiphertextFileNode. The status 32 # object will keep track of all activity for that node. 33 implements(IDownloadStatus) 34 statusid_counter = itertools.count(0) 35 36 def __init__(self, storage_index, size): 37 self.storage_index = storage_index 38 self.size = size 39 self.counter = self.statusid_counter.next() 40 self.helper = False 41 self.started = None 42 # self.dyhb_requests tracks "do you have a share" requests and 43 # responses. It maps serverid to a tuple of: 44 # send time 45 # tuple of response shnums (None if response hasn't arrived, "error") 46 # response time (None if response hasn't arrived yet) 47 self.dyhb_requests = {} 48 49 # self.requests tracks share-data requests and responses. 
It maps 50 # serverid to a tuple of: 51 # shnum, 52 # start,length, (of data requested) 53 # send time 54 # response length (None if reponse hasn't arrived yet, or "error") 55 # response time (None if response hasn't arrived) 56 self.requests = {} 57 58 # self.segment_events tracks segment requests and delivery. It is a 59 # list of: 60 # type ("request", "delivery", "error") 61 # segment number 62 # event time 63 # segment start (file offset of first byte, None except in "delivery") 64 # segment length (only in "delivery") 65 # time spent in decode (only in "delivery") 66 self.segment_events = [] 67 68 # self.read_events tracks read() requests. It is a list of: 69 # start,length (of data requested) 70 # request time 71 # finish time (None until finished) 72 # bytes returned (starts at 0, grows as segments are delivered) 73 # time spent in decrypt (None for ciphertext-only reads) 74 # time spent paused 75 self.read_events = [] 76 77 self.known_shares = [] # (serverid, shnum) 78 self.problems = [] 79 80 81 def add_dyhb_sent(self, serverid, when): 82 r = (when, None, None) 83 if serverid not in self.dyhb_requests: 84 self.dyhb_requests[serverid] = [] 85 self.dyhb_requests[serverid].append(r) 86 tag = (serverid, len(self.dyhb_requests[serverid])-1) 87 return DYHBEvent(self, tag) 88 89 def add_dyhb_finished(self, tag, shnums, when): 90 # received="error" on error, else tuple(shnums) 91 (serverid, index) = tag 92 r = self.dyhb_requests[serverid][index] 93 (sent, _, _) = r 94 r = (sent, shnums, when) 95 self.dyhb_requests[serverid][index] = r 96 97 def add_request_sent(self, serverid, shnum, start, length, when): 98 r = (shnum, start, length, when, None, None) 99 if serverid not in self.requests: 100 self.requests[serverid] = [] 101 self.requests[serverid].append(r) 102 tag = (serverid, len(self.requests[serverid])-1) 103 return RequestEvent(self, tag) 104 105 def add_request_finished(self, tag, received, when): 106 # received="error" on error, else len(data) 107 
(serverid, index) = tag 108 r = self.requests[serverid][index] 109 (shnum, start, length, sent, _, _) = r 110 r = (shnum, start, length, sent, received, when) 111 self.requests[serverid][index] = r 112 113 def add_segment_request(self, segnum, when): 114 if self.started is None: 115 self.started = when 116 r = ("request", segnum, when, None, None, None) 117 self.segment_events.append(r) 118 def add_segment_delivery(self, segnum, when, start, length, decodetime): 119 r = ("delivery", segnum, when, start, length, decodetime) 120 self.segment_events.append(r) 121 def add_segment_error(self, segnum, when): 122 r = ("error", segnum, when, None, None, None) 123 self.segment_events.append(r) 124 125 def add_read_event(self, start, length, when): 126 if self.started is None: 127 self.started = when 128 r = (start, length, when, None, 0, 0, 0) 129 self.read_events.append(r) 130 tag = len(self.read_events)-1 131 return ReadEvent(self, tag) 132 def update_read_event(self, tag, bytes_d, decrypt_d, paused_d): 133 r = self.read_events[tag] 134 (start, length, requesttime, finishtime, bytes, decrypt, paused) = r 135 bytes += bytes_d 136 decrypt += decrypt_d 137 paused += paused_d 138 r = (start, length, requesttime, finishtime, bytes, decrypt, paused) 139 self.read_events[tag] = r 140 def finish_read_event(self, tag, finishtime): 141 r = self.read_events[tag] 142 (start, length, requesttime, _, bytes, decrypt, paused) = r 143 r = (start, length, requesttime, finishtime, bytes, decrypt, paused) 144 self.read_events[tag] = r 145 146 def add_known_share(self, serverid, shnum): 147 self.known_shares.append( (serverid, shnum) ) 148 149 def add_problem(self, p): 150 self.problems.append(p) 151 152 # IDownloadStatus methods 153 def get_counter(self): 154 return self.counter 155 def get_storage_index(self): 156 return self.storage_index 157 def get_size(self): 158 return self.size 159 def get_status(self): 160 return "not impl yet" # TODO 161 def get_progress(self): 162 return 0.1 # TODO 
163 def using_helper(self): 164 return False 165 def get_active(self): 166 return False # TODO 167 def get_started(self): 168 return self.started 169 def get_results(self): 170 return None # TODO -
src/allmydata/immutable/filenode.py
diff --git a/src/allmydata/immutable/filenode.py b/src/allmydata/immutable/filenode.py index 70044a7..1d5be94 100644
a b 1 import copy, os.path, stat 2 from cStringIO import StringIO 1 2 import binascii 3 import copy 4 import time 5 now = time.time 3 6 from zope.interface import implements 4 7 from twisted.internet import defer 5 from twisted.internet.interfaces import IPushProducer 6 from twisted.protocols import basic 7 from foolscap.api import eventually 8 from allmydata.interfaces import IImmutableFileNode, ICheckable, \ 9 IDownloadTarget, IUploadResults 10 from allmydata.util import dictutil, log, base32 11 from allmydata.uri import CHKFileURI, LiteralFileURI 12 from allmydata.immutable.checker import Checker 13 from allmydata.check_results import CheckResults, CheckAndRepairResults 14 from allmydata.immutable.repairer import Repairer 15 from allmydata.immutable import download 16 17 class _ImmutableFileNodeBase(object): 18 implements(IImmutableFileNode, ICheckable) 19 20 def get_write_uri(self): 21 return None 22 23 def get_readonly_uri(self): 24 return self.get_uri() 25 26 def is_mutable(self): 27 return False 28 29 def is_readonly(self): 30 return True 31 32 def is_unknown(self): 33 return False 34 35 def is_allowed_in_immutable_directory(self): 36 return True 37 38 def raise_error(self): 39 pass 40 41 def __hash__(self): 42 return self.u.__hash__() 43 def __eq__(self, other): 44 if isinstance(other, _ImmutableFileNodeBase): 45 return self.u.__eq__(other.u) 46 else: 47 return False 48 def __ne__(self, other): 49 if isinstance(other, _ImmutableFileNodeBase): 50 return self.u.__eq__(other.u) 51 else: 52 return True 53 54 class PortionOfFile: 55 # like a list slice (things[2:14]), but for a file on disk 56 def __init__(self, fn, offset=0, size=None): 57 self.f = open(fn, "rb") 58 self.f.seek(offset) 59 self.bytes_left = size 60 61 def read(self, size=None): 62 # bytes_to_read = min(size, self.bytes_left), but None>anything 63 if size is None: 64 bytes_to_read = self.bytes_left 65 elif self.bytes_left is None: 66 bytes_to_read = size 67 else: 68 bytes_to_read = min(size, 
self.bytes_left) 69 data = self.f.read(bytes_to_read) 70 if self.bytes_left is not None: 71 self.bytes_left -= len(data) 72 return data 73 74 class DownloadCache: 75 implements(IDownloadTarget) 76 77 def __init__(self, filecap, storage_index, downloader, 78 cachedirectorymanager): 79 self._downloader = downloader 80 self._uri = filecap 81 self._storage_index = storage_index 82 self.milestones = set() # of (offset,size,Deferred) 83 self.cachedirectorymanager = cachedirectorymanager 84 self.cachefile = None 85 self.download_in_progress = False 86 # five states: 87 # new ImmutableFileNode, no downloads ever performed 88 # new ImmutableFileNode, leftover file (partial) 89 # new ImmutableFileNode, leftover file (whole) 90 # download in progress, not yet complete 91 # download complete 92 93 def when_range_available(self, offset, size): 94 assert isinstance(offset, (int,long)) 95 assert isinstance(size, (int,long)) 96 97 d = defer.Deferred() 98 self.milestones.add( (offset,size,d) ) 99 self._check_milestones() 100 if self.milestones and not self.download_in_progress: 101 self.download_in_progress = True 102 log.msg(format=("immutable filenode read [%(si)s]: " + 103 "starting download"), 104 si=base32.b2a(self._storage_index), 105 umid="h26Heg", level=log.OPERATIONAL) 106 d2 = self._downloader.download(self._uri, self) 107 d2.addBoth(self._download_done) 108 d2.addErrback(self._download_failed) 109 d2.addErrback(log.err, umid="cQaM9g") 110 return d 111 112 def read(self, consumer, offset, size): 113 assert offset+size <= self.get_filesize() 114 if not self.cachefile: 115 self.cachefile = self.cachedirectorymanager.get_file(base32.b2a(self._storage_index)) 116 f = PortionOfFile(self.cachefile.get_filename(), offset, size) 117 d = basic.FileSender().beginFileTransfer(f, consumer) 118 d.addCallback(lambda lastSent: consumer) 119 return d 120 121 def _download_done(self, res): 122 # clear download_in_progress, so failed downloads can be re-tried 123 self.download_in_progress 
= False 124 return res 125 126 def _download_failed(self, f): 127 # tell anyone who's waiting that we failed 128 for m in self.milestones: 129 (offset,size,d) = m 130 eventually(d.errback, f) 131 self.milestones.clear() 132 133 def _check_milestones(self): 134 current_size = self.get_filesize() 135 for m in list(self.milestones): 136 (offset,size,d) = m 137 if offset+size <= current_size: 138 log.msg(format=("immutable filenode read [%(si)s] " + 139 "%(offset)d+%(size)d vs %(filesize)d: " + 140 "done"), 141 si=base32.b2a(self._storage_index), 142 offset=offset, size=size, filesize=current_size, 143 umid="nuedUg", level=log.NOISY) 144 self.milestones.discard(m) 145 eventually(d.callback, None) 146 else: 147 log.msg(format=("immutable filenode read [%(si)s] " + 148 "%(offset)d+%(size)d vs %(filesize)d: " + 149 "still waiting"), 150 si=base32.b2a(self._storage_index), 151 offset=offset, size=size, filesize=current_size, 152 umid="8PKOhg", level=log.NOISY) 153 154 def get_filesize(self): 155 if not self.cachefile: 156 self.cachefile = self.cachedirectorymanager.get_file(base32.b2a(self._storage_index)) 157 try: 158 filesize = os.stat(self.cachefile.get_filename())[stat.ST_SIZE] 159 except OSError: 160 filesize = 0 161 return filesize 162 163 164 def open(self, size): 165 if not self.cachefile: 166 self.cachefile = self.cachedirectorymanager.get_file(base32.b2a(self._storage_index)) 167 self.f = open(self.cachefile.get_filename(), "wb") 168 169 def write(self, data): 170 self.f.write(data) 171 self._check_milestones() 172 173 def close(self): 174 self.f.close() 175 self._check_milestones() 176 177 def fail(self, why): 178 pass 179 def register_canceller(self, cb): 180 pass 181 def finish(self): 182 return None 183 # The following methods are just because the target might be a 184 # repairer.DownUpConnector, and just because the current CHKUpload object 185 # expects to find the storage index and encoding parameters in its 186 # Uploadable. 
187 def set_storageindex(self, storageindex): 188 pass 189 def set_encodingparams(self, encodingparams): 190 pass 8 from twisted.internet.interfaces import IConsumer 191 9 10 from allmydata.interfaces import IImmutableFileNode, IUploadResults 11 from allmydata import uri 12 from allmydata.check_results import CheckResults, CheckAndRepairResults 13 from allmydata.util.dictutil import DictOfSets 14 from pycryptopp.cipher.aes import AES 192 15 193 class ImmutableFileNode(_ImmutableFileNodeBase, log.PrefixingLogMixin): 194 def __init__(self, filecap, storage_broker, secret_holder, 195 downloader, history, cachedirectorymanager): 196 assert isinstance(filecap, CHKFileURI) 197 self.u = filecap 16 # local imports 17 from allmydata.immutable.checker import Checker 18 from allmydata.immutable.repairer import Repairer 19 from allmydata.immutable.downloader.node import DownloadNode 20 from allmydata.immutable.downloader.status import DownloadStatus 21 22 class CiphertextFileNode: 23 def __init__(self, verifycap, storage_broker, secret_holder, 24 terminator, history, download_status=None): 25 assert isinstance(verifycap, uri.CHKFileVerifierURI) 26 self._verifycap = verifycap 198 27 self._storage_broker = storage_broker 199 28 self._secret_holder = secret_holder 200 self._downloader = downloader 201 self._history = history 202 storage_index = self.get_storage_index() 203 self.download_cache = DownloadCache(filecap, storage_index, downloader, 204 cachedirectorymanager) 205 prefix = self.u.get_verify_cap().to_string() 206 log.PrefixingLogMixin.__init__(self, "allmydata.immutable.filenode", prefix=prefix) 207 self.log("starting", level=log.OPERATIONAL) 29 if download_status is None: 30 ds = DownloadStatus(verifycap.storage_index, verifycap.size) 31 if history: 32 history.add_download(ds) 33 download_status = ds 34 self._node = DownloadNode(verifycap, storage_broker, secret_holder, 35 terminator, history, download_status) 36 37 def read(self, consumer, offset=0, size=None, 
read_ev=None): 38 """I am the main entry point, from which FileNode.read() can get 39 data. I feed the consumer with the desired range of ciphertext. I 40 return a Deferred that fires (with the consumer) when the read is 41 finished.""" 42 return self._node.read(consumer, offset, size, read_ev) 43 44 def get_segment(self, segnum): 45 """Begin downloading a segment. I return a tuple (d, c): 'd' is a 46 Deferred that fires with (offset,data) when the desired segment is 47 available, and c is an object on which c.cancel() can be called to 48 disavow interest in the segment (after which 'd' will never fire). 49 50 You probably need to know the segment size before calling this, 51 unless you want the first few bytes of the file. If you ask for a 52 segment number which turns out to be too large, the Deferred will 53 errback with BadSegmentNumberError. 54 55 The Deferred fires with the offset of the first byte of the data 56 segment, so that you can call get_segment() before knowing the 57 segment size, and still know which data you received. 
58 """ 59 return self._node.get_segment(segnum) 60 61 def get_segment_size(self): 62 # return a Deferred that fires with the file's real segment size 63 return self._node.get_segsize() 208 64 209 def get_size(self): 210 return self.u.get_size() 211 def get_current_size(self): 212 return defer.succeed(self.get_size()) 213 214 def get_cap(self): 215 return self.u 216 def get_readcap(self): 217 return self.u.get_readonly() 65 def get_storage_index(self): 66 return self._verifycap.storage_index 218 67 def get_verify_cap(self): 219 return self.u.get_verify_cap() 220 def get_repair_cap(self): 221 # CHK files can be repaired with just the verifycap 222 return self.u.get_verify_cap() 68 return self._verifycap 69 def get_size(self): 70 return self._verifycap.size 223 71 224 def get_uri(self):225 return self.u.to_string()72 def raise_error(self): 73 pass 226 74 227 def get_storage_index(self):228 return self.u.get_storage_index()229 75 230 76 def check_and_repair(self, monitor, verify=False, add_lease=False): 231 verifycap = self.get_verify_cap() 77 verifycap = self._verifycap 78 storage_index = verifycap.storage_index 232 79 sb = self._storage_broker 233 80 servers = sb.get_all_servers() 234 81 sh = self._secret_holder … … class ImmutableFileNode(_ImmutableFileNodeBase, log.PrefixingLogMixin): 238 85 monitor=monitor) 239 86 d = c.start() 240 87 def _maybe_repair(cr): 241 crr = CheckAndRepairResults(s elf.u.get_storage_index())88 crr = CheckAndRepairResults(storage_index) 242 89 crr.pre_repair_results = cr 243 90 if cr.is_healthy(): 244 91 crr.post_repair_results = cr … … class ImmutableFileNode(_ImmutableFileNodeBase, log.PrefixingLogMixin): 248 95 crr.repair_successful = False # until proven successful 249 96 def _gather_repair_results(ur): 250 97 assert IUploadResults.providedBy(ur), ur 251 # clone the cr -- check results to form the basic of the prr -- post-repair results 98 # clone the cr (check results) to form the basis of the 99 # prr (post-repair results) 252 100 
prr = CheckResults(cr.uri, cr.storage_index) 253 101 prr.data = copy.deepcopy(cr.data) 254 102 255 103 sm = prr.data['sharemap'] 256 assert isinstance(sm, dictutil.DictOfSets), sm104 assert isinstance(sm, DictOfSets), sm 257 105 sm.update(ur.sharemap) 258 106 servers_responding = set(prr.data['servers-responding']) 259 107 servers_responding.union(ur.sharemap.iterkeys()) 260 108 prr.data['servers-responding'] = list(servers_responding) 261 109 prr.data['count-shares-good'] = len(sm) 262 110 prr.data['count-good-share-hosts'] = len(sm) 263 is_healthy = bool(len(sm) >= self.u.total_shares)264 is_recoverable = bool(len(sm) >= self.u.needed_shares)111 is_healthy = bool(len(sm) >= verifycap.total_shares) 112 is_recoverable = bool(len(sm) >= verifycap.needed_shares) 265 113 prr.set_healthy(is_healthy) 266 114 prr.set_recoverable(is_recoverable) 267 115 crr.repair_successful = is_healthy 268 prr.set_needs_rebalancing(len(sm) >= self.u.total_shares)116 prr.set_needs_rebalancing(len(sm) >= verifycap.total_shares) 269 117 270 118 crr.post_repair_results = prr 271 119 return crr … … class ImmutableFileNode(_ImmutableFileNodeBase, log.PrefixingLogMixin): 275 123 crr.repair_successful = False 276 124 crr.repair_failure = f 277 125 return f 278 r = Repairer(s torage_broker=sb, secret_holder=sh,279 verifycap=verifycap,monitor=monitor)126 r = Repairer(self, storage_broker=sb, secret_holder=sh, 127 monitor=monitor) 280 128 d = r.start() 281 129 d.addCallbacks(_gather_repair_results, _repair_error) 282 130 return d … … class ImmutableFileNode(_ImmutableFileNodeBase, log.PrefixingLogMixin): 285 133 return d 286 134 287 135 def check(self, monitor, verify=False, add_lease=False): 288 verifycap = self. 
get_verify_cap()136 verifycap = self._verifycap 289 137 sb = self._storage_broker 290 138 servers = sb.get_all_servers() 291 139 sh = self._secret_holder … … class ImmutableFileNode(_ImmutableFileNodeBase, log.PrefixingLogMixin): 295 143 monitor=monitor) 296 144 return v.start() 297 145 146 147 class DecryptingConsumer: 148 """I sit between a CiphertextDownloader (which acts as a Producer) and 149 the real Consumer, decrypting everything that passes by. The real 150 Consumer sees the real Producer, but the Producer sees us instead of the 151 real consumer.""" 152 implements(IConsumer) 153 154 def __init__(self, consumer, readkey, offset, read_event): 155 self._consumer = consumer 156 self._read_event = read_event 157 # TODO: pycryptopp CTR-mode needs random-access operations: I want 158 # either a=AES(readkey, offset) or better yet both of: 159 # a=AES(readkey, offset=0) 160 # a.process(ciphertext, offset=xyz) 161 # For now, we fake it with the existing iv= argument. 162 offset_big = offset // 16 163 offset_small = offset % 16 164 iv = binascii.unhexlify("%032x" % offset_big) 165 self._decryptor = AES(readkey, iv=iv) 166 self._decryptor.process("\x00"*offset_small) 167 168 def registerProducer(self, producer, streaming): 169 # this passes through, so the real consumer can flow-control the real 170 # producer. Therefore we don't need to provide any IPushProducer 171 # methods. We implement all the IConsumer methods as pass-throughs, 172 # and only intercept write() to perform decryption. 
173 self._consumer.registerProducer(producer, streaming) 174 def unregisterProducer(self): 175 self._consumer.unregisterProducer() 176 def write(self, ciphertext): 177 started = now() 178 plaintext = self._decryptor.process(ciphertext) 179 elapsed = now() - started 180 self._read_event.update(0, elapsed, 0) 181 self._consumer.write(plaintext) 182 183 class ImmutableFileNode: 184 implements(IImmutableFileNode) 185 186 # I wrap a CiphertextFileNode with a decryption key 187 def __init__(self, filecap, storage_broker, secret_holder, terminator, 188 history): 189 assert isinstance(filecap, uri.CHKFileURI) 190 verifycap = filecap.get_verify_cap() 191 ds = DownloadStatus(verifycap.storage_index, verifycap.size) 192 if history: 193 history.add_download(ds) 194 self._download_status = ds 195 self._cnode = CiphertextFileNode(verifycap, storage_broker, 196 secret_holder, terminator, history, ds) 197 assert isinstance(filecap, uri.CHKFileURI) 198 self.u = filecap 199 self._readkey = filecap.key 200 201 # TODO: I'm not sure about this.. what's the use case for node==node? 
If 202 # we keep it here, we should also put this on CiphertextFileNode 203 def __hash__(self): 204 return self.u.__hash__() 205 def __eq__(self, other): 206 if isinstance(other, ImmutableFileNode): 207 return self.u.__eq__(other.u) 208 else: 209 return False 210 def __ne__(self, other): 211 if isinstance(other, ImmutableFileNode): 212 return self.u.__eq__(other.u) 213 else: 214 return True 215 298 216 def read(self, consumer, offset=0, size=None): 299 self.log("read", offset=offset, size=size, 300 umid="UPP8FA", level=log.OPERATIONAL) 301 if size is None: 302 size = self.get_size() - offset 303 size = min(size, self.get_size() - offset) 304 305 if offset == 0 and size == self.get_size(): 306 # don't use the cache, just do a normal streaming download 307 self.log("doing normal full download", umid="VRSBwg", level=log.OPERATIONAL) 308 target = download.ConsumerAdapter(consumer) 309 return self._downloader.download(self.get_cap(), target, 310 self._parentmsgid, 311 history=self._history) 312 313 d = self.download_cache.when_range_available(offset, size) 314 d.addCallback(lambda res: 315 self.download_cache.read(consumer, offset, size)) 217 actual_size = size 218 if actual_size == None: 219 actual_size = self.u.size 220 actual_size = actual_size - offset 221 read_ev = self._download_status.add_read_event(offset,actual_size, 222 now()) 223 decryptor = DecryptingConsumer(consumer, self._readkey, offset, read_ev) 224 d = self._cnode.read(decryptor, offset, size, read_ev) 225 d.addCallback(lambda dc: consumer) 316 226 return d 317 227 318 class LiteralProducer: 319 implements(IPushProducer) 320 def resumeProducing(self): 321 pass 322 def stopProducing(self): 228 def raise_error(self): 323 229 pass 324 230 231 def get_write_uri(self): 232 return None 325 233 326 class LiteralFileNode(_ImmutableFileNodeBase): 327 328 def __init__(self, filecap): 329 assert isinstance(filecap, LiteralFileURI) 330 self.u = filecap 331 332 def get_size(self): 333 return len(self.u.data) 334 
def get_current_size(self): 335 return defer.succeed(self.get_size()) 234 def get_readonly_uri(self): 235 return self.get_uri() 336 236 237 def get_uri(self): 238 return self.u.to_string() 337 239 def get_cap(self): 338 240 return self.u 339 241 def get_readcap(self): 340 return self.u 242 return self.u.get_readonly() 341 243 def get_verify_cap(self): 342 return None244 return self.u.get_verify_cap() 343 245 def get_repair_cap(self): 344 return None 345 346 def get_uri(self): 347 return self.u.to_string() 246 # CHK files can be repaired with just the verifycap 247 return self.u.get_verify_cap() 348 248 349 249 def get_storage_index(self): 350 return None250 return self.u.get_storage_index() 351 251 352 def check(self, monitor, verify=False, add_lease=False): 353 return defer.succeed(None) 252 def get_size(self): 253 return self.u.get_size() 254 def get_current_size(self): 255 return defer.succeed(self.get_size()) 354 256 355 def check_and_repair(self, monitor, verify=False, add_lease=False):356 return defer.succeed(None)257 def is_mutable(self): 258 return False 357 259 358 def read(self, consumer, offset=0, size=None): 359 if size is None: 360 data = self.u.data[offset:] 361 else: 362 data = self.u.data[offset:offset+size] 363 364 # We use twisted.protocols.basic.FileSender, which only does 365 # non-streaming, i.e. PullProducer, where the receiver/consumer must 366 # ask explicitly for each chunk of data. There are only two places in 367 # the Twisted codebase that can't handle streaming=False, both of 368 # which are in the upload path for an FTP/SFTP server 369 # (protocols.ftp.FileConsumer and 370 # vfs.adapters.ftp._FileToConsumerAdapter), neither of which is 371 # likely to be used as the target for a Tahoe download. 
372 373 d = basic.FileSender().beginFileTransfer(StringIO(data), consumer) 374 d.addCallback(lambda lastSent: consumer) 375 return d 260 def is_readonly(self): 261 return True 262 263 def is_unknown(self): 264 return False 265 266 def is_allowed_in_immutable_directory(self): 267 return True 268 269 def check_and_repair(self, monitor, verify=False, add_lease=False): 270 return self._cnode.check_and_repair(monitor, verify, add_lease) 271 def check(self, monitor, verify=False, add_lease=False): 272 return self._cnode.check(monitor, verify, add_lease) -
src/allmydata/immutable/layout.py
diff --git a/src/allmydata/immutable/layout.py b/src/allmydata/immutable/layout.py index 6e07da7..27fb844 100644
a b limitations described in #346. 74 74 # they are still provided when writing so that older versions of Tahoe can 75 75 # read them. 76 76 77 FORCE_V2 = False # set briefly by unit tests to make small-sized V2 shares 78 77 79 def make_write_bucket_proxy(rref, data_size, block_size, num_segments, 78 80 num_share_hashes, uri_extension_size_max, nodeid): 79 81 # Use layout v1 for small files, so they'll be readable by older versions 80 82 # (<tahoe-1.3.0). Use layout v2 for large files; they'll only be readable 81 83 # by tahoe-1.3.0 or later. 82 84 try: 85 if FORCE_V2: 86 raise FileTooLargeError 83 87 wbp = WriteBucketProxy(rref, data_size, block_size, num_segments, 84 88 num_share_hashes, uri_extension_size_max, nodeid) 85 89 except FileTooLargeError: -
new file src/allmydata/immutable/literal.py
diff --git a/src/allmydata/immutable/literal.py b/src/allmydata/immutable/literal.py new file mode 100644 index 0000000..09466cb
- + 1 from cStringIO import StringIO 2 from zope.interface import implements 3 from twisted.internet import defer 4 from twisted.internet.interfaces import IPushProducer 5 from twisted.protocols import basic 6 from allmydata.interfaces import IImmutableFileNode, ICheckable 7 from allmydata.uri import LiteralFileURI 8 9 class _ImmutableFileNodeBase(object): 10 implements(IImmutableFileNode, ICheckable) 11 12 def get_write_uri(self): 13 return None 14 15 def get_readonly_uri(self): 16 return self.get_uri() 17 18 def is_mutable(self): 19 return False 20 21 def is_readonly(self): 22 return True 23 24 def is_unknown(self): 25 return False 26 27 def is_allowed_in_immutable_directory(self): 28 return True 29 30 def raise_error(self): 31 pass 32 33 def __hash__(self): 34 return self.u.__hash__() 35 def __eq__(self, other): 36 if isinstance(other, _ImmutableFileNodeBase): 37 return self.u.__eq__(other.u) 38 else: 39 return False 40 def __ne__(self, other): 41 if isinstance(other, _ImmutableFileNodeBase): 42 return self.u.__eq__(other.u) 43 else: 44 return True 45 46 47 class LiteralProducer: 48 implements(IPushProducer) 49 def resumeProducing(self): 50 pass 51 def stopProducing(self): 52 pass 53 54 55 class LiteralFileNode(_ImmutableFileNodeBase): 56 57 def __init__(self, filecap): 58 assert isinstance(filecap, LiteralFileURI) 59 self.u = filecap 60 61 def get_size(self): 62 return len(self.u.data) 63 def get_current_size(self): 64 return defer.succeed(self.get_size()) 65 66 def get_cap(self): 67 return self.u 68 def get_readcap(self): 69 return self.u 70 def get_verify_cap(self): 71 return None 72 def get_repair_cap(self): 73 return None 74 75 def get_uri(self): 76 return self.u.to_string() 77 78 def get_storage_index(self): 79 return None 80 81 def check(self, monitor, verify=False, add_lease=False): 82 return defer.succeed(None) 83 84 def check_and_repair(self, monitor, verify=False, add_lease=False): 85 return defer.succeed(None) 86 87 def read(self, consumer, offset=0, 
size=None): 88 if size is None: 89 data = self.u.data[offset:] 90 else: 91 data = self.u.data[offset:offset+size] 92 93 # We use twisted.protocols.basic.FileSender, which only does 94 # non-streaming, i.e. PullProducer, where the receiver/consumer must 95 # ask explicitly for each chunk of data. There are only two places in 96 # the Twisted codebase that can't handle streaming=False, both of 97 # which are in the upload path for an FTP/SFTP server 98 # (protocols.ftp.FileConsumer and 99 # vfs.adapters.ftp._FileToConsumerAdapter), neither of which is 100 # likely to be used as the target for a Tahoe download. 101 102 d = basic.FileSender().beginFileTransfer(StringIO(data), consumer) 103 d.addCallback(lambda lastSent: consumer) 104 return d -
src/allmydata/immutable/repairer.py
diff --git a/src/allmydata/immutable/repairer.py b/src/allmydata/immutable/repairer.py index fa6a604..64fb9a1 100644
a b 1 1 from zope.interface import implements 2 2 from twisted.internet import defer 3 3 from allmydata.storage.server import si_b2a 4 from allmydata.util import log, observer 5 from allmydata.util.assertutil import precondition, _assert 6 from allmydata.uri import CHKFileVerifierURI 7 from allmydata.interfaces import IEncryptedUploadable, IDownloadTarget 8 from twisted.internet.interfaces import IConsumer 4 from allmydata.util import log, consumer 5 from allmydata.util.assertutil import precondition 6 from allmydata.interfaces import IEncryptedUploadable 9 7 10 from allmydata.immutable import download, upload 11 12 import collections 8 from allmydata.immutable import upload 13 9 14 10 class Repairer(log.PrefixingLogMixin): 11 implements(IEncryptedUploadable) 15 12 """I generate any shares which were not available and upload them to 16 13 servers. 17 14 … … class Repairer(log.PrefixingLogMixin): 43 40 cancelled (by invoking its raise_if_cancelled() method). 44 41 """ 45 42 46 def __init__(self, storage_broker, secret_holder, verifycap, monitor): 47 assert precondition(isinstance(verifycap, CHKFileVerifierURI)) 48 49 logprefix = si_b2a(verifycap.get_storage_index())[:5] 43 def __init__(self, filenode, storage_broker, secret_holder, monitor): 44 logprefix = si_b2a(filenode.get_storage_index())[:5] 50 45 log.PrefixingLogMixin.__init__(self, "allmydata.immutable.repairer", 51 46 prefix=logprefix) 52 47 self._filenode = filenode 53 48 self._storage_broker = storage_broker 54 49 self._secret_holder = secret_holder 55 self._verifycap = verifycap56 50 self._monitor = monitor 51 self._offset = 0 57 52 58 53 def start(self): 59 54 self.log("starting repair") 60 duc = DownUpConnector() 61 dl = download.CiphertextDownloader(self._storage_broker, 62 self._verifycap, target=duc, 63 monitor=self._monitor) 64 ul = upload.CHKUploader(self._storage_broker, self._secret_holder) 65 66 d = defer.Deferred() 67 68 # If the upload or the download fails or is stopped, then the repair 69 # 
failed. 70 def _errb(f): 71 d.errback(f) 72 return None 73 74 # If the upload succeeds, then the repair has succeeded. 75 def _cb(res): 76 d.callback(res) 77 ul.start(duc).addCallbacks(_cb, _errb) 78 79 # If the download fails or is stopped, then the repair failed. 80 d2 = dl.start() 81 d2.addErrback(_errb) 82 83 # We ignore the callback from d2. Is this right? Ugh. 84 55 d = self._filenode.get_segment_size() 56 def _got_segsize(segsize): 57 vcap = self._filenode.get_verify_cap() 58 k = vcap.needed_shares 59 N = vcap.total_shares 60 happy = upload.BaseUploadable.default_encoding_param_happy 61 self._encodingparams = (k, happy, N, segsize) 62 ul = upload.CHKUploader(self._storage_broker, self._secret_holder) 63 return ul.start(self) # I am the IEncryptedUploadable 64 d.addCallback(_got_segsize) 85 65 return d 86 66 87 class DownUpConnector(log.PrefixingLogMixin):88 implements(IEncryptedUploadable, IDownloadTarget, IConsumer)89 """I act like an 'encrypted uploadable' -- something that a local90 uploader can read ciphertext from in order to upload the ciphertext.91 However, unbeknownst to the uploader, I actually download the ciphertext92 from a CiphertextDownloader instance as it is needed.93 94 On the other hand, I act like a 'download target' -- something that a95 local downloader can write ciphertext to as it downloads the ciphertext.96 That downloader doesn't realize, of course, that I'm just turning around97 and giving the ciphertext to the uploader."""98 99 # The theory behind this class is nice: just satisfy two separate100 # interfaces. The implementation is slightly horrible, because of101 # "impedance mismatch" -- the downloader expects to be able to102 # synchronously push data in, and the uploader expects to be able to read103 # data out with a "read(THIS_SPECIFIC_LENGTH)" which returns a deferred.104 # The two interfaces have different APIs for pausing/unpausing. 
The105 # uploader requests metadata like size and encodingparams which the106 # downloader provides either eventually or not at all (okay I just now107 # extended the downloader to provide encodingparams). Most of this108 # slightly horrible code would disappear if CiphertextDownloader just109 # used this object as an IConsumer (plus maybe a couple of other methods)110 # and if the Uploader simply expected to be treated as an IConsumer (plus111 # maybe a couple of other things).112 113 def __init__(self, buflim=2**19):114 """If we're already holding at least buflim bytes, then tell the115 downloader to pause until we have less than buflim bytes."""116 log.PrefixingLogMixin.__init__(self, "allmydata.immutable.repairer")117 self.buflim = buflim118 self.bufs = collections.deque() # list of strings119 self.bufsiz = 0 # how many bytes total in bufs120 121 # list of deferreds which will fire with the requested ciphertext122 self.next_read_ds = collections.deque()123 124 # how many bytes of ciphertext were requested by each deferred125 self.next_read_lens = collections.deque()126 127 self._size_osol = observer.OneShotObserverList()128 self._encodingparams_osol = observer.OneShotObserverList()129 self._storageindex_osol = observer.OneShotObserverList()130 self._closed_to_pusher = False131 132 # once seg size is available, the following attribute will be created133 # to hold it:134 135 # self.encodingparams # (provided by the object which is pushing data136 # into me, required by the object which is pulling data out of me)137 138 # open() will create the following attribute:139 # self.size # size of the whole file (provided by the object which is140 # pushing data into me, required by the object which is pulling data141 # out of me)142 143 # set_upload_status() will create the following attribute:144 145 # self.upload_status # XXX do we need to actually update this? 
Is146 # anybody watching the results during a repair?147 148 def _satisfy_reads_if_possible(self):149 assert bool(self.next_read_ds) == bool(self.next_read_lens)150 while self.next_read_ds and ((self.bufsiz >= self.next_read_lens[0])151 or self._closed_to_pusher):152 nrd = self.next_read_ds.popleft()153 nrl = self.next_read_lens.popleft()154 155 # Pick out the requested number of bytes from self.bufs, turn it156 # into a string, and callback the deferred with that.157 res = []158 ressize = 0159 while ressize < nrl and self.bufs:160 nextbuf = self.bufs.popleft()161 res.append(nextbuf)162 ressize += len(nextbuf)163 if ressize > nrl:164 extra = ressize - nrl165 self.bufs.appendleft(nextbuf[:-extra])166 res[-1] = nextbuf[:-extra]167 assert _assert(sum(len(x) for x in res) <= nrl, [len(x) for x in res], nrl)168 assert _assert(sum(len(x) for x in res) == nrl or self._closed_to_pusher, [len(x) for x in res], nrl)169 self.bufsiz -= nrl170 if self.bufsiz < self.buflim and self.producer:171 self.producer.resumeProducing()172 nrd.callback(res)173 174 # methods to satisfy the IConsumer and IDownloadTarget interfaces. 
(From175 # the perspective of a downloader I am an IDownloadTarget and an176 # IConsumer.)177 def registerProducer(self, producer, streaming):178 assert streaming # We know how to handle only streaming producers.179 self.producer = producer # the downloader180 def unregisterProducer(self):181 self.producer = None182 def open(self, size):183 self.size = size184 self._size_osol.fire(self.size)185 def set_encodingparams(self, encodingparams):186 self.encodingparams = encodingparams187 self._encodingparams_osol.fire(self.encodingparams)188 def set_storageindex(self, storageindex):189 self.storageindex = storageindex190 self._storageindex_osol.fire(self.storageindex)191 def write(self, data):192 precondition(data) # please don't write empty strings193 self.bufs.append(data)194 self.bufsiz += len(data)195 self._satisfy_reads_if_possible()196 if self.bufsiz >= self.buflim and self.producer:197 self.producer.pauseProducing()198 def finish(self):199 pass200 def close(self):201 self._closed_to_pusher = True202 # Any reads which haven't been satisfied by now are going to203 # have to be satisfied with short reads.204 self._satisfy_reads_if_possible()205 67 206 68 # methods to satisfy the IEncryptedUploader interface 207 69 # (From the perspective of an uploader I am an IEncryptedUploadable.) 208 70 def set_upload_status(self, upload_status): 209 71 self.upload_status = upload_status 210 72 def get_size(self): 211 if hasattr(self, 'size'): # attribute created by self.open() 212 return defer.succeed(self.size) 213 else: 214 return self._size_osol.when_fired() 73 size = self._filenode.get_size() 74 assert size is not None 75 return defer.succeed(size) 215 76 def get_all_encoding_parameters(self): 216 # We have to learn the encoding params from pusher. 
217 if hasattr(self, 'encodingparams'): 218 # attribute created by self.set_encodingparams() 219 return defer.succeed(self.encodingparams) 220 else: 221 return self._encodingparams_osol.when_fired() 77 return defer.succeed(self._encodingparams) 222 78 def read_encrypted(self, length, hash_only): 223 """Returns a deferred which eventually fire dwith the requested224 ciphertext ."""79 """Returns a deferred which eventually fires with the requested 80 ciphertext, as a list of strings.""" 225 81 precondition(length) # please don't ask to read 0 bytes 226 d = defer.Deferred()227 self.next_read_ds.append(d)228 self. next_read_lens.append(length)229 self._satisfy_reads_if_possible()82 mc = consumer.MemoryConsumer() 83 d = self._filenode.read(mc, self._offset, length) 84 self._offset += length 85 d.addCallback(lambda ign: mc.chunks) 230 86 return d 231 87 def get_storage_index(self): 232 # We have to learn the storage index from pusher. 233 if hasattr(self, 'storageindex'): 234 # attribute created by self.set_storageindex() 235 return defer.succeed(self.storageindex) 236 else: 237 return self._storageindex.when_fired() 88 return self._filenode.get_storage_index() 89 def close(self): 90 pass -
src/allmydata/immutable/upload.py
diff --git a/src/allmydata/immutable/upload.py b/src/allmydata/immutable/upload.py index dc46800..a3f8c92 100644
a b from allmydata.util.assertutil import precondition 20 20 from allmydata.util.rrefutil import add_version_to_remote_reference 21 21 from allmydata.interfaces import IUploadable, IUploader, IUploadResults, \ 22 22 IEncryptedUploadable, RIEncryptedUploadable, IUploadStatus, \ 23 NoServersError, InsufficientVersionError, UploadUnhappinessError 23 NoServersError, InsufficientVersionError, UploadUnhappinessError, \ 24 DEFAULT_MAX_SEGMENT_SIZE 24 25 from allmydata.immutable import layout 25 26 from pycryptopp.cipher.aes import AES 26 27 … … class AssistedUploader: 1205 1206 return self._upload_status 1206 1207 1207 1208 class BaseUploadable: 1208 default_max_segment_size = 128*KiB # overridden by max_segment_size 1209 # this is overridden by max_segment_size 1210 default_max_segment_size = DEFAULT_MAX_SEGMENT_SIZE 1209 1211 default_encoding_param_k = 3 # overridden by encoding_parameters 1210 1212 default_encoding_param_happy = 7 1211 1213 default_encoding_param_n = 10 -
src/allmydata/interfaces.py
diff --git a/src/allmydata/interfaces.py b/src/allmydata/interfaces.py index 4cfe9c9..3a7fa7f 100644
a b WriteEnablerSecret = Hash # used to protect mutable bucket modifications 24 24 LeaseRenewSecret = Hash # used to protect bucket lease renewal requests 25 25 LeaseCancelSecret = Hash # used to protect bucket lease cancellation requests 26 26 27 KiB = 1024 28 DEFAULT_MAX_SEGMENT_SIZE = 128*KiB 29 27 30 class RIStubClient(RemoteInterface): 28 31 """Each client publishes a service announcement for a dummy object called 29 32 the StubClient. This object doesn't actually offer any services, but the -
src/allmydata/nodemaker.py
diff --git a/src/allmydata/nodemaker.py b/src/allmydata/nodemaker.py index c852f68..3b74d90 100644
a b 1 1 import weakref 2 2 from zope.interface import implements 3 3 from allmydata.interfaces import INodeMaker 4 from allmydata.immutable.filenode import ImmutableFileNode, LiteralFileNode 4 from allmydata.immutable.literal import LiteralFileNode 5 from allmydata.immutable.filenode import ImmutableFileNode, CiphertextFileNode 5 6 from allmydata.immutable.upload import Data 6 7 from allmydata.mutable.filenode import MutableFileNode 7 8 from allmydata.dirnode import DirectoryNode, pack_children … … class NodeMaker: 12 13 implements(INodeMaker) 13 14 14 15 def __init__(self, storage_broker, secret_holder, history, 15 uploader, downloader, download_cache_dirman,16 uploader, terminator, 16 17 default_encoding_parameters, key_generator): 17 18 self.storage_broker = storage_broker 18 19 self.secret_holder = secret_holder 19 20 self.history = history 20 21 self.uploader = uploader 21 self.downloader = downloader 22 self.download_cache_dirman = download_cache_dirman 22 self.terminator = terminator 23 23 self.default_encoding_parameters = default_encoding_parameters 24 24 self.key_generator = key_generator 25 25 … … class NodeMaker: 29 29 return LiteralFileNode(cap) 30 30 def _create_immutable(self, cap): 31 31 return ImmutableFileNode(cap, self.storage_broker, self.secret_holder, 32 self.downloader, self.history, 33 self.download_cache_dirman) 32 self.terminator, self.history) 33 def _create_immutable_verifier(self, cap): 34 return CiphertextFileNode(cap, self.storage_broker, self.secret_holder, 35 self.terminator, self.history) 34 36 def _create_mutable(self, cap): 35 37 n = MutableFileNode(self.storage_broker, self.secret_holder, 36 38 self.default_encoding_parameters, … … class NodeMaker: 73 75 return self._create_lit(cap) 74 76 if isinstance(cap, uri.CHKFileURI): 75 77 return self._create_immutable(cap) 78 if isinstance(cap, uri.CHKFileVerifierURI): 79 return self._create_immutable_verifier(cap) 76 80 if isinstance(cap, (uri.ReadonlySSKFileURI, 
uri.WriteableSSKFileURI)): 77 81 return self._create_mutable(cap) 78 82 if isinstance(cap, (uri.DirectoryURI, -
src/allmydata/test/no_network.py
diff --git a/src/allmydata/test/no_network.py b/src/allmydata/test/no_network.py index 771dffd..a1c475d 100644
a b class NoNetworkGrid(service.MultiService): 223 223 fileutil.make_dirs(serverdir) 224 224 ss = StorageServer(serverdir, serverid, stats_provider=SimpleStats(), 225 225 readonly_storage=readonly) 226 ss._no_network_server_number = i 226 227 return ss 227 228 228 229 def add_server(self, i, ss): … … class GridTestMixin: 319 320 pass 320 321 return sorted(shares) 321 322 323 def copy_shares(self, uri): 324 shares = {} 325 for (shnum, serverid, sharefile) in self.find_shares(uri): 326 shares[sharefile] = open(sharefile, "rb").read() 327 return shares 328 329 def restore_all_shares(self, shares): 330 for sharefile, data in shares.items(): 331 open(sharefile, "wb").write(data) 332 322 333 def delete_share(self, (shnum, serverid, sharefile)): 323 334 os.unlink(sharefile) 324 335 … … class GridTestMixin: 339 350 corruptdata = corruptor(sharedata, debug=debug) 340 351 open(i_sharefile, "wb").write(corruptdata) 341 352 353 def corrupt_all_shares(self, uri, corruptor, debug=False): 354 for (i_shnum, i_serverid, i_sharefile) in self.find_shares(uri): 355 sharedata = open(i_sharefile, "rb").read() 356 corruptdata = corruptor(sharedata, debug=debug) 357 open(i_sharefile, "wb").write(corruptdata) 358 342 359 def GET(self, urlpath, followRedirect=False, return_response=False, 343 360 method="GET", clientnum=0, **kwargs): 344 361 # if return_response=True, this fires with (data, statuscode, -
src/allmydata/test/test_cli.py
diff --git a/src/allmydata/test/test_cli.py b/src/allmydata/test/test_cli.py index cec32e4..1e88053 100644
a b class Errors(GridTestMixin, CLITestMixin, unittest.TestCase): 2300 2300 self.delete_shares_numbered(ur.uri, range(1,10)) 2301 2301 d.addCallback(_stash_bad) 2302 2302 2303 # the download is abandoned as soon as it's clear that we won't get 2304 # enough shares. The one remaining share might be in either the 2305 # COMPLETE or the PENDING state. 2306 in_complete_msg = "ran out of shares: 1 complete, 0 pending, 0 overdue, 0 unused, need 3" 2307 in_pending_msg = "ran out of shares: 0 complete, 1 pending, 0 overdue, 0 unused, need 3" 2308 2303 2309 d.addCallback(lambda ign: self.do_cli("get", self.uri_1share)) 2304 2310 def _check1((rc, out, err)): 2305 2311 self.failIfEqual(rc, 0) 2306 2312 self.failUnless("410 Gone" in err, err) 2307 2313 self.failUnlessIn("NotEnoughSharesError: ", err) 2308 self.failUnlessIn("Failed to get enough shareholders: have 1, need 3", err) 2314 self.failUnless(in_complete_msg in err or in_pending_msg in err, 2315 err) 2309 2316 d.addCallback(_check1) 2310 2317 2311 2318 targetf = os.path.join(self.basedir, "output") … … class Errors(GridTestMixin, CLITestMixin, unittest.TestCase): 2314 2321 self.failIfEqual(rc, 0) 2315 2322 self.failUnless("410 Gone" in err, err) 2316 2323 self.failUnlessIn("NotEnoughSharesError: ", err) 2317 self.failUnlessIn("Failed to get enough shareholders: have 1, need 3", err) 2324 self.failUnless(in_complete_msg in err or in_pending_msg in err, 2325 err) 2318 2326 self.failIf(os.path.exists(targetf)) 2319 2327 d.addCallback(_check2) 2320 2328 -
src/allmydata/test/test_dirnode.py
diff --git a/src/allmydata/test/test_dirnode.py b/src/allmydata/test/test_dirnode.py index 7d8d66d..8122def 100644
a b class Packing(testutil.ReallyEqualMixin, unittest.TestCase): 1202 1202 def test_unpack_and_pack_behavior(self): 1203 1203 known_tree = b32decode(self.known_tree) 1204 1204 nodemaker = NodeMaker(None, None, None, 1205 None, None, None,1205 None, None, 1206 1206 {"k": 3, "n": 10}, None) 1207 1207 write_uri = "URI:SSK-RO:e3mdrzfwhoq42hy5ubcz6rp3o4:ybyibhnp3vvwuq2vaw2ckjmesgkklfs6ghxleztqidihjyofgw7q" 1208 1208 filenode = nodemaker.create_from_cap(write_uri) … … class Packing(testutil.ReallyEqualMixin, unittest.TestCase): 1264 1264 return kids 1265 1265 1266 1266 def test_deep_immutable(self): 1267 nm = NodeMaker(None, None, None, None, None, None, {"k": 3, "n": 10}, 1268 None) 1267 nm = NodeMaker(None, None, None, None, None, {"k": 3, "n": 10}, None) 1269 1268 fn = MinimalFakeMutableFile() 1270 1269 1271 1270 kids = self._make_kids(nm, ["imm", "lit", "write", "read", … … class FakeNodeMaker(NodeMaker): 1359 1358 class FakeClient2(Client): 1360 1359 def __init__(self): 1361 1360 self.nodemaker = FakeNodeMaker(None, None, None, 1362 None, None, None,1361 None, None, 1363 1362 {"k":3,"n":10}, None) 1364 1363 def create_node_from_uri(self, rwcap, rocap): 1365 1364 return self.nodemaker.create_from_cap(rwcap, rocap) … … class Deleter(GridTestMixin, testutil.ReallyEqualMixin, unittest.TestCase): 1643 1642 def _do_delete(ignored): 1644 1643 nm = UCWEingNodeMaker(c0.storage_broker, c0._secret_holder, 1645 1644 c0.get_history(), c0.getServiceNamed("uploader"), 1646 c0.downloader, 1647 c0.download_cache_dirman, 1645 c0.terminator, 1648 1646 c0.get_encoding_parameters(), 1649 1647 c0._key_generator) 1650 1648 n = nm.create_from_cap(self.root_uri) -
src/allmydata/test/test_download.py
diff --git a/src/allmydata/test/test_download.py b/src/allmydata/test/test_download.py index b54bf01..520eaf2 100644
a b 5 5 6 6 import os 7 7 from twisted.trial import unittest 8 from twisted.internet import defer, reactor 8 9 from allmydata import uri 9 10 from allmydata.storage.server import storage_index_to_dir 10 from allmydata.util import base32, fileutil 11 from allmydata.util.consumer import download_to_data 12 from allmydata.immutable import upload 11 from allmydata.util import base32, fileutil, spans, log 12 from allmydata.util.consumer import download_to_data, MemoryConsumer 13 from allmydata.immutable import upload, layout 13 14 from allmydata.test.no_network import GridTestMixin 15 from allmydata.test.common import ShouldFailMixin 16 from allmydata.interfaces import NotEnoughSharesError, NoSharesError 17 from allmydata.immutable.downloader.common import BadSegmentNumberError, \ 18 BadCiphertextHashError, DownloadStopped 19 from allmydata.codec import CRSDecoder 20 from foolscap.eventual import fireEventually, flushEventualQueue 14 21 15 22 plaintext = "This is a moderate-sized file.\n" * 10 16 23 mutable_plaintext = "This is a moderate-sized mutable file.\n" * 10 … … mutable_shares = { 68 75 } 69 76 #--------- END stored_shares.py ---------------- 70 77 71 class DownloadTest(GridTestMixin, unittest.TestCase): 72 timeout = 2400 # It takes longer than 240 seconds on Zandr's ARM box. 
73 def test_download(self): 74 self.basedir = self.mktemp() 75 self.set_up_grid() 76 self.c0 = self.g.clients[0] 77 78 # do this to create the shares 79 #return self.create_shares() 80 81 self.load_shares() 82 d = self.download_immutable() 83 d.addCallback(self.download_mutable) 84 return d 78 class _Base(GridTestMixin, ShouldFailMixin): 85 79 86 80 def create_shares(self, ignored=None): 87 81 u = upload.Data(plaintext, None) … … class DownloadTest(GridTestMixin, unittest.TestCase): 178 172 def _got_data(data): 179 173 self.failUnlessEqual(data, plaintext) 180 174 d.addCallback(_got_data) 175 # make sure we can use the same node twice 176 d.addCallback(lambda ign: download_to_data(n)) 177 d.addCallback(_got_data) 181 178 return d 182 179 183 180 def download_mutable(self, ignored=None): … … class DownloadTest(GridTestMixin, unittest.TestCase): 188 185 d.addCallback(_got_data) 189 186 return d 190 187 188 class DownloadTest(_Base, unittest.TestCase): 189 timeout = 2400 # It takes longer than 240 seconds on Zandr's ARM box. 190 def test_download(self): 191 self.basedir = self.mktemp() 192 self.set_up_grid() 193 self.c0 = self.g.clients[0] 194 195 # do this to create the shares 196 #return self.create_shares() 197 198 self.load_shares() 199 d = self.download_immutable() 200 d.addCallback(self.download_mutable) 201 return d 202 203 def test_download_failover(self): 204 self.basedir = self.mktemp() 205 self.set_up_grid() 206 self.c0 = self.g.clients[0] 207 208 self.load_shares() 209 si = uri.from_string(immutable_uri).get_storage_index() 210 si_dir = storage_index_to_dir(si) 211 212 n = self.c0.create_node_from_uri(immutable_uri) 213 d = download_to_data(n) 214 def _got_data(data): 215 self.failUnlessEqual(data, plaintext) 216 d.addCallback(_got_data) 217 218 def _clobber_some_shares(ign): 219 # find the three shares that were used, and delete them. 
Then 220 # download again, forcing the downloader to fail over to other 221 # shares 222 for s in n._cnode._node._shares: 223 for clientnum in immutable_shares: 224 for shnum in immutable_shares[clientnum]: 225 if s._shnum == shnum: 226 fn = os.path.join(self.get_serverdir(clientnum), 227 "shares", si_dir, str(shnum)) 228 os.unlink(fn) 229 d.addCallback(_clobber_some_shares) 230 d.addCallback(lambda ign: download_to_data(n)) 231 d.addCallback(_got_data) 232 233 def _clobber_most_shares(ign): 234 # delete all but one of the shares that are still alive 235 live_shares = [s for s in n._cnode._node._shares if s.is_alive()] 236 save_me = live_shares[0]._shnum 237 for clientnum in immutable_shares: 238 for shnum in immutable_shares[clientnum]: 239 if shnum == save_me: 240 continue 241 fn = os.path.join(self.get_serverdir(clientnum), 242 "shares", si_dir, str(shnum)) 243 if os.path.exists(fn): 244 os.unlink(fn) 245 # now the download should fail with NotEnoughSharesError 246 return self.shouldFail(NotEnoughSharesError, "1shares", None, 247 download_to_data, n) 248 d.addCallback(_clobber_most_shares) 249 250 def _clobber_all_shares(ign): 251 # delete the last remaining share 252 for clientnum in immutable_shares: 253 for shnum in immutable_shares[clientnum]: 254 fn = os.path.join(self.get_serverdir(clientnum), 255 "shares", si_dir, str(shnum)) 256 if os.path.exists(fn): 257 os.unlink(fn) 258 # now a new download should fail with NoSharesError. We want a 259 # new ImmutableFileNode so it will forget about the old shares. 260 # If we merely called create_node_from_uri() without first 261 # dereferencing the original node, the NodeMaker's _node_cache 262 # would give us back the old one. 
263 n = None 264 n = self.c0.create_node_from_uri(immutable_uri) 265 return self.shouldFail(NoSharesError, "0shares", None, 266 download_to_data, n) 267 d.addCallback(_clobber_all_shares) 268 return d 269 270 def test_lost_servers(self): 271 # while downloading a file (after seg[0], before seg[1]), lose the 272 # three servers that we were using. The download should switch over 273 # to other servers. 274 self.basedir = self.mktemp() 275 self.set_up_grid() 276 self.c0 = self.g.clients[0] 277 278 # upload a file with multiple segments, so we can catch the download 279 # in the middle. 280 u = upload.Data(plaintext, None) 281 u.max_segment_size = 70 # 5 segs 282 d = self.c0.upload(u) 283 def _uploaded(ur): 284 self.uri = ur.uri 285 self.n = self.c0.create_node_from_uri(self.uri) 286 return download_to_data(self.n) 287 d.addCallback(_uploaded) 288 def _got_data(data): 289 self.failUnlessEqual(data, plaintext) 290 d.addCallback(_got_data) 291 def _kill_some_servers(): 292 # find the three shares that were used, and delete them. Then 293 # download again, forcing the downloader to fail over to other 294 # shares 295 servers = [] 296 shares = sorted([s._shnum for s in self.n._cnode._node._shares]) 297 self.failUnlessEqual(shares, [0,1,2]) 298 # break the RIBucketReader references 299 for s in self.n._cnode._node._shares: 300 s._rref.broken = True 301 for servernum in immutable_shares: 302 for shnum in immutable_shares[servernum]: 303 if s._shnum == shnum: 304 ss = self.g.servers_by_number[servernum] 305 servers.append(ss) 306 # and, for good measure, break the RIStorageServer references 307 # too, just in case the downloader gets more aggressive in the 308 # future and tries to re-fetch the same share. 
309 for ss in servers: 310 wrapper = self.g.servers_by_id[ss.my_nodeid] 311 wrapper.broken = True 312 def _download_again(ign): 313 c = StallingConsumer(_kill_some_servers) 314 return self.n.read(c) 315 d.addCallback(_download_again) 316 def _check_failover(c): 317 self.failUnlessEqual("".join(c.chunks), plaintext) 318 shares = sorted([s._shnum for s in self.n._cnode._node._shares]) 319 # we should now be using more shares than we were before 320 self.failIfEqual(shares, [0,1,2]) 321 d.addCallback(_check_failover) 322 return d 323 324 def test_badguess(self): 325 self.basedir = self.mktemp() 326 self.set_up_grid() 327 self.c0 = self.g.clients[0] 328 self.load_shares() 329 n = self.c0.create_node_from_uri(immutable_uri) 330 331 # Cause the downloader to guess a segsize that's too low, so it will 332 # ask for a segment number that's too high (beyond the end of the 333 # real list, causing BadSegmentNumberError), to exercise 334 # Segmentation._retry_bad_segment 335 336 con1 = MemoryConsumer() 337 n._cnode._node._build_guessed_tables(90) 338 # plaintext size of 310 bytes, wrong-segsize of 90 bytes, will make 339 # us think that file[180:200] is in the third segment (segnum=2), but 340 # really there's only one segment 341 d = n.read(con1, 180, 20) 342 def _done(res): 343 self.failUnlessEqual("".join(con1.chunks), plaintext[180:200]) 344 d.addCallback(_done) 345 return d 346 347 def test_simultaneous_badguess(self): 348 self.basedir = self.mktemp() 349 self.set_up_grid() 350 self.c0 = self.g.clients[0] 351 352 # upload a file with multiple segments, and a non-default segsize, to 353 # exercise the offset-guessing code. Because we don't tell the 354 # downloader about the unusual segsize, it will guess wrong, and have 355 # to do extra roundtrips to get the correct data. 
356 u = upload.Data(plaintext, None) 357 u.max_segment_size = 70 # 5 segs, 8-wide hashtree 358 con1 = MemoryConsumer() 359 con2 = MemoryConsumer() 360 d = self.c0.upload(u) 361 def _uploaded(ur): 362 n = self.c0.create_node_from_uri(ur.uri) 363 d1 = n.read(con1, 70, 20) 364 d2 = n.read(con2, 140, 20) 365 return defer.gatherResults([d1,d2]) 366 d.addCallback(_uploaded) 367 def _done(res): 368 self.failUnlessEqual("".join(con1.chunks), plaintext[70:90]) 369 self.failUnlessEqual("".join(con2.chunks), plaintext[140:160]) 370 d.addCallback(_done) 371 return d 372 373 def test_simultaneous_goodguess(self): 374 self.basedir = self.mktemp() 375 self.set_up_grid() 376 self.c0 = self.g.clients[0] 377 378 # upload a file with multiple segments, and a non-default segsize, to 379 # exercise the offset-guessing code. This time we *do* tell the 380 # downloader about the unusual segsize, so it can guess right. 381 u = upload.Data(plaintext, None) 382 u.max_segment_size = 70 # 5 segs, 8-wide hashtree 383 con1 = MemoryConsumer() 384 con2 = MemoryConsumer() 385 d = self.c0.upload(u) 386 def _uploaded(ur): 387 n = self.c0.create_node_from_uri(ur.uri) 388 n._cnode._node._build_guessed_tables(u.max_segment_size) 389 d1 = n.read(con1, 70, 20) 390 #d2 = n.read(con2, 140, 20) # XXX 391 d2 = defer.succeed(None) 392 return defer.gatherResults([d1,d2]) 393 d.addCallback(_uploaded) 394 def _done(res): 395 self.failUnlessEqual("".join(con1.chunks), plaintext[70:90]) 396 self.failUnlessEqual("".join(con2.chunks), plaintext[140:160]) 397 #d.addCallback(_done) 398 return d 399 400 def test_sequential_goodguess(self): 401 self.basedir = self.mktemp() 402 self.set_up_grid() 403 self.c0 = self.g.clients[0] 404 data = (plaintext*100)[:30000] # multiple of k 405 406 # upload a file with multiple segments, and a non-default segsize, to 407 # exercise the offset-guessing code. This time we *do* tell the 408 # downloader about the unusual segsize, so it can guess right. 
409 u = upload.Data(data, None) 410 u.max_segment_size = 6000 # 5 segs, 8-wide hashtree 411 con1 = MemoryConsumer() 412 con2 = MemoryConsumer() 413 d = self.c0.upload(u) 414 def _uploaded(ur): 415 n = self.c0.create_node_from_uri(ur.uri) 416 n._cnode._node._build_guessed_tables(u.max_segment_size) 417 d = n.read(con1, 12000, 20) 418 def _read1(ign): 419 self.failUnlessEqual("".join(con1.chunks), data[12000:12020]) 420 return n.read(con2, 24000, 20) 421 d.addCallback(_read1) 422 def _read2(ign): 423 self.failUnlessEqual("".join(con2.chunks), data[24000:24020]) 424 d.addCallback(_read2) 425 return d 426 d.addCallback(_uploaded) 427 return d 428 429 430 def test_simultaneous_get_blocks(self): 431 self.basedir = self.mktemp() 432 self.set_up_grid() 433 self.c0 = self.g.clients[0] 434 435 self.load_shares() 436 stay_empty = [] 437 438 n = self.c0.create_node_from_uri(immutable_uri) 439 d = download_to_data(n) 440 def _use_shares(ign): 441 shares = list(n._cnode._node._shares) 442 s0 = shares[0] 443 # make sure .cancel works too 444 o0 = s0.get_block(0) 445 o0.subscribe(lambda **kwargs: stay_empty.append(kwargs)) 446 o1 = s0.get_block(0) 447 o2 = s0.get_block(0) 448 o0.cancel() 449 o3 = s0.get_block(1) # state=BADSEGNUM 450 d1 = defer.Deferred() 451 d2 = defer.Deferred() 452 d3 = defer.Deferred() 453 o1.subscribe(lambda **kwargs: d1.callback(kwargs)) 454 o2.subscribe(lambda **kwargs: d2.callback(kwargs)) 455 o3.subscribe(lambda **kwargs: d3.callback(kwargs)) 456 return defer.gatherResults([d1,d2,d3]) 457 d.addCallback(_use_shares) 458 def _done(res): 459 r1,r2,r3 = res 460 self.failUnlessEqual(r1["state"], "COMPLETE") 461 self.failUnlessEqual(r2["state"], "COMPLETE") 462 self.failUnlessEqual(r3["state"], "BADSEGNUM") 463 self.failUnless("block" in r1) 464 self.failUnless("block" in r2) 465 self.failIf(stay_empty) 466 d.addCallback(_done) 467 return d 468 469 def test_download_no_overrun(self): 470 self.basedir = self.mktemp() 471 self.set_up_grid() 472 self.c0 = 
self.g.clients[0] 473 474 self.load_shares() 475 476 # tweak the client's copies of server-version data, so it believes 477 # that they're old and can't handle reads that overrun the length of 478 # the share. This exercises a different code path. 479 for (peerid, rref) in self.c0.storage_broker.get_all_servers(): 480 v1 = rref.version["http://allmydata.org/tahoe/protocols/storage/v1"] 481 v1["tolerates-immutable-read-overrun"] = False 482 483 n = self.c0.create_node_from_uri(immutable_uri) 484 d = download_to_data(n) 485 def _got_data(data): 486 self.failUnlessEqual(data, plaintext) 487 d.addCallback(_got_data) 488 return d 489 490 def test_download_segment(self): 491 self.basedir = self.mktemp() 492 self.set_up_grid() 493 self.c0 = self.g.clients[0] 494 self.load_shares() 495 n = self.c0.create_node_from_uri(immutable_uri) 496 cn = n._cnode 497 (d,c) = cn.get_segment(0) 498 def _got_segment((offset,data,decodetime)): 499 self.failUnlessEqual(offset, 0) 500 self.failUnlessEqual(len(data), len(plaintext)) 501 d.addCallback(_got_segment) 502 return d 503 504 def test_download_segment_cancel(self): 505 self.basedir = self.mktemp() 506 self.set_up_grid() 507 self.c0 = self.g.clients[0] 508 self.load_shares() 509 n = self.c0.create_node_from_uri(immutable_uri) 510 cn = n._cnode 511 (d,c) = cn.get_segment(0) 512 fired = [] 513 d.addCallback(fired.append) 514 c.cancel() 515 d = fireEventually() 516 d.addCallback(flushEventualQueue) 517 def _check(ign): 518 self.failUnlessEqual(fired, []) 519 d.addCallback(_check) 520 return d 521 522 def test_download_bad_segment(self): 523 self.basedir = self.mktemp() 524 self.set_up_grid() 525 self.c0 = self.g.clients[0] 526 self.load_shares() 527 n = self.c0.create_node_from_uri(immutable_uri) 528 cn = n._cnode 529 def _try_download(): 530 (d,c) = cn.get_segment(1) 531 return d 532 d = self.shouldFail(BadSegmentNumberError, "badseg", 533 "segnum=1, numsegs=1", 534 _try_download) 535 return d 536 537 def 
test_download_segment_terminate(self): 538 self.basedir = self.mktemp() 539 self.set_up_grid() 540 self.c0 = self.g.clients[0] 541 self.load_shares() 542 n = self.c0.create_node_from_uri(immutable_uri) 543 cn = n._cnode 544 (d,c) = cn.get_segment(0) 545 fired = [] 546 d.addCallback(fired.append) 547 self.c0.terminator.disownServiceParent() 548 d = fireEventually() 549 d.addCallback(flushEventualQueue) 550 def _check(ign): 551 self.failUnlessEqual(fired, []) 552 d.addCallback(_check) 553 return d 554 555 def test_pause(self): 556 self.basedir = self.mktemp() 557 self.set_up_grid() 558 self.c0 = self.g.clients[0] 559 self.load_shares() 560 n = self.c0.create_node_from_uri(immutable_uri) 561 c = PausingConsumer() 562 d = n.read(c) 563 def _downloaded(mc): 564 newdata = "".join(mc.chunks) 565 self.failUnlessEqual(newdata, plaintext) 566 d.addCallback(_downloaded) 567 return d 568 569 def test_pause_then_stop(self): 570 self.basedir = self.mktemp() 571 self.set_up_grid() 572 self.c0 = self.g.clients[0] 573 self.load_shares() 574 n = self.c0.create_node_from_uri(immutable_uri) 575 c = PausingAndStoppingConsumer() 576 d = self.shouldFail(DownloadStopped, "test_pause_then_stop", 577 "our Consumer called stopProducing()", 578 n.read, c) 579 return d 580 581 def test_stop(self): 582 # use a download targetthat does an immediate stop (ticket #473) 583 self.basedir = self.mktemp() 584 self.set_up_grid() 585 self.c0 = self.g.clients[0] 586 self.load_shares() 587 n = self.c0.create_node_from_uri(immutable_uri) 588 c = StoppingConsumer() 589 d = self.shouldFail(DownloadStopped, "test_stop", 590 "our Consumer called stopProducing()", 591 n.read, c) 592 return d 593 594 def test_download_segment_bad_ciphertext_hash(self): 595 # The crypttext_hash_tree asserts the integrity of the decoded 596 # ciphertext, and exists to detect two sorts of problems. The first 597 # is a bug in zfec decode. 
The second is the "two-sided t-shirt" 598 # attack (found by Christian Grothoff), in which a malicious uploader 599 # creates two sets of shares (one for file A, second for file B), 600 # uploads a combination of them (shares 0-4 of A, 5-9 of B), and then 601 # builds an otherwise normal UEB around those shares: their goal is 602 # to give their victim a filecap which sometimes downloads the good A 603 # contents, and sometimes the bad B contents, depending upon which 604 # servers/shares they can get to. Having a hash of the ciphertext 605 # forces them to commit to exactly one version. (Christian's prize 606 # for finding this problem was a t-shirt with two sides: the shares 607 # of file A on the front, B on the back). 608 609 # creating a set of shares with this property is too hard, although 610 # it'd be nice to do so and confirm our fix. (it requires a lot of 611 # tampering with the uploader). So instead, we just damage the 612 # decoder. The tail decoder is rebuilt each time, so we need to use a 613 # file with multiple segments. 614 self.basedir = self.mktemp() 615 self.set_up_grid() 616 self.c0 = self.g.clients[0] 617 618 u = upload.Data(plaintext, None) 619 u.max_segment_size = 60 # 6 segs 620 d = self.c0.upload(u) 621 def _uploaded(ur): 622 n = self.c0.create_node_from_uri(ur.uri) 623 n._cnode._node._build_guessed_tables(u.max_segment_size) 624 625 d = download_to_data(n) 626 def _break_codec(data): 627 # the codec isn't created until the UEB is retrieved 628 node = n._cnode._node 629 vcap = node._verifycap 630 k, N = vcap.needed_shares, vcap.total_shares 631 bad_codec = BrokenDecoder() 632 bad_codec.set_params(node.segment_size, k, N) 633 node._codec = bad_codec 634 d.addCallback(_break_codec) 635 # now try to download it again. The broken codec will provide 636 # ciphertext that fails the hash test. 
637 d.addCallback(lambda ign: 638 self.shouldFail(BadCiphertextHashError, "badhash", 639 "hash failure in " 640 "ciphertext_hash_tree: segnum=0", 641 download_to_data, n)) 642 return d 643 d.addCallback(_uploaded) 644 return d 645 646 def OFFtest_download_segment_XXX(self): 647 self.basedir = self.mktemp() 648 self.set_up_grid() 649 self.c0 = self.g.clients[0] 650 651 # upload a file with multiple segments, and a non-default segsize, to 652 # exercise the offset-guessing code. This time we *do* tell the 653 # downloader about the unusual segsize, so it can guess right. 654 u = upload.Data(plaintext, None) 655 u.max_segment_size = 70 # 5 segs, 8-wide hashtree 656 con1 = MemoryConsumer() 657 con2 = MemoryConsumer() 658 d = self.c0.upload(u) 659 def _uploaded(ur): 660 n = self.c0.create_node_from_uri(ur.uri) 661 n._cnode._node._build_guessed_tables(u.max_segment_size) 662 d1 = n.read(con1, 70, 20) 663 #d2 = n.read(con2, 140, 20) 664 d2 = defer.succeed(None) 665 return defer.gatherResults([d1,d2]) 666 d.addCallback(_uploaded) 667 def _done(res): 668 self.failUnlessEqual("".join(con1.chunks), plaintext[70:90]) 669 self.failUnlessEqual("".join(con2.chunks), plaintext[140:160]) 670 #d.addCallback(_done) 671 return d 672 673 def test_duplicate_shares(self): 674 self.basedir = self.mktemp() 675 self.set_up_grid() 676 self.c0 = self.g.clients[0] 677 678 self.load_shares() 679 # make sure everybody has a copy of sh0. The second server contacted 680 # will report two shares, and the ShareFinder will handle the 681 # duplicate by attaching both to the same CommonShare instance. 
682 si = uri.from_string(immutable_uri).get_storage_index() 683 si_dir = storage_index_to_dir(si) 684 sh0_file = [sharefile 685 for (shnum, serverid, sharefile) 686 in self.find_shares(immutable_uri) 687 if shnum == 0][0] 688 sh0_data = open(sh0_file, "rb").read() 689 for clientnum in immutable_shares: 690 if 0 in immutable_shares[clientnum]: 691 continue 692 cdir = self.get_serverdir(clientnum) 693 target = os.path.join(cdir, "shares", si_dir, "0") 694 outf = open(target, "wb") 695 outf.write(sh0_data) 696 outf.close() 697 698 d = self.download_immutable() 699 return d 700 701 def test_verifycap(self): 702 self.basedir = self.mktemp() 703 self.set_up_grid() 704 self.c0 = self.g.clients[0] 705 self.load_shares() 706 707 n = self.c0.create_node_from_uri(immutable_uri) 708 vcap = n.get_verify_cap().to_string() 709 vn = self.c0.create_node_from_uri(vcap) 710 d = download_to_data(vn) 711 def _got_ciphertext(ciphertext): 712 self.failUnlessEqual(len(ciphertext), len(plaintext)) 713 self.failIfEqual(ciphertext, plaintext) 714 d.addCallback(_got_ciphertext) 715 return d 716 717 class BrokenDecoder(CRSDecoder): 718 def decode(self, shares, shareids): 719 d = CRSDecoder.decode(self, shares, shareids) 720 def _decoded(buffers): 721 def _corruptor(s, which): 722 return s[:which] + chr(ord(s[which])^0x01) + s[which+1:] 723 buffers[0] = _corruptor(buffers[0], 0) # flip lsb of first byte 724 return buffers 725 d.addCallback(_decoded) 726 return d 727 728 729 class PausingConsumer(MemoryConsumer): 730 def __init__(self): 731 MemoryConsumer.__init__(self) 732 self.size = 0 733 self.writes = 0 734 def write(self, data): 735 self.size += len(data) 736 self.writes += 1 737 if self.writes <= 2: 738 # we happen to use 4 segments, and want to avoid pausing on the 739 # last one (since then the _unpause timer will still be running) 740 self.producer.pauseProducing() 741 reactor.callLater(0.1, self._unpause) 742 return MemoryConsumer.write(self, data) 743 def _unpause(self): 744 
self.producer.resumeProducing() 745 746 class PausingAndStoppingConsumer(PausingConsumer): 747 def write(self, data): 748 self.producer.pauseProducing() 749 reactor.callLater(0.5, self._stop) 750 def _stop(self): 751 self.producer.stopProducing() 752 753 class StoppingConsumer(PausingConsumer): 754 def write(self, data): 755 self.producer.stopProducing() 756 757 class StallingConsumer(MemoryConsumer): 758 def __init__(self, halfway_cb): 759 MemoryConsumer.__init__(self) 760 self.halfway_cb = halfway_cb 761 self.writes = 0 762 def write(self, data): 763 self.writes += 1 764 if self.writes == 1: 765 self.halfway_cb() 766 return MemoryConsumer.write(self, data) 767 768 class Corruption(_Base, unittest.TestCase): 769 770 def _corrupt_flip(self, ign, imm_uri, which): 771 log.msg("corrupt %d" % which) 772 def _corruptor(s, debug=False): 773 return s[:which] + chr(ord(s[which])^0x01) + s[which+1:] 774 self.corrupt_shares_numbered(imm_uri, [0], _corruptor) 775 776 def _corrupt_set(self, ign, imm_uri, which, newvalue): 777 log.msg("corrupt %d" % which) 778 def _corruptor(s, debug=False): 779 return s[:which] + chr(newvalue) + s[which+1:] 780 self.corrupt_shares_numbered(imm_uri, [0], _corruptor) 781 782 def test_each_byte(self): 783 # Setting catalog_detection=True performs an exhaustive test of the 784 # Downloader's response to corruption in the lsb of each byte of the 785 # 2070-byte share, with two goals: make sure we tolerate all forms of 786 # corruption (i.e. don't hang or return bad data), and make a list of 787 # which bytes can be corrupted without influencing the download 788 # (since we don't need every byte of the share). That takes 50s to 789 # run on my laptop and doesn't have any actual asserts, so we don't 790 # normally do that. 
791 self.catalog_detection = False 792 793 self.basedir = "download/Corruption/each_byte" 794 self.set_up_grid() 795 self.c0 = self.g.clients[0] 796 797 # to exercise the block-hash-tree code properly, we need to have 798 # multiple segments. We don't tell the downloader about the different 799 # segsize, so it guesses wrong and must do extra roundtrips. 800 u = upload.Data(plaintext, None) 801 u.max_segment_size = 120 # 3 segs, 4-wide hashtree 802 803 if self.catalog_detection: 804 undetected = spans.Spans() 805 806 def _download(ign, imm_uri, which, expected): 807 n = self.c0.create_node_from_uri(imm_uri) 808 # for this test to work, we need to have a new Node each time. 809 # Make sure the NodeMaker's weakcache hasn't interfered. 810 assert not n._cnode._node._shares 811 d = download_to_data(n) 812 def _got_data(data): 813 self.failUnlessEqual(data, plaintext) 814 shnums = sorted([s._shnum for s in n._cnode._node._shares]) 815 no_sh0 = bool(0 not in shnums) 816 sh0 = [s for s in n._cnode._node._shares if s._shnum == 0] 817 sh0_had_corruption = False 818 if sh0 and sh0[0].had_corruption: 819 sh0_had_corruption = True 820 num_needed = len(n._cnode._node._shares) 821 if self.catalog_detection: 822 detected = no_sh0 or sh0_had_corruption or (num_needed!=3) 823 if not detected: 824 undetected.add(which, 1) 825 if expected == "no-sh0": 826 self.failIfIn(0, shnums) 827 elif expected == "0bad-need-3": 828 self.failIf(no_sh0) 829 self.failUnless(sh0[0].had_corruption) 830 self.failUnlessEqual(num_needed, 3) 831 elif expected == "need-4th": 832 self.failIf(no_sh0) 833 self.failUnless(sh0[0].had_corruption) 834 self.failIfEqual(num_needed, 3) 835 d.addCallback(_got_data) 836 return d 837 838 839 d = self.c0.upload(u) 840 def _uploaded(ur): 841 imm_uri = ur.uri 842 self.shares = self.copy_shares(imm_uri) 843 d = defer.succeed(None) 844 # 'victims' is a list of corruption tests to run. 
Each one flips 845 # the low-order bit of the specified offset in the share file (so 846 # offset=0 is the MSB of the container version, offset=15 is the 847 # LSB of the share version, offset=24 is the MSB of the 848 # data-block-offset, and offset=48 is the first byte of the first 849 # data-block). Each one also specifies what sort of corruption 850 # we're expecting to see. 851 no_sh0_victims = [0,1,2,3] # container version 852 need3_victims = [ ] # none currently in this category 853 # when the offsets are corrupted, the Share will be unable to 854 # retrieve the data it wants (because it thinks that data lives 855 # off in the weeds somewhere), and Share treats DataUnavailable 856 # as abandon-this-share, so in general we'll be forced to look 857 # for a 4th share. 858 need_4th_victims = [12,13,14,15, # share version 859 24,25,26,27, # offset[data] 860 32,33,34,35, # offset[crypttext_hash_tree] 861 36,37,38,39, # offset[block_hashes] 862 44,45,46,47, # offset[UEB] 863 ] 864 need_4th_victims.append(48) # block data 865 # when corrupting hash trees, we must corrupt a value that isn't 866 # directly set from somewhere else. Since we download data from 867 # seg0, corrupt something on its hash chain, like [2] (the 868 # right-hand child of the root) 869 need_4th_victims.append(600+2*32) # block_hashes[2] 870 # Share.loop is pretty conservative: it abandons the share at the 871 # first sign of corruption. It doesn't strictly need to be this 872 # way: if the UEB were corrupt, we could still get good block 873 # data from that share, as long as there was a good copy of the 874 # UEB elsewhere. If this behavior is relaxed, then corruption in 875 # the following fields (which are present in multiple shares) 876 # should fall into the "need3_victims" case instead of the 877 # "need_4th_victims" case. 
878 need_4th_victims.append(376+2*32) # crypttext_hash_tree[2] 879 need_4th_victims.append(824) # share_hashes 880 need_4th_victims.append(994) # UEB length 881 need_4th_victims.append(998) # UEB 882 corrupt_me = ([(i,"no-sh0") for i in no_sh0_victims] + 883 [(i, "0bad-need-3") for i in need3_victims] + 884 [(i, "need-4th") for i in need_4th_victims]) 885 if self.catalog_detection: 886 corrupt_me = [(i, "") for i in range(len(self.sh0_orig))] 887 for i,expected in corrupt_me: 888 # All these tests result in a successful download. What we're 889 # measuring is how many shares the downloader had to use. 890 d.addCallback(self._corrupt_flip, imm_uri, i) 891 d.addCallback(_download, imm_uri, i, expected) 892 d.addCallback(lambda ign: self.restore_all_shares(self.shares)) 893 d.addCallback(fireEventually) 894 corrupt_values = [(3, 2, "no-sh0"), 895 (15, 2, "need-4th"), # share looks v2 896 ] 897 for i,newvalue,expected in corrupt_values: 898 d.addCallback(self._corrupt_set, imm_uri, i, newvalue) 899 d.addCallback(_download, imm_uri, i, expected) 900 d.addCallback(lambda ign: self.restore_all_shares(self.shares)) 901 d.addCallback(fireEventually) 902 return d 903 d.addCallback(_uploaded) 904 def _show_results(ign): 905 print 906 print ("of [0:%d], corruption ignored in %s" % 907 (len(self.sh0_orig), undetected.dump())) 908 if self.catalog_detection: 909 d.addCallback(_show_results) 910 # of [0:2070], corruption ignored in len=1133: 911 # [4-11],[16-23],[28-31],[152-439],[600-663],[1309-2069] 912 # [4-11]: container sizes 913 # [16-23]: share block/data sizes 914 # [152-375]: plaintext hash tree 915 # [376-408]: crypttext_hash_tree[0] (root) 916 # [408-439]: crypttext_hash_tree[1] (computed) 917 # [600-631]: block hash tree[0] (root) 918 # [632-663]: block hash tree[1] (computed) 919 # [1309-]: reserved+unused UEB space 920 return d 921 922 def test_failure(self): 923 # this test corrupts all shares in the same way, and asserts that the 924 # download fails. 
925 926 self.basedir = "download/Corruption/failure" 927 self.set_up_grid() 928 self.c0 = self.g.clients[0] 929 930 # to exercise the block-hash-tree code properly, we need to have 931 # multiple segments. We don't tell the downloader about the different 932 # segsize, so it guesses wrong and must do extra roundtrips. 933 u = upload.Data(plaintext, None) 934 u.max_segment_size = 120 # 3 segs, 4-wide hashtree 935 936 d = self.c0.upload(u) 937 def _uploaded(ur): 938 imm_uri = ur.uri 939 self.shares = self.copy_shares(imm_uri) 940 941 corrupt_me = [(48, "block data", "Last failure: None"), 942 (600+2*32, "block_hashes[2]", "BadHashError"), 943 (376+2*32, "crypttext_hash_tree[2]", "BadHashError"), 944 (824, "share_hashes", "BadHashError"), 945 ] 946 def _download(imm_uri): 947 n = self.c0.create_node_from_uri(imm_uri) 948 # for this test to work, we need to have a new Node each time. 949 # Make sure the NodeMaker's weakcache hasn't interfered. 950 assert not n._cnode._node._shares 951 return download_to_data(n) 952 953 d = defer.succeed(None) 954 for i,which,substring in corrupt_me: 955 # All these tests result in a failed download. 956 d.addCallback(self._corrupt_flip_all, imm_uri, i) 957 d.addCallback(lambda ign: 958 self.shouldFail(NotEnoughSharesError, which, 959 substring, 960 _download, imm_uri)) 961 d.addCallback(lambda ign: self.restore_all_shares(self.shares)) 962 d.addCallback(fireEventually) 963 return d 964 d.addCallback(_uploaded) 965 966 return d 967 968 def _corrupt_flip_all(self, ign, imm_uri, which): 969 def _corruptor(s, debug=False): 970 return s[:which] + chr(ord(s[which])^0x01) + s[which+1:] 971 self.corrupt_all_shares(imm_uri, _corruptor) 972 973 class DownloadV2(_Base, unittest.TestCase): 974 # tests which exercise v2-share code. They first upload a file with 975 # FORCE_V2 set. 
976 977 def setUp(self): 978 d = defer.maybeDeferred(_Base.setUp, self) 979 def _set_force_v2(ign): 980 self.old_force_v2 = layout.FORCE_V2 981 layout.FORCE_V2 = True 982 d.addCallback(_set_force_v2) 983 return d 984 def tearDown(self): 985 layout.FORCE_V2 = self.old_force_v2 986 return _Base.tearDown(self) 987 988 def test_download(self): 989 self.basedir = self.mktemp() 990 self.set_up_grid() 991 self.c0 = self.g.clients[0] 992 993 # upload a file 994 u = upload.Data(plaintext, None) 995 d = self.c0.upload(u) 996 def _uploaded(ur): 997 imm_uri = ur.uri 998 n = self.c0.create_node_from_uri(imm_uri) 999 return download_to_data(n) 1000 d.addCallback(_uploaded) 1001 return d 1002 1003 def test_download_no_overrun(self): 1004 self.basedir = self.mktemp() 1005 self.set_up_grid() 1006 self.c0 = self.g.clients[0] 1007 1008 # tweak the client's copies of server-version data, so it believes 1009 # that they're old and can't handle reads that overrun the length of 1010 # the share. This exercises a different code path. 
1011 for (peerid, rref) in self.c0.storage_broker.get_all_servers(): 1012 v1 = rref.version["http://allmydata.org/tahoe/protocols/storage/v1"] 1013 v1["tolerates-immutable-read-overrun"] = False 1014 1015 # upload a file 1016 u = upload.Data(plaintext, None) 1017 d = self.c0.upload(u) 1018 def _uploaded(ur): 1019 imm_uri = ur.uri 1020 n = self.c0.create_node_from_uri(imm_uri) 1021 return download_to_data(n) 1022 d.addCallback(_uploaded) 1023 return d 1024 1025 def OFF_test_no_overrun_corrupt_shver(self): # unnecessary 1026 self.basedir = self.mktemp() 1027 self.set_up_grid() 1028 self.c0 = self.g.clients[0] 1029 1030 for (peerid, rref) in self.c0.storage_broker.get_all_servers(): 1031 v1 = rref.version["http://allmydata.org/tahoe/protocols/storage/v1"] 1032 v1["tolerates-immutable-read-overrun"] = False 1033 1034 # upload a file 1035 u = upload.Data(plaintext, None) 1036 d = self.c0.upload(u) 1037 def _uploaded(ur): 1038 imm_uri = ur.uri 1039 def _do_corrupt(which, newvalue): 1040 def _corruptor(s, debug=False): 1041 return s[:which] + chr(newvalue) + s[which+1:] 1042 self.corrupt_shares_numbered(imm_uri, [0], _corruptor) 1043 _do_corrupt(12+3, 0x00) 1044 n = self.c0.create_node_from_uri(imm_uri) 1045 d = download_to_data(n) 1046 def _got_data(data): 1047 self.failUnlessEqual(data, plaintext) 1048 d.addCallback(_got_data) 1049 return d 1050 d.addCallback(_uploaded) 1051 return d -
src/allmydata/test/test_encode.py
diff --git a/src/allmydata/test/test_encode.py b/src/allmydata/test/test_encode.py index 1108e18..c06fbbd 100644
a b 1 1 from zope.interface import implements 2 2 from twisted.trial import unittest 3 from twisted.internet import defer , reactor3 from twisted.internet import defer 4 4 from twisted.python.failure import Failure 5 5 from foolscap.api import fireEventually 6 from allmydata import hashtree,uri7 from allmydata.immutable import encode, upload, download6 from allmydata import uri 7 from allmydata.immutable import encode, upload, checker 8 8 from allmydata.util import hashutil 9 9 from allmydata.util.assertutil import _assert 10 from allmydata.util.consumer import MemoryConsumer 11 from allmydata.interfaces import IStorageBucketWriter, IStorageBucketReader, \ 12 NotEnoughSharesError, IStorageBroker, UploadUnhappinessError 13 from allmydata.monitor import Monitor 14 import allmydata.test.common_util as testutil 10 from allmydata.util.consumer import download_to_data 11 from allmydata.interfaces import IStorageBucketWriter, IStorageBucketReader 12 from allmydata.test.no_network import GridTestMixin 15 13 16 14 class LostPeerError(Exception): 17 15 pass … … class LostPeerError(Exception): 19 17 def flip_bit(good): # flips the last bit 20 18 return good[:-1] + chr(ord(good[-1]) ^ 0x01) 21 19 22 class FakeStorageBroker:23 implements(IStorageBroker)24 25 20 class FakeBucketReaderWriterProxy: 26 21 implements(IStorageBucketWriter, IStorageBucketReader) 27 22 # these are used for both reading and writing … … class FakeBucketReaderWriterProxy: 59 54 self.blocks[segmentnum] = data 60 55 return defer.maybeDeferred(_try) 61 56 62 def put_plaintext_hashes(self, hashes):63 def _try():64 assert not self.closed65 assert not self.plaintext_hashes66 self.plaintext_hashes = hashes67 return defer.maybeDeferred(_try)68 69 57 def put_crypttext_hashes(self, hashes): 70 58 def _try(): 71 59 assert not self.closed … … class ValidatedExtendedURIProxy(unittest.TestCase): 223 211 fb = FakeBucketReaderWriterProxy() 224 212 fb.put_uri_extension(uebstring) 225 213 verifycap = 
uri.CHKFileVerifierURI(storage_index='x'*16, uri_extension_hash=uebhash, needed_shares=self.K, total_shares=self.M, size=self.SIZE) 226 vup = download.ValidatedExtendedURIProxy(fb, verifycap)214 vup = checker.ValidatedExtendedURIProxy(fb, verifycap) 227 215 return vup.start() 228 216 229 217 def _test_accept(self, uebdict): … … class ValidatedExtendedURIProxy(unittest.TestCase): 237 225 238 226 def _test_reject(self, uebdict): 239 227 d = self._test(uebdict) 240 d.addBoth(self._should_fail, (KeyError, download.BadURIExtension))228 d.addBoth(self._should_fail, (KeyError, checker.BadURIExtension)) 241 229 return d 242 230 243 231 def test_accept_minimal(self): … … class Encode(unittest.TestCase): 333 321 334 322 return d 335 323 336 # a series of 3*3 tests to check out edge conditions. One axis is how the337 # plaintext is divided into segments: kn+(-1,0,1). Another way to express338 # that is that n%k == -1 or 0 or 1. For example, for 25-byte segments, we339 # might test 74 bytes, 75 bytes, and 76 bytes.340 341 # on the other axis is how many leaves in the block hash tree we wind up342 # with, relative to a power of 2, so 2^a+(-1,0,1). Each segment turns343 # into a single leaf. So we'd like to check out, e.g., 3 segments, 4344 # segments, and 5 segments.345 346 # that results in the following series of data lengths:347 # 3 segs: 74, 75, 51348 # 4 segs: 99, 100, 76349 # 5 segs: 124, 125, 101350 351 # all tests encode to 100 shares, which means the share hash tree will352 # have 128 leaves, which means that buckets will be given an 8-long share353 # hash chain354 355 # all 3-segment files will have a 4-leaf blockhashtree, and thus expect356 # to get 7 blockhashes. 4-segment files will also get 4-leaf block hash357 # trees and 7 blockhashes. 
5-segment files will get 8-leaf block hash358 # trees, which get 15 blockhashes.359 360 324 def test_send_74(self): 361 325 # 3 segments (25, 25, 24) 362 326 return self.do_encode(25, 74, 100, 3, 7, 8) … … class Encode(unittest.TestCase): 387 351 # 5 segments: 25, 25, 25, 25, 1 388 352 return self.do_encode(25, 101, 100, 5, 15, 8) 389 353 390 class PausingConsumer(MemoryConsumer):391 def __init__(self):392 MemoryConsumer.__init__(self)393 self.size = 0394 self.writes = 0395 def write(self, data):396 self.size += len(data)397 self.writes += 1398 if self.writes <= 2:399 # we happen to use 4 segments, and want to avoid pausing on the400 # last one (since then the _unpause timer will still be running)401 self.producer.pauseProducing()402 reactor.callLater(0.1, self._unpause)403 return MemoryConsumer.write(self, data)404 def _unpause(self):405 self.producer.resumeProducing()406 407 class PausingAndStoppingConsumer(PausingConsumer):408 def write(self, data):409 self.producer.pauseProducing()410 reactor.callLater(0.5, self._stop)411 def _stop(self):412 self.producer.stopProducing()413 414 class StoppingConsumer(PausingConsumer):415 def write(self, data):416 self.producer.stopProducing()417 418 class Roundtrip(unittest.TestCase, testutil.ShouldFailMixin):419 timeout = 2400 # It takes longer than 240 seconds on Zandr's ARM box.420 def send_and_recover(self, k_and_happy_and_n=(25,75,100),421 AVAILABLE_SHARES=None,422 datalen=76,423 max_segment_size=25,424 bucket_modes={},425 recover_mode="recover",426 consumer=None,427 ):428 if AVAILABLE_SHARES is None:429 AVAILABLE_SHARES = k_and_happy_and_n[2]430 data = make_data(datalen)431 d = self.send(k_and_happy_and_n, AVAILABLE_SHARES,432 max_segment_size, bucket_modes, data)433 # that fires with (uri_extension_hash, e, shareholders)434 d.addCallback(self.recover, AVAILABLE_SHARES, recover_mode,435 consumer=consumer)436 # that fires with newdata437 def _downloaded((newdata, fd)):438 self.failUnless(newdata == data, str((len(newdata), 
len(data))))439 return fd440 d.addCallback(_downloaded)441 return d442 354 443 def send(self, k_and_happy_and_n, AVAILABLE_SHARES, max_segment_size, 444 bucket_modes, data): 445 k, happy, n = k_and_happy_and_n 446 NUM_SHARES = k_and_happy_and_n[2] 447 if AVAILABLE_SHARES is None: 448 AVAILABLE_SHARES = NUM_SHARES 449 e = encode.Encoder() 450 u = upload.Data(data, convergence="some convergence string") 451 # force use of multiple segments by using a low max_segment_size 452 u.max_segment_size = max_segment_size 453 u.encoding_param_k = k 454 u.encoding_param_happy = happy 455 u.encoding_param_n = n 456 eu = upload.EncryptAnUploadable(u) 457 d = e.set_encrypted_uploadable(eu) 458 459 shareholders = {} 460 def _ready(res): 461 k,happy,n = e.get_param("share_counts") 462 assert n == NUM_SHARES # else we'll be completely confused 463 servermap = {} 464 for shnum in range(NUM_SHARES): 465 mode = bucket_modes.get(shnum, "good") 466 peer = FakeBucketReaderWriterProxy(mode, "peer%d" % shnum) 467 shareholders[shnum] = peer 468 servermap.setdefault(shnum, set()).add(peer.get_peerid()) 469 e.set_shareholders(shareholders, servermap) 470 return e.start() 471 d.addCallback(_ready) 472 def _sent(res): 473 d1 = u.get_encryption_key() 474 d1.addCallback(lambda key: (res, key, shareholders)) 475 return d1 476 d.addCallback(_sent) 477 return d 355 class Roundtrip(GridTestMixin, unittest.TestCase): 478 356 479 def recover(self, (res, key, shareholders), AVAILABLE_SHARES, 480 recover_mode, consumer=None): 481 verifycap = res 482 483 if "corrupt_key" in recover_mode: 484 # we corrupt the key, so that the decrypted data is corrupted and 485 # will fail the plaintext hash check. Since we're manually 486 # attaching shareholders, the fact that the storage index is also 487 # corrupted doesn't matter. 
488 key = flip_bit(key) 489 490 u = uri.CHKFileURI(key=key, 491 uri_extension_hash=verifycap.uri_extension_hash, 492 needed_shares=verifycap.needed_shares, 493 total_shares=verifycap.total_shares, 494 size=verifycap.size) 495 496 sb = FakeStorageBroker() 497 if not consumer: 498 consumer = MemoryConsumer() 499 innertarget = download.ConsumerAdapter(consumer) 500 target = download.DecryptingTarget(innertarget, u.key) 501 fd = download.CiphertextDownloader(sb, u.get_verify_cap(), target, monitor=Monitor()) 502 503 # we manually cycle the CiphertextDownloader through a number of steps that 504 # would normally be sequenced by a Deferred chain in 505 # CiphertextDownloader.start(), to give us more control over the process. 506 # In particular, by bypassing _get_all_shareholders, we skip 507 # permuted-peerlist selection. 508 for shnum, bucket in shareholders.items(): 509 if shnum < AVAILABLE_SHARES and bucket.closed: 510 fd.add_share_bucket(shnum, bucket) 511 fd._got_all_shareholders(None) 512 513 # Make it possible to obtain uri_extension from the shareholders. 514 # Arrange for shareholders[0] to be the first, so we can selectively 515 # corrupt the data it returns. 
516 uri_extension_sources = shareholders.values() 517 uri_extension_sources.remove(shareholders[0]) 518 uri_extension_sources.insert(0, shareholders[0]) 519 520 d = defer.succeed(None) 521 522 # have the CiphertextDownloader retrieve a copy of uri_extension itself 523 d.addCallback(fd._obtain_uri_extension) 524 525 if "corrupt_crypttext_hashes" in recover_mode: 526 # replace everybody's crypttext hash trees with a different one 527 # (computed over a different file), then modify our uri_extension 528 # to reflect the new crypttext hash tree root 529 def _corrupt_crypttext_hashes(unused): 530 assert isinstance(fd._vup, download.ValidatedExtendedURIProxy), fd._vup 531 assert fd._vup.crypttext_root_hash, fd._vup 532 badhash = hashutil.tagged_hash("bogus", "data") 533 bad_crypttext_hashes = [badhash] * fd._vup.num_segments 534 badtree = hashtree.HashTree(bad_crypttext_hashes) 535 for bucket in shareholders.values(): 536 bucket.crypttext_hashes = list(badtree) 537 fd._crypttext_hash_tree = hashtree.IncompleteHashTree(fd._vup.num_segments) 538 fd._crypttext_hash_tree.set_hashes({0: badtree[0]}) 539 return fd._vup 540 d.addCallback(_corrupt_crypttext_hashes) 541 542 # also have the CiphertextDownloader ask for hash trees 543 d.addCallback(fd._get_crypttext_hash_tree) 544 545 d.addCallback(fd._download_all_segments) 546 d.addCallback(fd._done) 547 def _done(t): 548 newdata = "".join(consumer.chunks) 549 return (newdata, fd) 550 d.addCallback(_done) 551 return d 552 553 def test_not_enough_shares(self): 554 d = self.send_and_recover((4,8,10), AVAILABLE_SHARES=2) 555 def _done(res): 556 self.failUnless(isinstance(res, Failure)) 557 self.failUnless(res.check(NotEnoughSharesError)) 558 d.addBoth(_done) 559 return d 560 561 def test_one_share_per_peer(self): 562 return self.send_and_recover() 563 564 def test_74(self): 565 return self.send_and_recover(datalen=74) 566 def test_75(self): 567 return self.send_and_recover(datalen=75) 568 def test_51(self): 569 return 
self.send_and_recover(datalen=51) 570 571 def test_99(self): 572 return self.send_and_recover(datalen=99) 573 def test_100(self): 574 return self.send_and_recover(datalen=100) 575 def test_76(self): 576 return self.send_and_recover(datalen=76) 577 578 def test_124(self): 579 return self.send_and_recover(datalen=124) 580 def test_125(self): 581 return self.send_and_recover(datalen=125) 582 def test_101(self): 583 return self.send_and_recover(datalen=101) 584 585 def test_pause(self): 586 # use a download target that does pauseProducing/resumeProducing a 587 # few times, then finishes 588 c = PausingConsumer() 589 d = self.send_and_recover(consumer=c) 590 return d 591 592 def test_pause_then_stop(self): 593 # use a download target that pauses, then stops. 594 c = PausingAndStoppingConsumer() 595 d = self.shouldFail(download.DownloadStopped, "test_pause_then_stop", 596 "our Consumer called stopProducing()", 597 self.send_and_recover, consumer=c) 598 return d 599 600 def test_stop(self): 601 # use a download targetthat does an immediate stop (ticket #473) 602 c = StoppingConsumer() 603 d = self.shouldFail(download.DownloadStopped, "test_stop", 604 "our Consumer called stopProducing()", 605 self.send_and_recover, consumer=c) 606 return d 607 608 # the following tests all use 4-out-of-10 encoding 609 610 def test_bad_blocks(self): 611 # the first 6 servers have bad blocks, which will be caught by the 612 # blockhashes 613 modemap = dict([(i, "bad block") 614 for i in range(6)] 615 + [(i, "good") 616 for i in range(6, 10)]) 617 return self.send_and_recover((4,8,10), bucket_modes=modemap) 618 619 def test_bad_blocks_failure(self): 620 # the first 7 servers have bad blocks, which will be caught by the 621 # blockhashes, and the download will fail 622 modemap = dict([(i, "bad block") 623 for i in range(7)] 624 + [(i, "good") 625 for i in range(7, 10)]) 626 d = self.send_and_recover((4,8,10), bucket_modes=modemap) 627 def _done(res): 628 self.failUnless(isinstance(res, 
Failure), res) 629 self.failUnless(res.check(NotEnoughSharesError), res) 630 d.addBoth(_done) 631 return d 632 633 def test_bad_blockhashes(self): 634 # the first 6 servers have bad block hashes, so the blockhash tree 635 # will not validate 636 modemap = dict([(i, "bad blockhash") 637 for i in range(6)] 638 + [(i, "good") 639 for i in range(6, 10)]) 640 return self.send_and_recover((4,8,10), bucket_modes=modemap) 641 642 def test_bad_blockhashes_failure(self): 643 # the first 7 servers have bad block hashes, so the blockhash tree 644 # will not validate, and the download will fail 645 modemap = dict([(i, "bad blockhash") 646 for i in range(7)] 647 + [(i, "good") 648 for i in range(7, 10)]) 649 d = self.send_and_recover((4,8,10), bucket_modes=modemap) 650 def _done(res): 651 self.failUnless(isinstance(res, Failure)) 652 self.failUnless(res.check(NotEnoughSharesError), res) 653 d.addBoth(_done) 654 return d 655 656 def test_bad_sharehashes(self): 657 # the first 6 servers have bad block hashes, so the sharehash tree 658 # will not validate 659 modemap = dict([(i, "bad sharehash") 660 for i in range(6)] 661 + [(i, "good") 662 for i in range(6, 10)]) 663 return self.send_and_recover((4,8,10), bucket_modes=modemap) 664 665 def assertFetchFailureIn(self, fd, where): 666 expected = {"uri_extension": 0, 667 "crypttext_hash_tree": 0, 668 } 669 if where is not None: 670 expected[where] += 1 671 self.failUnlessEqual(fd._fetch_failures, expected) 672 673 def test_good(self): 674 # just to make sure the test harness works when we aren't 675 # intentionally causing failures 676 modemap = dict([(i, "good") for i in range(0, 10)]) 677 d = self.send_and_recover((4,8,10), bucket_modes=modemap) 678 d.addCallback(self.assertFetchFailureIn, None) 679 return d 680 681 def test_bad_uri_extension(self): 682 # the first server has a bad uri_extension block, so we will fail 683 # over to a different server. 
684 modemap = dict([(i, "bad uri_extension") for i in range(1)] + 685 [(i, "good") for i in range(1, 10)]) 686 d = self.send_and_recover((4,8,10), bucket_modes=modemap) 687 d.addCallback(self.assertFetchFailureIn, "uri_extension") 688 return d 689 690 def test_bad_crypttext_hashroot(self): 691 # the first server has a bad crypttext hashroot, so we will fail 692 # over to a different server. 693 modemap = dict([(i, "bad crypttext hashroot") for i in range(1)] + 694 [(i, "good") for i in range(1, 10)]) 695 d = self.send_and_recover((4,8,10), bucket_modes=modemap) 696 d.addCallback(self.assertFetchFailureIn, "crypttext_hash_tree") 697 return d 698 699 def test_bad_crypttext_hashes(self): 700 # the first server has a bad crypttext hash block, so we will fail 701 # over to a different server. 702 modemap = dict([(i, "bad crypttext hash") for i in range(1)] + 703 [(i, "good") for i in range(1, 10)]) 704 d = self.send_and_recover((4,8,10), bucket_modes=modemap) 705 d.addCallback(self.assertFetchFailureIn, "crypttext_hash_tree") 706 return d 707 708 def test_bad_crypttext_hashes_failure(self): 709 # to test that the crypttext merkle tree is really being applied, we 710 # sneak into the download process and corrupt two things: we replace 711 # everybody's crypttext hashtree with a bad version (computed over 712 # bogus data), and we modify the supposedly-validated uri_extension 713 # block to match the new crypttext hashtree root. The download 714 # process should notice that the crypttext coming out of FEC doesn't 715 # match the tree, and fail. 716 717 modemap = dict([(i, "good") for i in range(0, 10)]) 718 d = self.send_and_recover((4,8,10), bucket_modes=modemap, 719 recover_mode=("corrupt_crypttext_hashes")) 720 def _done(res): 721 self.failUnless(isinstance(res, Failure)) 722 self.failUnless(res.check(hashtree.BadHashError), res) 723 d.addBoth(_done) 724 return d 357 # a series of 3*3 tests to check out edge conditions. 
One axis is how the 358 # plaintext is divided into segments: kn+(-1,0,1). Another way to express 359 # this is n%k == -1 or 0 or 1. For example, for 25-byte segments, we 360 # might test 74 bytes, 75 bytes, and 76 bytes. 725 361 726 def OFF_test_bad_plaintext(self): 727 # faking a decryption failure is easier: just corrupt the key 728 modemap = dict([(i, "good") for i in range(0, 10)]) 729 d = self.send_and_recover((4,8,10), bucket_modes=modemap, 730 recover_mode=("corrupt_key")) 731 def _done(res): 732 self.failUnless(isinstance(res, Failure)) 733 self.failUnless(res.check(hashtree.BadHashError), res) 734 d.addBoth(_done) 735 return d 362 # on the other axis is how many leaves in the block hash tree we wind up 363 # with, relative to a power of 2, so 2^a+(-1,0,1). Each segment turns 364 # into a single leaf. So we'd like to check out, e.g., 3 segments, 4 365 # segments, and 5 segments. 736 366 737 def test_bad_sharehashes_failure(self): 738 # all ten servers have bad share hashes, so the sharehash tree 739 # will not validate, and the download will fail 740 modemap = dict([(i, "bad sharehash") 741 for i in range(10)]) 742 d = self.send_and_recover((4,8,10), bucket_modes=modemap) 743 def _done(res): 744 self.failUnless(isinstance(res, Failure)) 745 self.failUnless(res.check(NotEnoughSharesError)) 746 d.addBoth(_done) 747 return d 367 # that results in the following series of data lengths: 368 # 3 segs: 74, 75, 51 369 # 4 segs: 99, 100, 76 370 # 5 segs: 124, 125, 101 748 371 749 def test_missing_sharehashes(self): 750 # the first 6 servers are missing their sharehashes, so the 751 # sharehash tree will not validate 752 modemap = dict([(i, "missing sharehash") 753 for i in range(6)] 754 + [(i, "good") 755 for i in range(6, 10)]) 756 return self.send_and_recover((4,8,10), bucket_modes=modemap) 757 758 def test_missing_sharehashes_failure(self): 759 # all servers are missing their sharehashes, so the sharehash tree will not validate, 760 # and the download will fail 
761 modemap = dict([(i, "missing sharehash") 762 for i in range(10)]) 763 d = self.send_and_recover((4,8,10), bucket_modes=modemap) 764 def _done(res): 765 self.failUnless(isinstance(res, Failure), res) 766 self.failUnless(res.check(NotEnoughSharesError), res) 767 d.addBoth(_done) 768 return d 372 # all tests encode to 100 shares, which means the share hash tree will 373 # have 128 leaves, which means that buckets will be given an 8-long share 374 # hash chain 769 375 770 def test_lost_one_shareholder(self): 771 # we have enough shareholders when we start, but one segment in we 772 # lose one of them. The upload should still succeed, as long as we 773 # still have 'servers_of_happiness' peers left. 774 modemap = dict([(i, "good") for i in range(9)] + 775 [(i, "lost") for i in range(9, 10)]) 776 return self.send_and_recover((4,8,10), bucket_modes=modemap) 777 778 def test_lost_one_shareholder_early(self): 779 # we have enough shareholders when we choose peers, but just before 780 # we send the 'start' message, we lose one of them. The upload should 781 # still succeed, as long as we still have 'servers_of_happiness' peers 782 # left. 783 modemap = dict([(i, "good") for i in range(9)] + 784 [(i, "lost-early") for i in range(9, 10)]) 785 return self.send_and_recover((4,8,10), bucket_modes=modemap) 786 787 def test_lost_many_shareholders(self): 788 # we have enough shareholders when we start, but one segment in we 789 # lose all but one of them. The upload should fail. 790 modemap = dict([(i, "good") for i in range(1)] + 791 [(i, "lost") for i in range(1, 10)]) 792 d = self.send_and_recover((4,8,10), bucket_modes=modemap) 793 def _done(res): 794 self.failUnless(isinstance(res, Failure)) 795 self.failUnless(res.check(UploadUnhappinessError), res) 796 d.addBoth(_done) 376 # all 3-segment files will have a 4-leaf blockhashtree, and thus expect 377 # to get 7 blockhashes. 4-segment files will also get 4-leaf block hash 378 # trees and 7 blockhashes. 
5-segment files will get 8-leaf block hash 379 # trees, which gets 15 blockhashes. 380 381 def test_74(self): return self.do_test_size(74) 382 def test_75(self): return self.do_test_size(75) 383 def test_51(self): return self.do_test_size(51) 384 def test_99(self): return self.do_test_size(99) 385 def test_100(self): return self.do_test_size(100) 386 def test_76(self): return self.do_test_size(76) 387 def test_124(self): return self.do_test_size(124) 388 def test_125(self): return self.do_test_size(125) 389 def test_101(self): return self.do_test_size(101) 390 391 def upload(self, data): 392 u = upload.Data(data, None) 393 u.max_segment_size = 25 394 u.encoding_param_k = 25 395 u.encoding_param_happy = 1 396 u.encoding_param_n = 100 397 d = self.c0.upload(u) 398 d.addCallback(lambda ur: self.c0.create_node_from_uri(ur.uri)) 399 # returns a FileNode 797 400 return d 798 401 799 def test_lost_all_shareholders(self): 800 # we have enough shareholders when we start, but one segment in we 801 # lose all of them. The upload should fail. 802 modemap = dict([(i, "lost") for i in range(10)]) 803 d = self.send_and_recover((4,8,10), bucket_modes=modemap) 804 def _done(res): 805 self.failUnless(isinstance(res, Failure)) 806 self.failUnless(res.check(UploadUnhappinessError)) 807 d.addBoth(_done) 402 def do_test_size(self, size): 403 self.basedir = self.mktemp() 404 self.set_up_grid() 405 self.c0 = self.g.clients[0] 406 DATA = "p"*size 407 d = self.upload(DATA) 408 d.addCallback(lambda n: download_to_data(n)) 409 def _downloaded(newdata): 410 self.failUnlessEqual(newdata, DATA) 411 d.addCallback(_downloaded) 808 412 return d -
src/allmydata/test/test_filenode.py
diff --git a/src/allmydata/test/test_filenode.py b/src/allmydata/test/test_filenode.py index 5f3feaa..61bb0e8 100644
a b 2 2 from twisted.trial import unittest 3 3 from allmydata import uri, client 4 4 from allmydata.monitor import Monitor 5 from allmydata.immutable.filenode import ImmutableFileNode, LiteralFileNode 5 from allmydata.immutable.literal import LiteralFileNode 6 from allmydata.immutable.filenode import ImmutableFileNode 6 7 from allmydata.mutable.filenode import MutableFileNode 7 from allmydata.util import hashutil , cachedir8 from allmydata.util import hashutil 8 9 from allmydata.util.consumer import download_to_data 9 10 10 11 class NotANode: … … class Node(unittest.TestCase): 30 31 needed_shares=3, 31 32 total_shares=10, 32 33 size=1000) 33 cf = cachedir.CacheFile("none") 34 fn1 = ImmutableFileNode(u, None, None, None, None, cf) 35 fn2 = ImmutableFileNode(u, None, None, None, None, cf) 34 fn1 = ImmutableFileNode(u, None, None, None, None) 35 fn2 = ImmutableFileNode(u, None, None, None, None) 36 36 self.failUnlessEqual(fn1, fn2) 37 37 self.failIfEqual(fn1, "I am not a filenode") 38 38 self.failIfEqual(fn1, NotANode()) -
src/allmydata/test/test_hung_server.py
diff --git a/src/allmydata/test/test_hung_server.py b/src/allmydata/test/test_hung_server.py index b1def16..8856ce2 100644
a b from allmydata.mutable.common import UnrecoverableFileError 10 10 from allmydata.storage.common import storage_index_to_dir 11 11 from allmydata.test.no_network import GridTestMixin 12 12 from allmydata.test.common import ShouldFailMixin, _corrupt_share_data 13 from allmydata.util.pollmixin import PollMixin 13 14 from allmydata.interfaces import NotEnoughSharesError 14 15 15 16 immutable_plaintext = "data" * 10000 16 17 mutable_plaintext = "muta" * 10000 17 18 18 class HungServerDownloadTest(GridTestMixin, ShouldFailMixin, unittest.TestCase): 19 class HungServerDownloadTest(GridTestMixin, ShouldFailMixin, PollMixin, 20 unittest.TestCase): 19 21 # Many of these tests take around 60 seconds on François's ARM buildslave: 20 22 # http://tahoe-lafs.org/buildbot/builders/FranXois%20lenny-armv5tel 21 23 # allmydata.test.test_hung_server.HungServerDownloadTest.test_2_good_8_broken_duplicate_share_fail once ERRORed after 197 seconds on Midnight Magic's NetBSD buildslave: … … class HungServerDownloadTest(GridTestMixin, ShouldFailMixin, unittest.TestCase): 36 38 for (id, ss) in servers: 37 39 self.g.unhang_server(id, **kwargs) 38 40 41 def _hang_shares(self, shnums, **kwargs): 42 # hang all servers who are holding the given shares 43 hung_serverids = set() 44 for (i_shnum, i_serverid, i_sharefile) in self.shares: 45 if i_shnum in shnums: 46 if i_serverid not in hung_serverids: 47 self.g.hang_server(i_serverid, **kwargs) 48 hung_serverids.add(i_serverid) 49 39 50 def _delete_all_shares_from(self, servers): 40 51 serverids = [id for (id, ss) in servers] 41 52 for (i_shnum, i_serverid, i_sharefile) in self.shares: … … class HungServerDownloadTest(GridTestMixin, ShouldFailMixin, unittest.TestCase): 113 124 stage_4_d = None # currently we aren't doing any tests which require this for mutable files 114 125 else: 115 126 d = download_to_data(n) 116 stage_4_d = n._downloader._all_downloads.keys()[0]._stage_4_d # too ugly! 
FIXME 127 #stage_4_d = n._downloader._all_downloads.keys()[0]._stage_4_d # too ugly! FIXME 128 stage_4_d = None 117 129 return (d, stage_4_d,) 118 130 119 131 def _wait_for_data(self, n): … … class HungServerDownloadTest(GridTestMixin, ShouldFailMixin, unittest.TestCase): 141 153 self._download_and_check) 142 154 else: 143 155 return self.shouldFail(NotEnoughSharesError, self.basedir, 144 " Failed to get enough shareholders",156 "ran out of shares", 145 157 self._download_and_check) 146 158 147 159 … … class HungServerDownloadTest(GridTestMixin, ShouldFailMixin, unittest.TestCase): 204 216 205 217 # The tests below do not currently pass for mutable files. 206 218 207 def test_3_good_7_hung (self):219 def test_3_good_7_hung_immutable(self): 208 220 d = defer.succeed(None) 209 for mutable in [False]: 210 d.addCallback(lambda ign: self._set_up(mutable, "test_3_good_7_hung")) 211 d.addCallback(lambda ign: self._hang(self.servers[3:])) 212 d.addCallback(lambda ign: self._download_and_check()) 221 d.addCallback(lambda ign: self._set_up(False, "test_3_good_7_hung")) 222 d.addCallback(lambda ign: self._hang(self.servers[3:])) 223 d.addCallback(lambda ign: self._download_and_check()) 213 224 return d 214 225 215 def test_2_good_8_hung_then_1_recovers(self): 226 def test_5_overdue_immutable(self): 227 # restrict the ShareFinder to only allow 5 outstanding requests, and 228 # arrange for the first 5 servers to hang. Then trigger the OVERDUE 229 # timers (simulating 10 seconds passed), at which point the 230 # ShareFinder should send additional queries and finish the download 231 # quickly. If we didn't have OVERDUE timers, this test would fail by 232 # timing out. 
233 done = [] 234 d = self._set_up(False, "test_5_overdue_immutable") 235 def _reduce_max_outstanding_requests_and_download(ign): 236 self._hang_shares(range(5)) 237 n = self.c0.create_node_from_uri(self.uri) 238 self._sf = n._cnode._node._sharefinder 239 self._sf.max_outstanding_requests = 5 240 self._sf.OVERDUE_TIMEOUT = 1000.0 241 d2 = download_to_data(n) 242 # start download, but don't wait for it to complete yet 243 def _done(res): 244 done.append(res) # we will poll for this later 245 d2.addBoth(_done) 246 d.addCallback(_reduce_max_outstanding_requests_and_download) 247 from foolscap.eventual import fireEventually, flushEventualQueue 248 # wait here a while 249 d.addCallback(lambda res: fireEventually(res)) 250 d.addCallback(lambda res: flushEventualQueue()) 251 d.addCallback(lambda ign: self.failIf(done)) 252 def _check_waiting(ign): 253 # all the share requests should now be stuck waiting 254 self.failUnlessEqual(len(self._sf.pending_requests), 5) 255 # but none should be marked as OVERDUE until the timers expire 256 self.failUnlessEqual(len(self._sf.overdue_requests), 0) 257 d.addCallback(_check_waiting) 258 def _mark_overdue(ign): 259 # declare four requests overdue, allowing new requests to take 260 # their place, and leaving one stuck. The finder will keep 261 # sending requests until there are 5 non-overdue ones 262 # outstanding, at which point we'll have 4 OVERDUE, 1 263 # stuck-but-not-overdue, and 4 live requests. All 4 live requests 264 # will retire before the download is complete and the ShareFinder 265 # is shut off. 
That will leave 4 OVERDUE and 1 266 # stuck-but-not-overdue, for a total of 5 requests in in 267 # _sf.pending_requests 268 for t in self._sf.overdue_timers.values()[:4]: 269 t.reset(-1.0) 270 # the timers ought to fire before the eventual-send does 271 return fireEventually() 272 d.addCallback(_mark_overdue) 273 def _we_are_done(): 274 return bool(done) 275 d.addCallback(lambda ign: self.poll(_we_are_done)) 276 def _check_done(ign): 277 self.failUnlessEqual(done, [immutable_plaintext]) 278 self.failUnlessEqual(len(self._sf.pending_requests), 5) 279 self.failUnlessEqual(len(self._sf.overdue_requests), 4) 280 d.addCallback(_check_done) 281 return d 282 283 def test_3_good_7_hung_mutable(self): 284 raise unittest.SkipTest("still broken") 216 285 d = defer.succeed(None) 217 for mutable in [False]: 218 d.addCallback(lambda ign: self._set_up(mutable, "test_2_good_8_hung_then_1_recovers")) 219 d.addCallback(lambda ign: self._hang(self.servers[2:3])) 220 d.addCallback(lambda ign: self._hang(self.servers[3:])) 221 d.addCallback(lambda ign: self._unhang(self.servers[2:3])) 222 d.addCallback(lambda ign: self._download_and_check()) 286 d.addCallback(lambda ign: self._set_up(True, "test_3_good_7_hung")) 287 d.addCallback(lambda ign: self._hang(self.servers[3:])) 288 d.addCallback(lambda ign: self._download_and_check()) 223 289 return d 224 290 225 def test_2_good_8_hung_then_1_recovers_ with_2_shares(self):291 def test_2_good_8_hung_then_1_recovers_immutable(self): 226 292 d = defer.succeed(None) 227 for mutable in [False]: 228 d.addCallback(lambda ign: self._set_up(mutable, "test_2_good_8_hung_then_1_recovers_with_2_shares")) 229 d.addCallback(lambda ign: self._copy_all_shares_from(self.servers[0:1], self.servers[2])) 230 d.addCallback(lambda ign: self._hang(self.servers[2:3])) 231 d.addCallback(lambda ign: self._hang(self.servers[3:])) 232 d.addCallback(lambda ign: self._unhang(self.servers[2:3])) 233 d.addCallback(lambda ign: self._download_and_check()) 293 
d.addCallback(lambda ign: self._set_up(False, "test_2_good_8_hung_then_1_recovers")) 294 d.addCallback(lambda ign: self._hang(self.servers[2:3])) 295 d.addCallback(lambda ign: self._hang(self.servers[3:])) 296 d.addCallback(lambda ign: self._unhang(self.servers[2:3])) 297 d.addCallback(lambda ign: self._download_and_check()) 298 return d 299 300 def test_2_good_8_hung_then_1_recovers_mutable(self): 301 raise unittest.SkipTest("still broken") 302 d = defer.succeed(None) 303 d.addCallback(lambda ign: self._set_up(True, "test_2_good_8_hung_then_1_recovers")) 304 d.addCallback(lambda ign: self._hang(self.servers[2:3])) 305 d.addCallback(lambda ign: self._hang(self.servers[3:])) 306 d.addCallback(lambda ign: self._unhang(self.servers[2:3])) 307 d.addCallback(lambda ign: self._download_and_check()) 234 308 return d 235 309 236 def test_failover_during_stage_4(self): 237 # See #287 310 def test_2_good_8_hung_then_1_recovers_with_2_shares_immutable(self): 238 311 d = defer.succeed(None) 239 for mutable in [False]: 240 d.addCallback(lambda ign: self._set_up(mutable, "test_failover_during_stage_4")) 241 d.addCallback(lambda ign: self._corrupt_all_shares_in(self.servers[2:3], _corrupt_share_data)) 242 d.addCallback(lambda ign: self._set_up(mutable, "test_failover_during_stage_4")) 243 d.addCallback(lambda ign: self._hang(self.servers[3:])) 244 d.addCallback(lambda ign: self._start_download()) 245 def _after_starting_download((doned, started4d)): 246 started4d.addCallback(lambda ign: self._unhang(self.servers[3:4])) 247 doned.addCallback(self._check) 248 return doned 249 d.addCallback(_after_starting_download) 312 d.addCallback(lambda ign: self._set_up(False, "test_2_good_8_hung_then_1_recovers_with_2_shares")) 313 d.addCallback(lambda ign: self._copy_all_shares_from(self.servers[0:1], self.servers[2])) 314 d.addCallback(lambda ign: self._hang(self.servers[2:3])) 315 d.addCallback(lambda ign: self._hang(self.servers[3:])) 316 d.addCallback(lambda ign: 
self._unhang(self.servers[2:3])) 317 d.addCallback(lambda ign: self._download_and_check()) 318 return d 250 319 320 def test_2_good_8_hung_then_1_recovers_with_2_shares_mutable(self): 321 raise unittest.SkipTest("still broken") 322 d = defer.succeed(None) 323 d.addCallback(lambda ign: self._set_up(True, "test_2_good_8_hung_then_1_recovers_with_2_shares")) 324 d.addCallback(lambda ign: self._copy_all_shares_from(self.servers[0:1], self.servers[2])) 325 d.addCallback(lambda ign: self._hang(self.servers[2:3])) 326 d.addCallback(lambda ign: self._hang(self.servers[3:])) 327 d.addCallback(lambda ign: self._unhang(self.servers[2:3])) 328 d.addCallback(lambda ign: self._download_and_check()) 251 329 return d -
src/allmydata/test/test_immutable.py
diff --git a/src/allmydata/test/test_immutable.py b/src/allmydata/test/test_immutable.py index a7eaa1d..813c5be 100644
a b from twisted.internet import defer 5 5 from twisted.trial import unittest 6 6 import random 7 7 8 class Test(common.ShareManglingMixin, unittest.TestCase):8 class Test(common.ShareManglingMixin, common.ShouldFailMixin, unittest.TestCase): 9 9 def test_test_code(self): 10 10 # The following process of stashing the shares, running 11 11 # replace_shares, and asserting that the new set of shares equals the … … class Test(common.ShareManglingMixin, unittest.TestCase): 18 18 return res 19 19 d.addCallback(_stash_it) 20 20 21 # The following process of deleting 8 of the shares and asserting that you can't 22 # download it is more to test this test code than to test the Tahoe code... 21 # The following process of deleting 8 of the shares and asserting 22 # that you can't download it is more to test this test code than to 23 # test the Tahoe code... 23 24 def _then_delete_8(unused=None): 24 25 self.replace_shares(stash[0], storage_index=self.uri.get_storage_index()) 25 26 for i in range(8): … … class Test(common.ShareManglingMixin, unittest.TestCase): 42 43 return d 43 44 44 45 def test_download(self): 45 """ Basic download. (This functionality is more or less already tested by test code in46 other modules, but this module is also going to test some more specific things about47 immutable download.)46 """ Basic download. (This functionality is more or less already 47 tested by test code in other modules, but this module is also going 48 to test some more specific things about immutable download.) 
48 49 """ 49 50 d = defer.succeed(None) 50 51 before_download_reads = self._count_reads() 51 52 def _after_download(unused=None): 52 53 after_download_reads = self._count_reads() 53 self.failIf(after_download_reads-before_download_reads > 27, (after_download_reads, before_download_reads)) 54 #print before_download_reads, after_download_reads 55 self.failIf(after_download_reads-before_download_reads > 27, 56 (after_download_reads, before_download_reads)) 54 57 d.addCallback(self._download_and_check_plaintext) 55 58 d.addCallback(_after_download) 56 59 return d 57 60 58 61 def test_download_from_only_3_remaining_shares(self): 59 """ Test download after 7 random shares (of the 10) have been removed. """ 62 """ Test download after 7 random shares (of the 10) have been 63 removed.""" 60 64 d = defer.succeed(None) 61 65 def _then_delete_7(unused=None): 62 66 for i in range(7): … … class Test(common.ShareManglingMixin, unittest.TestCase): 65 69 d.addCallback(_then_delete_7) 66 70 def _after_download(unused=None): 67 71 after_download_reads = self._count_reads() 72 #print before_download_reads, after_download_reads 68 73 self.failIf(after_download_reads-before_download_reads > 27, (after_download_reads, before_download_reads)) 69 74 d.addCallback(self._download_and_check_plaintext) 70 75 d.addCallback(_after_download) 71 76 return d 72 77 73 78 def test_download_from_only_3_shares_with_good_crypttext_hash(self): 74 """ Test download after 7 random shares (of the 10) have had their crypttext hash tree corrupted. 
""" 79 """ Test download after 7 random shares (of the 10) have had their 80 crypttext hash tree corrupted.""" 75 81 d = defer.succeed(None) 76 82 def _then_corrupt_7(unused=None): 77 83 shnums = range(10) … … class Test(common.ShareManglingMixin, unittest.TestCase): 84 90 return d 85 91 86 92 def test_download_abort_if_too_many_missing_shares(self): 87 """ Test that download gives up quickly when it realizes there aren't enough shares out 88 there.""" 89 d = defer.succeed(None) 90 def _then_delete_8(unused=None): 91 for i in range(8): 92 self._delete_a_share() 93 d.addCallback(_then_delete_8) 94 95 before_download_reads = self._count_reads() 96 def _attempt_to_download(unused=None): 97 d2 = download_to_data(self.n) 98 99 def _callb(res): 100 self.fail("Should have gotten an error from attempt to download, not %r" % (res,)) 101 def _errb(f): 102 self.failUnless(f.check(NotEnoughSharesError)) 103 d2.addCallbacks(_callb, _errb) 104 return d2 105 106 d.addCallback(_attempt_to_download) 107 108 def _after_attempt(unused=None): 109 after_download_reads = self._count_reads() 110 # To pass this test, you are required to give up before actually trying to read any 111 # share data. 112 self.failIf(after_download_reads-before_download_reads > 0, (after_download_reads, before_download_reads)) 113 d.addCallback(_after_attempt) 93 """ Test that download gives up quickly when it realizes there aren't 94 enough shares out there.""" 95 for i in range(8): 96 self._delete_a_share() 97 d = self.shouldFail(NotEnoughSharesError, "delete 8", None, 98 download_to_data, self.n) 99 # the new downloader pipelines a bunch of read requests in parallel, 100 # so don't bother asserting anything about the number of reads 114 101 return d 115 102 116 103 def test_download_abort_if_too_many_corrupted_shares(self): 117 """ Test that download gives up quickly when it realizes there aren't enough uncorrupted 118 shares out there. 
It should be able to tell because the corruption occurs in the 119 sharedata version number, which it checks first.""" 104 """Test that download gives up quickly when it realizes there aren't 105 enough uncorrupted shares out there. It should be able to tell 106 because the corruption occurs in the sharedata version number, which 107 it checks first.""" 120 108 d = defer.succeed(None) 121 109 def _then_corrupt_8(unused=None): 122 110 shnums = range(10) … … class Test(common.ShareManglingMixin, unittest.TestCase): 140 128 141 129 def _after_attempt(unused=None): 142 130 after_download_reads = self._count_reads() 143 # To pass this test, you are required to give up before reading all of the share 144 # data. Actually, we could give up sooner than 45 reads, but currently our download 145 # code does 45 reads. This test then serves as a "performance regression detector" 146 # -- if you change download code so that it takes *more* reads, then this test will 147 # fail. 148 self.failIf(after_download_reads-before_download_reads > 45, (after_download_reads, before_download_reads)) 131 #print before_download_reads, after_download_reads 132 # To pass this test, you are required to give up before reading 133 # all of the share data. Actually, we could give up sooner than 134 # 45 reads, but currently our download code does 45 reads. This 135 # test then serves as a "performance regression detector" -- if 136 # you change download code so that it takes *more* reads, then 137 # this test will fail. 
138 self.failIf(after_download_reads-before_download_reads > 45, 139 (after_download_reads, before_download_reads)) 149 140 d.addCallback(_after_attempt) 150 141 return d 151 142 152 143 153 # XXX extend these tests to show bad behavior of various kinds from servers: raising exception from each remove_foo() method, for example 144 # XXX extend these tests to show bad behavior of various kinds from servers: 145 # raising exception from each remove_foo() method, for example 154 146 155 147 # XXX test disconnect DeadReferenceError from get_buckets and get_block_whatsit 156 148 149 # TODO: delete this whole file -
src/allmydata/test/test_mutable.py
diff --git a/src/allmydata/test/test_mutable.py b/src/allmydata/test/test_mutable.py index 30d1083..021e196 100644
a b def make_nodemaker(s=None, num_peers=10): 197 197 keygen = client.KeyGenerator() 198 198 keygen.set_default_keysize(522) 199 199 nodemaker = NodeMaker(storage_broker, sh, None, 200 None, None, None,200 None, None, 201 201 {"k": 3, "n": 10}, keygen) 202 202 return nodemaker 203 203 -
src/allmydata/test/test_repairer.py
diff --git a/src/allmydata/test/test_repairer.py b/src/allmydata/test/test_repairer.py index 02264e4..bb30cc4 100644
a b from allmydata.test import common 3 3 from allmydata.monitor import Monitor 4 4 from allmydata import check_results 5 5 from allmydata.interfaces import NotEnoughSharesError 6 from allmydata.immutable import repairer,upload6 from allmydata.immutable import upload 7 7 from allmydata.util.consumer import download_to_data 8 8 from twisted.internet import defer 9 9 from twisted.trial import unittest … … WRITE_LEEWAY = 35 363 363 # Optimally, you could repair one of these (small) files in a single write. 364 364 DELTA_WRITES_PER_SHARE = 1 * WRITE_LEEWAY 365 365 366 class DownUpConnector(unittest.TestCase):367 def test_deferred_satisfaction(self):368 duc = repairer.DownUpConnector()369 duc.registerProducer(None, True) # just because you have to call registerProducer first370 # case 1: total data in buf is < requested data at time of request371 duc.write('\x01')372 d = duc.read_encrypted(2, False)373 def _then(data):374 self.failUnlessEqual(len(data), 2)375 self.failUnlessEqual(data[0], '\x01')376 self.failUnlessEqual(data[1], '\x02')377 d.addCallback(_then)378 duc.write('\x02')379 return d380 381 def test_extra(self):382 duc = repairer.DownUpConnector()383 duc.registerProducer(None, True) # just because you have to call registerProducer first384 # case 1: total data in buf is < requested data at time of request385 duc.write('\x01')386 d = duc.read_encrypted(2, False)387 def _then(data):388 self.failUnlessEqual(len(data), 2)389 self.failUnlessEqual(data[0], '\x01')390 self.failUnlessEqual(data[1], '\x02')391 d.addCallback(_then)392 duc.write('\x02\0x03')393 return d394 395 def test_short_reads_1(self):396 # You don't get fewer bytes than you requested -- instead you get no callback at all.397 duc = repairer.DownUpConnector()398 duc.registerProducer(None, True) # just because you have to call registerProducer first399 400 d = duc.read_encrypted(2, False)401 duc.write('\x04')402 403 def _callb(res):404 self.fail("Shouldn't have gotten this callback res: %s" % (res,))405 
d.addCallback(_callb)406 407 # Also in the other order of read-vs-write:408 duc2 = repairer.DownUpConnector()409 duc2.registerProducer(None, True) # just because you have to call registerProducer first410 duc2.write('\x04')411 d = duc2.read_encrypted(2, False)412 413 def _callb2(res):414 self.fail("Shouldn't have gotten this callback res: %s" % (res,))415 d.addCallback(_callb2)416 417 # But once the DUC is closed then you *do* get short reads.418 duc3 = repairer.DownUpConnector()419 duc3.registerProducer(None, True) # just because you have to call registerProducer first420 421 d = duc3.read_encrypted(2, False)422 duc3.write('\x04')423 duc3.close()424 def _callb3(res):425 self.failUnlessEqual(len(res), 1)426 self.failUnlessEqual(res[0], '\x04')427 d.addCallback(_callb3)428 return d429 430 def test_short_reads_2(self):431 # Also in the other order of read-vs-write.432 duc = repairer.DownUpConnector()433 duc.registerProducer(None, True) # just because you have to call registerProducer first434 435 duc.write('\x04')436 d = duc.read_encrypted(2, False)437 duc.close()438 439 def _callb(res):440 self.failUnlessEqual(len(res), 1)441 self.failUnlessEqual(res[0], '\x04')442 d.addCallback(_callb)443 return d444 445 def test_short_reads_3(self):446 # Also if it is closed before the read.447 duc = repairer.DownUpConnector()448 duc.registerProducer(None, True) # just because you have to call registerProducer first449 450 duc.write('\x04')451 duc.close()452 d = duc.read_encrypted(2, False)453 def _callb(res):454 self.failUnlessEqual(len(res), 1)455 self.failUnlessEqual(res[0], '\x04')456 d.addCallback(_callb)457 return d458 459 366 class Repairer(GridTestMixin, unittest.TestCase, RepairTestMixin, 460 367 common.ShouldFailMixin): 461 368 -
src/allmydata/test/test_system.py
diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index 3351102..61662f2 100644
a b from allmydata import uri 9 9 from allmydata.storage.mutable import MutableShareFile 10 10 from allmydata.storage.server import si_a2b 11 11 from allmydata.immutable import offloaded, upload 12 from allmydata.immutable.filenode import ImmutableFileNode, LiteralFileNode 12 from allmydata.immutable.literal import LiteralFileNode 13 from allmydata.immutable.filenode import ImmutableFileNode 13 14 from allmydata.util import idlib, mathutil 14 15 from allmydata.util import log, base32 15 16 from allmydata.util.consumer import MemoryConsumer, download_to_data -
src/allmydata/test/test_upload.py
diff --git a/src/allmydata/test/test_upload.py b/src/allmydata/test/test_upload.py index 917472a..25d2d08 100644
a b class EncodingParameters(GridTestMixin, unittest.TestCase, SetDEPMixin, 2086 2086 # upload with exactly 75 peers (shares_of_happiness) 2087 2087 # have a download fail 2088 2088 # cancel a download (need to implement more cancel stuff) 2089 2090 # from test_encode: 2091 # NoNetworkGrid, upload part of ciphertext, kill server, continue upload 2092 # check with Kevan, they want to live in test_upload, existing tests might cover 2093 # def test_lost_one_shareholder(self): # these are upload-side tests 2094 # def test_lost_one_shareholder_early(self): 2095 # def test_lost_many_shareholders(self): 2096 # def test_lost_all_shareholders(self): -
src/allmydata/test/test_util.py
diff --git a/src/allmydata/test/test_util.py b/src/allmydata/test/test_util.py index 0a326b3..2fceee5 100644
a b from twisted.trial import unittest 7 7 from twisted.internet import defer, reactor 8 8 from twisted.python.failure import Failure 9 9 from twisted.python import log 10 from hashlib import md5 10 11 11 12 from allmydata.util import base32, idlib, humanreadable, mathutil, hashutil 12 13 from allmydata.util import assertutil, fileutil, deferredutil, abbreviate 13 14 from allmydata.util import limiter, time_format, pollmixin, cachedir 14 15 from allmydata.util import statistics, dictutil, pipeline 15 16 from allmydata.util import log as tahoe_log 17 from allmydata.util.spans import Spans, overlap, DataSpans 16 18 17 19 class Base32(unittest.TestCase): 18 20 def test_b2a_matches_Pythons(self): … … class Log(unittest.TestCase): 1537 1539 tahoe_log.err(format="intentional sample error", 1538 1540 failure=f, level=tahoe_log.OPERATIONAL, umid="wO9UoQ") 1539 1541 self.flushLoggedErrors(SampleError) 1542 1543 1544 class SimpleSpans: 1545 # this is a simple+inefficient form of util.spans.Spans . We compare the 1546 # behavior of this reference model against the real (efficient) form. 
1547 1548 def __init__(self, _span_or_start=None, length=None): 1549 self._have = set() 1550 if length is not None: 1551 for i in range(_span_or_start, _span_or_start+length): 1552 self._have.add(i) 1553 elif _span_or_start: 1554 for (start,length) in _span_or_start: 1555 self.add(start, length) 1556 1557 def add(self, start, length): 1558 for i in range(start, start+length): 1559 self._have.add(i) 1560 return self 1561 1562 def remove(self, start, length): 1563 for i in range(start, start+length): 1564 self._have.discard(i) 1565 return self 1566 1567 def each(self): 1568 return sorted(self._have) 1569 1570 def __iter__(self): 1571 items = sorted(self._have) 1572 prevstart = None 1573 prevend = None 1574 for i in items: 1575 if prevstart is None: 1576 prevstart = prevend = i 1577 continue 1578 if i == prevend+1: 1579 prevend = i 1580 continue 1581 yield (prevstart, prevend-prevstart+1) 1582 prevstart = prevend = i 1583 if prevstart is not None: 1584 yield (prevstart, prevend-prevstart+1) 1585 1586 def __len__(self): 1587 # this also gets us bool(s) 1588 return len(self._have) 1589 1590 def __add__(self, other): 1591 s = self.__class__(self) 1592 for (start, length) in other: 1593 s.add(start, length) 1594 return s 1595 1596 def __sub__(self, other): 1597 s = self.__class__(self) 1598 for (start, length) in other: 1599 s.remove(start, length) 1600 return s 1601 1602 def __iadd__(self, other): 1603 for (start, length) in other: 1604 self.add(start, length) 1605 return self 1606 1607 def __isub__(self, other): 1608 for (start, length) in other: 1609 self.remove(start, length) 1610 return self 1611 1612 def __and__(self, other): 1613 s = self.__class__() 1614 for i in other.each(): 1615 if i in self._have: 1616 s.add(i, 1) 1617 return s 1618 1619 def __contains__(self, (start,length)): 1620 for i in range(start, start+length): 1621 if i not in self._have: 1622 return False 1623 return True 1624 1625 class ByteSpans(unittest.TestCase): 1626 def test_basic(self): 1627 s 
= Spans() 1628 self.failUnlessEqual(list(s), []) 1629 self.failIf(s) 1630 self.failIf((0,1) in s) 1631 self.failUnlessEqual(len(s), 0) 1632 1633 s1 = Spans(3, 4) # 3,4,5,6 1634 self._check1(s1) 1635 1636 s2 = Spans(s1) 1637 self._check1(s2) 1638 1639 s2.add(10,2) # 10,11 1640 self._check1(s1) 1641 self.failUnless((10,1) in s2) 1642 self.failIf((10,1) in s1) 1643 self.failUnlessEqual(list(s2.each()), [3,4,5,6,10,11]) 1644 self.failUnlessEqual(len(s2), 6) 1645 1646 s2.add(15,2).add(20,2) 1647 self.failUnlessEqual(list(s2.each()), [3,4,5,6,10,11,15,16,20,21]) 1648 self.failUnlessEqual(len(s2), 10) 1649 1650 s2.remove(4,3).remove(15,1) 1651 self.failUnlessEqual(list(s2.each()), [3,10,11,16,20,21]) 1652 self.failUnlessEqual(len(s2), 6) 1653 1654 s1 = SimpleSpans(3, 4) # 3 4 5 6 1655 s2 = SimpleSpans(5, 4) # 5 6 7 8 1656 i = s1 & s2 1657 self.failUnlessEqual(list(i.each()), [5, 6]) 1658 1659 def _check1(self, s): 1660 self.failUnlessEqual(list(s), [(3,4)]) 1661 self.failUnless(s) 1662 self.failUnlessEqual(len(s), 4) 1663 self.failIf((0,1) in s) 1664 self.failUnless((3,4) in s) 1665 self.failUnless((3,1) in s) 1666 self.failUnless((5,2) in s) 1667 self.failUnless((6,1) in s) 1668 self.failIf((6,2) in s) 1669 self.failIf((7,1) in s) 1670 self.failUnlessEqual(list(s.each()), [3,4,5,6]) 1671 1672 def test_math(self): 1673 s1 = Spans(0, 10) # 0,1,2,3,4,5,6,7,8,9 1674 s2 = Spans(5, 3) # 5,6,7 1675 s3 = Spans(8, 4) # 8,9,10,11 1676 1677 s = s1 - s2 1678 self.failUnlessEqual(list(s.each()), [0,1,2,3,4,8,9]) 1679 s = s1 - s3 1680 self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7]) 1681 s = s2 - s3 1682 self.failUnlessEqual(list(s.each()), [5,6,7]) 1683 s = s1 & s2 1684 self.failUnlessEqual(list(s.each()), [5,6,7]) 1685 s = s2 & s1 1686 self.failUnlessEqual(list(s.each()), [5,6,7]) 1687 s = s1 & s3 1688 self.failUnlessEqual(list(s.each()), [8,9]) 1689 s = s3 & s1 1690 self.failUnlessEqual(list(s.each()), [8,9]) 1691 s = s2 & s3 1692 self.failUnlessEqual(list(s.each()), []) 
1693 s = s3 & s2 1694 self.failUnlessEqual(list(s.each()), []) 1695 s = Spans() & s3 1696 self.failUnlessEqual(list(s.each()), []) 1697 s = s3 & Spans() 1698 self.failUnlessEqual(list(s.each()), []) 1699 1700 s = s1 + s2 1701 self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7,8,9]) 1702 s = s1 + s3 1703 self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7,8,9,10,11]) 1704 s = s2 + s3 1705 self.failUnlessEqual(list(s.each()), [5,6,7,8,9,10,11]) 1706 1707 s = Spans(s1) 1708 s -= s2 1709 self.failUnlessEqual(list(s.each()), [0,1,2,3,4,8,9]) 1710 s = Spans(s1) 1711 s -= s3 1712 self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7]) 1713 s = Spans(s2) 1714 s -= s3 1715 self.failUnlessEqual(list(s.each()), [5,6,7]) 1716 1717 s = Spans(s1) 1718 s += s2 1719 self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7,8,9]) 1720 s = Spans(s1) 1721 s += s3 1722 self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7,8,9,10,11]) 1723 s = Spans(s2) 1724 s += s3 1725 self.failUnlessEqual(list(s.each()), [5,6,7,8,9,10,11]) 1726 1727 def test_random(self): 1728 # attempt to increase coverage of corner cases by comparing behavior 1729 # of a simple-but-slow model implementation against the 1730 # complex-but-fast actual implementation, in a large number of random 1731 # operations 1732 S1 = SimpleSpans 1733 S2 = Spans 1734 s1 = S1(); s2 = S2() 1735 seed = "" 1736 def _create(subseed): 1737 ns1 = S1(); ns2 = S2() 1738 for i in range(10): 1739 what = md5(subseed+str(i)).hexdigest() 1740 start = int(what[2:4], 16) 1741 length = max(1,int(what[5:6], 16)) 1742 ns1.add(start, length); ns2.add(start, length) 1743 return ns1, ns2 1744 1745 #print 1746 for i in range(1000): 1747 what = md5(seed+str(i)).hexdigest() 1748 op = what[0] 1749 subop = what[1] 1750 start = int(what[2:4], 16) 1751 length = max(1,int(what[5:6], 16)) 1752 #print what 1753 if op in "0": 1754 if subop in "01234": 1755 s1 = S1(); s2 = S2() 1756 elif subop in "5678": 1757 s1 = S1(start, length); s2 = S2(start, length) 1758 
else: 1759 s1 = S1(s1); s2 = S2(s2) 1760 #print "s2 = %s" % s2.dump() 1761 elif op in "123": 1762 #print "s2.add(%d,%d)" % (start, length) 1763 s1.add(start, length); s2.add(start, length) 1764 elif op in "456": 1765 #print "s2.remove(%d,%d)" % (start, length) 1766 s1.remove(start, length); s2.remove(start, length) 1767 elif op in "78": 1768 ns1, ns2 = _create(what[7:11]) 1769 #print "s2 + %s" % ns2.dump() 1770 s1 = s1 + ns1; s2 = s2 + ns2 1771 elif op in "9a": 1772 ns1, ns2 = _create(what[7:11]) 1773 #print "%s - %s" % (s2.dump(), ns2.dump()) 1774 s1 = s1 - ns1; s2 = s2 - ns2 1775 elif op in "bc": 1776 ns1, ns2 = _create(what[7:11]) 1777 #print "s2 += %s" % ns2.dump() 1778 s1 += ns1; s2 += ns2 1779 elif op in "de": 1780 ns1, ns2 = _create(what[7:11]) 1781 #print "%s -= %s" % (s2.dump(), ns2.dump()) 1782 s1 -= ns1; s2 -= ns2 1783 else: 1784 ns1, ns2 = _create(what[7:11]) 1785 #print "%s &= %s" % (s2.dump(), ns2.dump()) 1786 s1 = s1 & ns1; s2 = s2 & ns2 1787 #print "s2 now %s" % s2.dump() 1788 self.failUnlessEqual(list(s1.each()), list(s2.each())) 1789 self.failUnlessEqual(len(s1), len(s2)) 1790 self.failUnlessEqual(bool(s1), bool(s2)) 1791 self.failUnlessEqual(list(s1), list(s2)) 1792 for j in range(10): 1793 what = md5(what[12:14]+str(j)).hexdigest() 1794 start = int(what[2:4], 16) 1795 length = max(1, int(what[5:6], 16)) 1796 span = (start, length) 1797 self.failUnlessEqual(bool(span in s1), bool(span in s2)) 1798 1799 1800 # s() 1801 # s(start,length) 1802 # s(s0) 1803 # s.add(start,length) : returns s 1804 # s.remove(start,length) 1805 # s.each() -> list of byte offsets, mostly for testing 1806 # list(s) -> list of (start,length) tuples, one per span 1807 # (start,length) in s -> True if (start..start+length-1) are all members 1808 # NOT equivalent to x in list(s) 1809 # len(s) -> number of bytes, for testing, bool(), and accounting/limiting 1810 # bool(s) (__len__) 1811 # s = s1+s2, s1-s2, +=s1, -=s1 1812 1813 def test_overlap(self): 1814 for a in range(20): 
1815 for b in range(10): 1816 for c in range(20): 1817 for d in range(10): 1818 self._test_overlap(a,b,c,d) 1819 1820 def _test_overlap(self, a, b, c, d): 1821 s1 = set(range(a,a+b)) 1822 s2 = set(range(c,c+d)) 1823 #print "---" 1824 #self._show_overlap(s1, "1") 1825 #self._show_overlap(s2, "2") 1826 o = overlap(a,b,c,d) 1827 expected = s1.intersection(s2) 1828 if not expected: 1829 self.failUnlessEqual(o, None) 1830 else: 1831 start,length = o 1832 so = set(range(start,start+length)) 1833 #self._show(so, "o") 1834 self.failUnlessEqual(so, expected) 1835 1836 def _show_overlap(self, s, c): 1837 import sys 1838 out = sys.stdout 1839 if s: 1840 for i in range(max(s)): 1841 if i in s: 1842 out.write(c) 1843 else: 1844 out.write(" ") 1845 out.write("\n") 1846 1847 def extend(s, start, length, fill): 1848 if len(s) >= start+length: 1849 return s 1850 assert len(fill) == 1 1851 return s + fill*(start+length-len(s)) 1852 1853 def replace(s, start, data): 1854 assert len(s) >= start+len(data) 1855 return s[:start] + data + s[start+len(data):] 1856 1857 class SimpleDataSpans: 1858 def __init__(self, other=None): 1859 self.missing = "" # "1" where missing, "0" where found 1860 self.data = "" 1861 if other: 1862 for (start, data) in other.get_chunks(): 1863 self.add(start, data) 1864 1865 def __len__(self): 1866 return len(self.missing.translate(None, "1")) 1867 def _dump(self): 1868 return [i for (i,c) in enumerate(self.missing) if c == "0"] 1869 def _have(self, start, length): 1870 m = self.missing[start:start+length] 1871 if not m or len(m)<length or int(m): 1872 return False 1873 return True 1874 def get_chunks(self): 1875 for i in self._dump(): 1876 yield (i, self.data[i]) 1877 def get_spans(self): 1878 return SimpleSpans([(start,len(data)) 1879 for (start,data) in self.get_chunks()]) 1880 def get(self, start, length): 1881 if self._have(start, length): 1882 return self.data[start:start+length] 1883 return None 1884 def pop(self, start, length): 1885 data = 
self.get(start, length) 1886 if data: 1887 self.remove(start, length) 1888 return data 1889 def remove(self, start, length): 1890 self.missing = replace(extend(self.missing, start, length, "1"), 1891 start, "1"*length) 1892 def add(self, start, data): 1893 self.missing = replace(extend(self.missing, start, len(data), "1"), 1894 start, "0"*len(data)) 1895 self.data = replace(extend(self.data, start, len(data), " "), 1896 start, data) 1897 1898 1899 class StringSpans(unittest.TestCase): 1900 def do_basic(self, klass): 1901 ds = klass() 1902 self.failUnlessEqual(len(ds), 0) 1903 self.failUnlessEqual(list(ds._dump()), []) 1904 self.failUnlessEqual(sum([len(d) for (s,d) in ds.get_chunks()]), 0) 1905 s = ds.get_spans() 1906 self.failUnlessEqual(ds.get(0, 4), None) 1907 self.failUnlessEqual(ds.pop(0, 4), None) 1908 ds.remove(0, 4) 1909 1910 ds.add(2, "four") 1911 self.failUnlessEqual(len(ds), 4) 1912 self.failUnlessEqual(list(ds._dump()), [2,3,4,5]) 1913 self.failUnlessEqual(sum([len(d) for (s,d) in ds.get_chunks()]), 4) 1914 s = ds.get_spans() 1915 self.failUnless((2,2) in s) 1916 self.failUnlessEqual(ds.get(0, 4), None) 1917 self.failUnlessEqual(ds.pop(0, 4), None) 1918 self.failUnlessEqual(ds.get(4, 4), None) 1919 1920 ds2 = klass(ds) 1921 self.failUnlessEqual(len(ds2), 4) 1922 self.failUnlessEqual(list(ds2._dump()), [2,3,4,5]) 1923 self.failUnlessEqual(sum([len(d) for (s,d) in ds2.get_chunks()]), 4) 1924 self.failUnlessEqual(ds2.get(0, 4), None) 1925 self.failUnlessEqual(ds2.pop(0, 4), None) 1926 self.failUnlessEqual(ds2.pop(2, 3), "fou") 1927 self.failUnlessEqual(sum([len(d) for (s,d) in ds2.get_chunks()]), 1) 1928 self.failUnlessEqual(ds2.get(2, 3), None) 1929 self.failUnlessEqual(ds2.get(5, 1), "r") 1930 self.failUnlessEqual(ds.get(2, 3), "fou") 1931 self.failUnlessEqual(sum([len(d) for (s,d) in ds.get_chunks()]), 4) 1932 1933 ds.add(0, "23") 1934 self.failUnlessEqual(len(ds), 6) 1935 self.failUnlessEqual(list(ds._dump()), [0,1,2,3,4,5]) 1936 
self.failUnlessEqual(sum([len(d) for (s,d) in ds.get_chunks()]), 6) 1937 self.failUnlessEqual(ds.get(0, 4), "23fo") 1938 self.failUnlessEqual(ds.pop(0, 4), "23fo") 1939 self.failUnlessEqual(sum([len(d) for (s,d) in ds.get_chunks()]), 2) 1940 self.failUnlessEqual(ds.get(0, 4), None) 1941 self.failUnlessEqual(ds.pop(0, 4), None) 1942 1943 ds = klass() 1944 ds.add(2, "four") 1945 ds.add(3, "ea") 1946 self.failUnlessEqual(ds.get(2, 4), "fear") 1947 1948 def do_scan(self, klass): 1949 # do a test with gaps and spans of size 1 and 2 1950 # left=(1,11) * right=(1,11) * gapsize=(1,2) 1951 # 111, 112, 121, 122, 211, 212, 221, 222 1952 # 211 1953 # 121 1954 # 112 1955 # 212 1956 # 222 1957 # 221 1958 # 111 1959 # 122 1960 # 11 1 1 11 11 11 1 1 111 1961 # 0123456789012345678901234567 1962 # abcdefghijklmnopqrstuvwxyz-= 1963 pieces = [(1, "bc"), 1964 (4, "e"), 1965 (7, "h"), 1966 (9, "jk"), 1967 (12, "mn"), 1968 (16, "qr"), 1969 (20, "u"), 1970 (22, "w"), 1971 (25, "z-="), 1972 ] 1973 p_elements = set([1,2,4,7,9,10,12,13,16,17,20,22,25,26,27]) 1974 S = "abcdefghijklmnopqrstuvwxyz-=" 1975 # TODO: when adding data, add capital letters, to make sure we aren't 1976 # just leaving the old data in place 1977 l = len(S) 1978 def base(): 1979 ds = klass() 1980 for start, data in pieces: 1981 ds.add(start, data) 1982 return ds 1983 def dump(s): 1984 p = set(s._dump()) 1985 # wow, this is the first time I've ever wanted ?: in python 1986 # note: this requires python2.5 1987 d = "".join([(S[i] if i in p else " ") for i in range(l)]) 1988 assert len(d) == l 1989 return d 1990 DEBUG = False 1991 for start in range(0, l): 1992 for end in range(start+1, l): 1993 # add [start-end) to the baseline 1994 which = "%d-%d" % (start, end-1) 1995 p_added = set(range(start, end)) 1996 b = base() 1997 if DEBUG: 1998 print 1999 print dump(b), which 2000 add = klass(); add.add(start, S[start:end]) 2001 print dump(add) 2002 b.add(start, S[start:end]) 2003 if DEBUG: 2004 print dump(b) 2005 # check that the 
new span is there 2006 d = b.get(start, end-start) 2007 self.failUnlessEqual(d, S[start:end], which) 2008 # check that all the original pieces are still there 2009 for t_start, t_data in pieces: 2010 t_len = len(t_data) 2011 self.failUnlessEqual(b.get(t_start, t_len), 2012 S[t_start:t_start+t_len], 2013 "%s %d+%d" % (which, t_start, t_len)) 2014 # check that a lot of subspans are mostly correct 2015 for t_start in range(l): 2016 for t_len in range(1,4): 2017 d = b.get(t_start, t_len) 2018 if d is not None: 2019 which2 = "%s+(%d-%d)" % (which, t_start, 2020 t_start+t_len-1) 2021 self.failUnlessEqual(d, S[t_start:t_start+t_len], 2022 which2) 2023 # check that removing a subspan gives the right value 2024 b2 = klass(b) 2025 b2.remove(t_start, t_len) 2026 removed = set(range(t_start, t_start+t_len)) 2027 for i in range(l): 2028 exp = (((i in p_elements) or (i in p_added)) 2029 and (i not in removed)) 2030 which2 = "%s-(%d-%d)" % (which, t_start, 2031 t_start+t_len-1) 2032 self.failUnlessEqual(bool(b2.get(i, 1)), exp, 2033 which2+" %d" % i) 2034 2035 def test_test(self): 2036 self.do_basic(SimpleDataSpans) 2037 self.do_scan(SimpleDataSpans) 2038 2039 def test_basic(self): 2040 self.do_basic(DataSpans) 2041 self.do_scan(DataSpans) 2042 2043 def test_random(self): 2044 # attempt to increase coverage of corner cases by comparing behavior 2045 # of a simple-but-slow model implementation against the 2046 # complex-but-fast actual implementation, in a large number of random 2047 # operations 2048 S1 = SimpleDataSpans 2049 S2 = DataSpans 2050 s1 = S1(); s2 = S2() 2051 seed = "" 2052 def _randstr(length, seed): 2053 created = 0 2054 pieces = [] 2055 while created < length: 2056 piece = md5(seed + str(created)).hexdigest() 2057 pieces.append(piece) 2058 created += len(piece) 2059 return "".join(pieces)[:length] 2060 def _create(subseed): 2061 ns1 = S1(); ns2 = S2() 2062 for i in range(10): 2063 what = md5(subseed+str(i)).hexdigest() 2064 start = int(what[2:4], 16) 2065 length = 
max(1,int(what[5:6], 16)) 2066 ns1.add(start, _randstr(length, what[7:9])); 2067 ns2.add(start, _randstr(length, what[7:9])) 2068 return ns1, ns2 2069 2070 #print 2071 for i in range(1000): 2072 what = md5(seed+str(i)).hexdigest() 2073 op = what[0] 2074 subop = what[1] 2075 start = int(what[2:4], 16) 2076 length = max(1,int(what[5:6], 16)) 2077 #print what 2078 if op in "0": 2079 if subop in "0123456": 2080 s1 = S1(); s2 = S2() 2081 else: 2082 s1, s2 = _create(what[7:11]) 2083 #print "s2 = %s" % list(s2._dump()) 2084 elif op in "123456": 2085 #print "s2.add(%d,%d)" % (start, length) 2086 s1.add(start, _randstr(length, what[7:9])); 2087 s2.add(start, _randstr(length, what[7:9])) 2088 elif op in "789abc": 2089 #print "s2.remove(%d,%d)" % (start, length) 2090 s1.remove(start, length); s2.remove(start, length) 2091 else: 2092 #print "s2.pop(%d,%d)" % (start, length) 2093 d1 = s1.pop(start, length); d2 = s2.pop(start, length) 2094 self.failUnlessEqual(d1, d2) 2095 #print "s1 now %s" % list(s1._dump()) 2096 #print "s2 now %s" % list(s2._dump()) 2097 self.failUnlessEqual(len(s1), len(s2)) 2098 self.failUnlessEqual(list(s1._dump()), list(s2._dump())) 2099 for j in range(100): 2100 what = md5(what[12:14]+str(j)).hexdigest() 2101 start = int(what[2:4], 16) 2102 length = max(1, int(what[5:6], 16)) 2103 d1 = s1.get(start, length); d2 = s2.get(start, length) 2104 self.failUnlessEqual(d1, d2, "%d+%d" % (start, length)) -
src/allmydata/test/test_web.py
diff --git a/src/allmydata/test/test_web.py b/src/allmydata/test/test_web.py index 3770864..a707722 100644
a b from nevow import rend 12 12 from allmydata import interfaces, uri, webish, dirnode 13 13 from allmydata.storage.shares import get_share_file 14 14 from allmydata.storage_client import StorageFarmBroker 15 from allmydata.immutable import upload, download 15 from allmydata.immutable import upload 16 from allmydata.immutable.downloader.status import DownloadStatus 16 17 from allmydata.dirnode import DirectoryNode 17 18 from allmydata.nodemaker import NodeMaker 18 19 from allmydata.unknown import UnknownNode … … class FakeUploader(service.Service): 75 76 76 77 class FakeHistory: 77 78 _all_upload_status = [upload.UploadStatus()] 78 _all_download_status = [ download.DownloadStatus()]79 _all_download_status = [DownloadStatus("storage_index", 1234)] 79 80 _all_mapupdate_statuses = [servermap.UpdateStatus()] 80 81 _all_publish_statuses = [publish.PublishStatus()] 81 82 _all_retrieve_statuses = [retrieve.RetrieveStatus()] … … class FakeClient(Client): 111 112 self.uploader = FakeUploader() 112 113 self.uploader.setServiceParent(self) 113 114 self.nodemaker = FakeNodeMaker(None, self._secret_holder, None, 114 self.uploader, None, None,115 self.uploader, None, 115 116 None, None) 116 117 117 118 def startService(self): … … class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi 4186 4187 "no servers were connected, but it might also indicate " 4187 4188 "severe corruption. You should perform a filecheck on " 4188 4189 "this object to learn more. The full error message is: " 4189 " Failed to get enough shareholders: have 0, need 3")4190 "no shares (need 3). 
Last failure: None") 4190 4191 self.failUnlessReallyEqual(exp, body) 4191 4192 d.addCallback(_check_zero_shares) 4192 4193 … … class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi 4198 4199 def _check_one_share(body): 4199 4200 self.failIf("<html>" in body, body) 4200 4201 body = " ".join(body.strip().split()) 4201 exp= ("NotEnoughSharesError: This indicates that some "4202 msg = ("NotEnoughSharesError: This indicates that some " 4202 4203 "servers were unavailable, or that shares have been " 4203 4204 "lost to server departure, hard drive failure, or disk " 4204 4205 "corruption. You should perform a filecheck on " 4205 4206 "this object to learn more. The full error message is:" 4206 " Failed to get enough shareholders: have 1, need 3") 4207 self.failUnlessReallyEqual(exp, body) 4207 " ran out of shares: %d complete, %d pending, 0 overdue," 4208 " 0 unused, need 3. Last failure: None") 4209 msg1 = msg % (1, 0) 4210 msg2 = msg % (0, 1) 4211 self.failUnless(body == msg1 or body == msg2, body) 4208 4212 d.addCallback(_check_one_share) 4209 4213 4210 4214 d.addCallback(lambda ignored: -
src/allmydata/util/observer.py
diff --git a/src/allmydata/util/observer.py b/src/allmydata/util/observer.py index 13e4b51..3dc1d27 100644
class EventStreamObserver:
    """A simple class to distribute multiple events to a single subscriber.
    It accepts arbitrary kwargs, but no posargs.

    Events delivered via notify() before subscribe() is called are queued
    and flushed to the subscriber as soon as it attaches. Delivery is
    asynchronous (via foolscap's eventually), so the subscriber is never
    called re-entrantly.
    """
    def __init__(self):
        self._watcher = None              # (observer, extra-kwargs) once subscribed
        self._undelivered_results = []    # kwargs dicts queued before subscribe()
        self._canceler = None             # (weakref-to-owner, methodname)

    def set_canceler(self, c, methname):
        """I will call c.METHNAME(self) when somebody cancels me."""
        # we use a weakref to avoid creating a cycle between us and the thing
        # we're observing: they'll be holding a reference to us to compare
        # against the value we pass to their canceler function. However,
        # since bound methods are first-class objects (and not kept alive by
        # the object they're bound to), we can't just stash a weakref to the
        # bound cancel method. Instead, we must hold a weakref to the actual
        # object, and obtain its cancel method later.
        # http://code.activestate.com/recipes/81253-weakmethod/ has an
        # alternative.
        self._canceler = (weakref.ref(c), methname)

    def subscribe(self, observer, **watcher_kwargs):
        """Attach the single subscriber; flush any queued events to it."""
        self._watcher = (observer, watcher_kwargs)
        while self._undelivered_results:
            self._notify(self._undelivered_results.pop(0))

    def notify(self, **result_kwargs):
        """Deliver an event (kwargs only): immediately if subscribed,
        otherwise queue it for later delivery."""
        if self._watcher:
            self._notify(result_kwargs)
        else:
            self._undelivered_results.append(result_kwargs)

    def _notify(self, result_kwargs):
        # merge per-event kwargs with the subscriber's fixed kwargs, then
        # deliver on a later reactor turn
        o, watcher_kwargs = self._watcher
        kwargs = dict(result_kwargs)
        kwargs.update(watcher_kwargs)
        eventually(o, **kwargs)

    def cancel(self):
        """Invoke the canceler registered via set_canceler(), if its owner
        is still alive. Requires set_canceler() to have been called first."""
        wr, methname = self._canceler
        o = wr()
        if o:
            getattr(o, methname)(self)
new file src/allmydata/util/spans.py
diff --git a/src/allmydata/util/spans.py b/src/allmydata/util/spans.py new file mode 100755 index 0000000..2a199f0
class Spans:
    """I represent a compressed list of booleans, one per index (an integer).
    Typically, each index represents an offset into a large string, pointing
    to a specific byte of a share. In this context, True means that byte has
    been received, or has been requested.

    Another way to look at this is maintaining a set of integers, optimized
    for operations on spans like 'add range to set' and 'is range in set?'.

    This is a python equivalent of perl's Set::IntSpan module, frequently
    used to represent .newsrc contents.

    Rather than storing an actual (large) list or dictionary, I represent my
    internal state as a sorted list of spans, each with a start and a length.
    My API is presented in terms of start+length pairs. I provide set
    arithmetic operators, to efficiently answer questions like 'I want bytes
    XYZ, I already requested bytes ABC, and I've already received bytes DEF:
    what bytes should I request now?'.

    The new downloader will use it to keep track of which bytes we've
    requested or received already.
    """

    def __init__(self, _span_or_start=None, length=None):
        """Create an empty set, a single-span set (start + length), or a
        copy of any iterable of (start,length) pairs (including another
        Spans instance)."""
        self._spans = list()
        if length is not None:
            self._spans.append( (_span_or_start, length) )
        elif _span_or_start:
            for (start,length) in _span_or_start:
                self.add(start, length)
        self._check()

    def _check(self):
        # internal invariant: spans are sorted and strictly separated
        # (overlapping or adjacent spans must have been merged)
        assert sorted(self._spans) == self._spans, self.dump()
        prev_end = None
        for (start,length) in self._spans:
            if prev_end is not None:
                assert start > prev_end, self.dump()
            prev_end = start+length

    def add(self, start, length):
        """Add [start, start+length) to the set. Returns self."""
        assert start >= 0
        assert length > 0
        # find the contiguous run of existing spans that the new span
        # touches (overlaps or abuts); they all merge into one
        first_overlap = last_overlap = None
        for i,(s_start,s_length) in enumerate(self._spans):
            if (overlap(s_start, s_length, start, length)
                or adjacent(s_start, s_length, start, length)):
                last_overlap = i
                if first_overlap is None:
                    first_overlap = i
                continue
            # no overlap; since spans are sorted, once we've seen the run
            # end, no later span can touch the new one
            if first_overlap is not None:
                break
        if first_overlap is None:
            # no overlap, so just insert the span and sort by starting
            # position
            self._spans.insert(0, (start,length))
            self._spans.sort()
        else:
            # everything from [first_overlap] to [last_overlap] overlapped
            first_start,first_length = self._spans[first_overlap]
            last_start,last_length = self._spans[last_overlap]
            newspan_start = min(start, first_start)
            newspan_end = max(start+length, last_start+last_length)
            newspan_length = newspan_end - newspan_start
            newspan = (newspan_start, newspan_length)
            self._spans[first_overlap:last_overlap+1] = [newspan]
        self._check()
        return self

    def remove(self, start, length):
        """Remove [start, start+length) from the set. Returns self."""
        assert start >= 0
        assert length > 0
        first_complete_overlap = last_complete_overlap = None
        for i,(s_start,s_length) in enumerate(self._spans):
            s_end = s_start + s_length
            o = overlap(s_start, s_length, start, length)
            if o:
                o_start, o_length = o
                o_end = o_start+o_length
                if o_start == s_start and o_end == s_end:
                    # delete this span altogether (done after the loop, so
                    # enumerate() indices stay valid)
                    if first_complete_overlap is None:
                        first_complete_overlap = i
                    last_complete_overlap = i
                elif o_start == s_start:
                    # we only overlap the left side, so trim the start
                    #         1111
                    #       rrrr
                    #         oo
                    # ->       11
                    new_start = o_end
                    new_end = s_end
                    assert new_start > s_start
                    new_length = new_end - new_start
                    self._spans[i] = (new_start, new_length)
                elif o_end == s_end:
                    # we only overlap the right side
                    #         1111
                    #           rrrr
                    #           oo
                    # ->      11
                    new_start = s_start
                    new_end = o_start
                    assert new_end < s_end
                    new_length = new_end - new_start
                    self._spans[i] = (new_start, new_length)
                else:
                    # we overlap the middle, so create a new span. No need
                    # to examine any other spans.
                    #         111111
                    #           rr
                    #         LL  RR
                    left_start = s_start
                    left_end = o_start
                    left_length = left_end - left_start
                    right_start = o_end
                    right_end = s_end
                    right_length = right_end - right_start
                    self._spans[i] = (left_start, left_length)
                    self._spans.append( (right_start, right_length) )
                    self._spans.sort()
                    break
        if first_complete_overlap is not None:
            del self._spans[first_complete_overlap:last_complete_overlap+1]
        self._check()
        return self

    def dump(self):
        """Return a compact human-readable description of my contents."""
        return "len=%d: %s" % (len(self),
                               ",".join(["[%d-%d]" % (start,start+l-1)
                                         for (start,l) in self._spans]) )

    def each(self):
        """Yield each member index in increasing order (mostly for tests)."""
        for start, length in self._spans:
            for i in range(start, start+length):
                yield i

    def __iter__(self):
        # yields (start,length) tuples, one per span
        for s in self._spans:
            yield s

    def __len__(self):
        # number of member bytes; this also gets us bool(s)
        return sum([length for start,length in self._spans])

    def __add__(self, other):
        # union with any iterable of (start,length) pairs
        s = self.__class__(self)
        for (start, length) in other:
            s.add(start, length)
        return s

    def __sub__(self, other):
        # difference with any iterable of (start,length) pairs
        s = self.__class__(self)
        for (start, length) in other:
            s.remove(start, length)
        return s

    def __iadd__(self, other):
        for (start, length) in other:
            self.add(start, length)
        return self

    def __isub__(self, other):
        for (start, length) in other:
            self.remove(start, length)
        return self

    def __and__(self, other):
        # intersection, computed as self - (bounds - other), where 'bounds'
        # is a single span covering my full extent
        if not self._spans:
            return self.__class__()
        bounds_start = self._spans[0][0]
        bounds_end = self._spans[-1][0] + self._spans[-1][1]
        # fixed: pass the extent's *length*, not its end offset, as the
        # second constructor argument
        bounds = self.__class__(bounds_start, bounds_end - bounds_start)
        not_other = bounds - other
        return self - not_other

    def __contains__(self, span):
        """True if every byte of the (start,length) span is a member.
        NOT equivalent to 'span in list(self)'."""
        (start, length) = span
        for span_start,span_length in self._spans:
            o = overlap(start, length, span_start, span_length)
            if o:
                o_start,o_length = o
                if o_start == start and o_length == length:
                    return True
        return False

def overlap(start0, length0, start1, length1):
    """Return (start2,length2) of the overlapping region, or None.

    #  00      00   000   0000  00       00 000  00   00  00      00
    #     11    11   11    11   111 11   11 11   1111 111 11      11
    """
    left = max(start0, start1)
    right = min(start0+length0, start1+length1)
    # if there is overlap, 'left' will be its start, and right-1 will
    # be the end
    if left < right:
        return (left, right-left)
    return None

def adjacent(start0, length0, start1, length1):
    """True if the two spans touch end-to-start without overlapping."""
    if (start0 < start1) and start0+length0 == start1:
        return True
    elif (start1 < start0) and start1+length1 == start0:
        return True
    return False

class DataSpans:
    """I represent portions of a large string. Equivalently, I can be said
    to maintain a large array of characters (with gaps of empty elements).
    I can be used to manage access to a remote share, where some pieces have
    been retrieved, some have been requested, and others have not been read.
    """

    def __init__(self, other=None):
        self.spans = [] # (start, data) tuples, non-overlapping, merged
        if other:
            for (start, data) in other.get_chunks():
                self.add(start, data)

    def __len__(self):
        # return number of bytes we're holding
        return sum([len(data) for (start,data) in self.spans])

    def _dump(self):
        # return iterator of sorted offsets, one per held byte
        for (start,data) in self.spans:
            for i in range(start, start+len(data)):
                yield i

    def dump(self):
        """Return a compact human-readable description of my contents."""
        return "len=%d: %s" % (len(self),
                               ",".join(["[%d-%d]" % (start,start+len(data)-1)
                                         for (start,data) in self.spans]) )

    def get_chunks(self):
        """Return a list of (start, data) tuples, one per contiguous chunk."""
        return list(self.spans)

    def get_spans(self):
        """Return a Spans object with a bit set for each byte I hold"""
        return Spans([(start, len(data)) for (start,data) in self.spans])

    def assert_invariants(self):
        # chunks must be sorted and strictly separated: adjacent or
        # overlapping chunks should have been merged by add()
        if not self.spans:
            return
        prev_end = self.spans[0][0] + len(self.spans[0][1])
        for start, data in self.spans[1:]:
            # fixed: advance prev_end each iteration; previously every chunk
            # was only compared against the end of the *first* chunk
            assert start > prev_end, self.spans
            prev_end = start + len(data)

    def get(self, start, length):
        """Return the LENGTH bytes at START, or None if any are missing."""
        end = start+length
        for (s_start,s_data) in self.spans:
            s_end = s_start+len(s_data)
            if s_start <= start < s_end:
                # we want some data from this span. Because we maintain
                # strictly merged and non-overlapping spans, everything we
                # want must be in this span.
                offset = start - s_start
                if offset + length > len(s_data):
                    return None # span falls short
                return s_data[offset:offset+length]
            if s_start >= end:
                # we've gone too far: no further spans will overlap
                return None
        return None

    def add(self, start, data):
        """Record DATA at offset START, overwriting any old data there and
        merging with any touching chunks."""
        # first: walk through existing spans, find overlap, modify-in-place,
        # inserting new spans as needed; then sort and merge adjacent spans
        end = start + len(data)
        i = 0
        while len(data):
            if i >= len(self.spans):
                # append a last span
                self.spans.append( (start, data) )
                break
            (s_start,s_data) = self.spans[i]
            # five basic cases:
            #  a: OLD  b:OLDD  c1:OLD  c2:OLD  d1:OLDD  d2:OLD  e: OLLDD
            #     NEW     NEW     NEW     NEWW     NEW     NEW     NEW
            #
            # we handle A by inserting a new segment (with "N") and looping,
            # turning it into B or C. We handle B by replacing a prefix and
            # terminating. We handle C (both c1 and c2) by replacing the
            # segment (and, for c2, looping, turning it into A). We handle D
            # by replacing a suffix (and, for d2, looping, turning it into
            # A). We handle E by replacing the middle and terminating.
            if start < s_start:
                # case A: insert a new span, then loop with the remainder
                s_len = s_start-start
                self.spans.insert(i, (start, data[:s_len]))
                i += 1
                start = s_start
                data = data[s_len:]
                continue
            s_len = len(s_data)
            s_end = s_start+s_len
            if s_start <= start < s_end:
                # we want to modify some data in this span: a prefix, a
                # suffix, or the whole thing
                if s_start == start:
                    if s_end <= end:
                        # case C: replace this segment
                        self.spans[i] = (s_start, data[:s_len])
                        i += 1
                        start += s_len
                        data = data[s_len:]
                        # C2 is where len(data)>0
                        continue
                    # case B: modify the prefix, retain the suffix
                    self.spans[i] = (s_start, data + s_data[len(data):])
                    break
                if start > s_start and end < s_end:
                    # case E: modify the middle
                    prefix_len = start - s_start # we retain this much
                    suffix_len = s_end - end # and retain this much
                    newdata = s_data[:prefix_len] + data + s_data[-suffix_len:]
                    self.spans[i] = (s_start, newdata)
                    break
                # case D: retain the prefix, modify the suffix
                prefix_len = start - s_start # we retain this much
                suffix_len = s_len - prefix_len # we replace this much
                self.spans[i] = (s_start,
                                 s_data[:prefix_len] + data[:suffix_len])
                i += 1
                start += suffix_len
                data = data[suffix_len:]
                # D2 is where len(data)>0
                continue
            # else we're not there yet
            i += 1
            continue
        # now merge adjacent spans
        newspans = []
        for (s_start,s_data) in self.spans:
            if newspans and adjacent(newspans[-1][0], len(newspans[-1][1]),
                                     s_start, len(s_data)):
                newspans[-1] = (newspans[-1][0], newspans[-1][1] + s_data)
            else:
                newspans.append( (s_start, s_data) )
        self.spans = newspans
        self.assert_invariants()

    def remove(self, start, length):
        """Discard any held bytes in [start, start+length)."""
        i = 0
        end = start + length
        while i < len(self.spans):
            (s_start,s_data) = self.spans[i]
            if s_start >= end:
                # this segment is entirely right of the removed region, and
                # all further segments are even further right. We're done.
                break
            s_len = len(s_data)
            s_end = s_start + s_len
            o = overlap(start, length, s_start, s_len)
            if not o:
                i += 1
                continue
            o_start, o_len = o
            o_end = o_start + o_len
            if o_len == s_len:
                # remove the whole segment
                del self.spans[i]
                continue
            if o_start == s_start:
                # remove a prefix, leaving the suffix from o_end to s_end
                prefix_len = o_end - o_start
                self.spans[i] = (o_end, s_data[prefix_len:])
                i += 1
                continue
            elif o_end == s_end:
                # remove a suffix, leaving the prefix from s_start to o_start
                prefix_len = o_start - s_start
                self.spans[i] = (s_start, s_data[:prefix_len])
                i += 1
                continue
            # remove the middle, creating a new segment
            # left is s_start:o_start, right is o_end:s_end
            left_len = o_start - s_start
            left = s_data[:left_len]
            right_len = s_end - o_end
            right = s_data[-right_len:]
            self.spans[i] = (s_start, left)
            self.spans.insert(i+1, (o_end, right))
            break

    def pop(self, start, length):
        """Return and remove the LENGTH bytes at START. If any are missing,
        return None and remove nothing."""
        data = self.get(start, length)
        if data:
            self.remove(start, length)
        return data
src/allmydata/web/download-status.xhtml
diff --git a/src/allmydata/web/download-status.xhtml b/src/allmydata/web/download-status.xhtml index 77342ba..30abfca 100644
a b 18 18 <li>Status: <span n:render="status"/></li> 19 19 </ul> 20 20 21 <div n:render="events"></div> 21 22 22 23 <div n:render="results"> 23 24 <h2>Download Results</h2> -
src/allmydata/web/status.py
diff --git a/src/allmydata/web/status.py b/src/allmydata/web/status.py index e4241a3..c3a55d7 100644
a b class DownloadStatusPage(DownloadResultsRendererMixin, rend.Page): 358 358 def download_results(self): 359 359 return defer.maybeDeferred(self.download_status.get_results) 360 360 361 def relative_time(self, t): 362 if t is None: 363 return t 364 if self.download_status.started is not None: 365 return t - self.download_status.started 366 return t 367 def short_relative_time(self, t): 368 t = self.relative_time(t) 369 if t is None: 370 return "" 371 return "+%.6fs" % t 372 373 def renderHTTP(self, ctx): 374 req = inevow.IRequest(ctx) 375 t = get_arg(req, "t") 376 if t == "json": 377 return self.json(req) 378 return rend.Page.renderHTTP(self, ctx) 379 380 def json(self, req): 381 req.setHeader("content-type", "text/plain") 382 data = {} 383 dyhb_events = [] 384 for serverid,requests in self.download_status.dyhb_requests.iteritems(): 385 for req in requests: 386 dyhb_events.append( (base32.b2a(serverid),) + req ) 387 dyhb_events.sort(key=lambda req: req[1]) 388 data["dyhb"] = dyhb_events 389 request_events = [] 390 for serverid,requests in self.download_status.requests.iteritems(): 391 for req in requests: 392 request_events.append( (base32.b2a(serverid),) + req ) 393 request_events.sort(key=lambda req: (req[4],req[1])) 394 data["requests"] = request_events 395 data["segment"] = self.download_status.segment_events 396 data["read"] = self.download_status.read_events 397 return simplejson.dumps(data, indent=1) + "\n" 398 399 def render_events(self, ctx, data): 400 if not self.download_status.storage_index: 401 return 402 srt = self.short_relative_time 403 l = T.ul() 404 405 t = T.table(class_="status-download-events") 406 t[T.tr[T.td["serverid"], T.td["sent"], T.td["received"], 407 T.td["shnums"], T.td["RTT"]]] 408 dyhb_events = [] 409 for serverid,requests in self.download_status.dyhb_requests.iteritems(): 410 for req in requests: 411 dyhb_events.append( (serverid,) + req ) 412 dyhb_events.sort(key=lambda req: req[1]) 413 for d_ev in dyhb_events: 414 (serverid, 
sent, shnums, received) = d_ev 415 serverid_s = idlib.shortnodeid_b2a(serverid) 416 rtt = received - sent 417 t[T.tr(style="background: %s" % self.color(serverid))[ 418 [T.td[serverid_s], T.td[srt(sent)], T.td[srt(received)], 419 T.td[",".join([str(shnum) for shnum in shnums])], 420 T.td[self.render_time(None, rtt)], 421 ]]] 422 l["DYHB Requests:", t] 423 424 t = T.table(class_="status-download-events") 425 t[T.tr[T.td["range"], T.td["start"], T.td["finish"], T.td["got"], 426 T.td["time"], T.td["decrypttime"], T.td["pausedtime"], 427 T.td["speed"]]] 428 for r_ev in self.download_status.read_events: 429 (start, length, requesttime, finishtime, bytes, decrypt, paused) = r_ev 430 print r_ev 431 if finishtime is not None: 432 rtt = finishtime - requesttime - paused 433 speed = self.render_rate(None, 1.0 * bytes / rtt) 434 rtt = self.render_time(None, rtt) 435 decrypt = self.render_time(None, decrypt) 436 paused = self.render_time(None, paused) 437 else: 438 speed, rtt, decrypt, paused = "","","","" 439 t[T.tr[T.td["[%d:+%d]" % (start, length)], 440 T.td[srt(requesttime)], T.td[srt(finishtime)], 441 T.td[bytes], T.td[rtt], T.td[decrypt], T.td[paused], 442 T.td[speed], 443 ]] 444 l["Read Events:", t] 445 446 t = T.table(class_="status-download-events") 447 t[T.tr[T.td["type"], T.td["segnum"], T.td["when"], T.td["range"], 448 T.td["decodetime"], T.td["segtime"], T.td["speed"]]] 449 reqtime = (None, None) 450 for s_ev in self.download_status.segment_events: 451 (etype, segnum, when, segstart, seglen, decodetime) = s_ev 452 if etype == "request": 453 t[T.tr[T.td["request"], T.td["seg%d" % segnum], 454 T.td[srt(when)]]] 455 reqtime = (segnum, when) 456 elif etype == "delivery": 457 if reqtime[0] == segnum: 458 segtime = when - reqtime[1] 459 speed = self.render_rate(None, 1.0 * seglen / segtime) 460 segtime = self.render_time(None, segtime) 461 else: 462 segtime, speed = "", "" 463 t[T.tr[T.td["delivery"], T.td["seg%d" % segnum], 464 T.td[srt(when)], 465 T.td["[%d:+%d]" % 
(segstart, seglen)], 466 T.td[self.render_time(None,decodetime)], 467 T.td[segtime], T.td[speed]]] 468 elif etype == "error": 469 t[T.tr[T.td["error"], T.td["seg%d" % segnum]]] 470 l["Segment Events:", t] 471 472 t = T.table(border="1") 473 t[T.tr[T.td["serverid"], T.td["shnum"], T.td["range"], 474 T.td["txtime"], T.td["rxtime"], T.td["received"], T.td["RTT"]]] 475 reqtime = (None, None) 476 request_events = [] 477 for serverid,requests in self.download_status.requests.iteritems(): 478 for req in requests: 479 request_events.append( (serverid,) + req ) 480 request_events.sort(key=lambda req: (req[4],req[1])) 481 for r_ev in request_events: 482 (peerid, shnum, start, length, sent, receivedlen, received) = r_ev 483 rtt = None 484 if received is not None: 485 rtt = received - sent 486 peerid_s = idlib.shortnodeid_b2a(peerid) 487 t[T.tr(style="background: %s" % self.color(peerid))[ 488 T.td[peerid_s], T.td[shnum], 489 T.td["[%d:+%d]" % (start, length)], 490 T.td[srt(sent)], T.td[srt(received)], T.td[receivedlen], 491 T.td[self.render_time(None, rtt)], 492 ]] 493 l["Requests:", t] 494 495 return l 496 497 def color(self, peerid): 498 def m(c): 499 return min(ord(c) / 2 + 0x80, 0xff) 500 return "#%02x%02x%02x" % (m(peerid[0]), m(peerid[1]), m(peerid[2])) 501 361 502 def render_results(self, ctx, data): 362 503 d = self.download_results() 363 504 def _got_results(results): … … class DownloadStatusPage(DownloadResultsRendererMixin, rend.Page): 371 512 TIME_FORMAT = "%H:%M:%S %d-%b-%Y" 372 513 started_s = time.strftime(TIME_FORMAT, 373 514 time.localtime(data.get_started())) 374 return started_s 515 return started_s + " (%s)" % data.get_started() 375 516 376 517 def render_si(self, ctx, data): 377 518 si_s = base32.b2a_or_none(data.get_storage_index()) -
src/allmydata/web/tahoe.css
diff --git a/src/allmydata/web/tahoe.css b/src/allmydata/web/tahoe.css index a9aced6..0ed83fc 100644
a b table.tahoe-directory { 135 135 display: inline; 136 136 text-align: center; 137 137 padding: 0 1em; 138 } 139 No newline at end of file 138 } 139 140 /* recent upload/download status pages */ 141 142 table.status-download-events { 143 border: 1px solid #aaa; 144 } 145 table.status-download-events td { 146 border: 1px solid #a00; 147 padding: 2px 148 }