Ticket #798: new-downloader-v10.diff

File new-downloader-v10.diff, 379.2 KB (added by warner at 2010-07-26T02:26:52Z)

added OVERDUE handling for get_buckets calls, re-enabled some "hung server" tests
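
The point of this patch (only partially reproduced below) is that the new downloader marks a get_buckets query as OVERDUE when a server takes too long to answer, instead of waiting on it forever; the "hung server" tests exercise exactly that path. The following is only a minimal sketch of the idea, not code from this diff: the class name, the callback, and the 10-second timeout are hypothetical, and the real downloader keeps considerably more state.

    # Sketch only (hypothetical names and timeout, not from this patch): mark a
    # get_buckets request OVERDUE if the server has not answered in time, so the
    # downloader can query other servers while still accepting a late reply.
    from twisted.internet import reactor

    OVERDUE_TIMEOUT = 10.0  # seconds; illustrative value only

    class GetBucketsRequest(object):
        def __init__(self, server, storage_index, on_overdue):
            # 'server' is assumed to be a foolscap-style RemoteReference
            self.server = server
            self.state = "pending"
            self._on_overdue = on_overdue
            self._timer = reactor.callLater(OVERDUE_TIMEOUT, self._overdue)
            d = server.callRemote("get_buckets", storage_index)
            d.addBoth(self._done)

        def _overdue(self):
            if self.state == "pending":
                self.state = "OVERDUE"
                self._on_overdue(self)  # e.g. let the share finder query other servers

        def _done(self, result):
            # the answer (or failure) arrived; cancel the timer if it hasn't fired
            if self._timer.active():
                self._timer.cancel()
            if self.state != "OVERDUE":
                self.state = "done"
            return result
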

  • Makefile

    diff --git a/Makefile b/Makefile
    index 3e4be60..723d656 100644
    a b quicktest: 
    125125# quicktest-coverage" to do a unit test run with coverage-gathering enabled,
    126126# then use "make coverate-output-text" for a brief report, or "make
    127127# coverage-output" for a pretty HTML report. Also see "make .coverage.el" and
    128 # misc/coding_helpers/coverage.el for emacs integration.
     128# misc/coding_tools/coverage.el for emacs integration.
    129129
    130130quicktest-coverage:
    131131        rm -f .coverage
    quicktest-coverage: 
    134134
    135135coverage-output:
    136136        rm -rf coverage-html
    137         coverage html -d coverage-html
     137        coverage html -i -d coverage-html $(COVERAGE_OMIT)
    138138        cp .coverage coverage-html/coverage.data
    139139        @echo "now point your browser at coverage-html/index.html"
    140140
    coverage-output: 
    154154.PHONY: repl test-darcs-boringfile test-clean clean find-trailing-spaces
    155155
    156156.coverage.el: .coverage
    157         $(PYTHON) misc/coding_helpers/coverage2el.py
     157        $(PYTHON) misc/coding_tools/coverage2el.py
    158158
    159159# 'upload-coverage' is meant to be run with an UPLOAD_TARGET=host:/dir setting
    160160ifdef UPLOAD_TARGET
    endif 
    178178
    179179pyflakes:
    180180        $(PYTHON) -OOu `which pyflakes` src/allmydata |sort |uniq
     181check-umids:
     182        $(PYTHON) misc/coding_tools/check-umids.py `find src/allmydata -name '*.py'`
    181183
    182184count-lines:
    183185        @echo -n "files: "
  • new file misc/coding_tools/check-umids.py

    diff --git a/misc/coding_tools/check-umids.py b/misc/coding_tools/check-umids.py
    new file mode 100755
    index 0000000..05e8825
    - +  
     1#! /usr/bin/python
     2
     3# ./rumid.py foo.py
     4
     5import sys, re, os
     6
     7ok = True
     8umids = {}
     9
     10for fn in sys.argv[1:]:
     11    fn = os.path.abspath(fn)
     12    for lineno,line in enumerate(open(fn, "r").readlines()):
     13        lineno = lineno+1
     14        if "umid" not in line:
     15            continue
     16        mo = re.search("umid=[\"\']([^\"\']+)[\"\']", line)
     17        if mo:
     18            umid = mo.group(1)
     19            if umid in umids:
     20                oldfn, oldlineno = umids[umid]
     21                print "%s:%d: duplicate umid '%s'" % (fn, lineno, umid)
     22                print "%s:%d: first used here" % (oldfn, oldlineno)
     23                ok = False
     24            umids[umid] = (fn,lineno)
     25
     26if ok:
     27    print "all umids are unique"
     28else:
     29    print "some umids were duplicates"
     30    sys.exit(1)
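
For context, the "umid" this script checks is a short unique marker attached to a log call so a particular log site can be traced back later (see the umid="AX7wZQ" change in checker.py further down). A minimal illustration of the pattern the script matches; the log line below is made up for the example, and the regex mirrors the one in check-umids.py:

    import re

    line = 'self.log("get_buckets failed", level=log.WEIRD, umid="AX7wZQ")'
    mo = re.search(r"umid=[\"']([^\"']+)[\"']", line)
    print(mo.group(1))   # prints: AX7wZQ
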
  • misc/coding_tools/coverage.el

    diff --git a/misc/coding_tools/coverage.el b/misc/coding_tools/coverage.el
    index bad490f..8d69d5d 100644
    a b  
    8484                           'face '(:box "red")
    8585                           )
    8686              )
    87             (message "Added annotations")
     87            (message (format "Added annotations: %d uncovered lines"
     88                             (safe-length uncovered-code-lines)))
    8889            )
    8990          )
    9091      (message "unable to find coverage for this file"))
  • misc/coding_tools/coverage2el.py

    diff --git a/misc/coding_tools/coverage2el.py b/misc/coding_tools/coverage2el.py
    index ed94bd0..7d03a27 100644
    a b  
    11
    2 from coverage import coverage, summary
     2from coverage import coverage, summary, misc
    33
    44class ElispReporter(summary.SummaryReporter):
    55    def report(self):
    class ElispReporter(summary.SummaryReporter): 
    2121        out.write("(let ((results (make-hash-table :test 'equal)))\n")
    2222        for cu in self.code_units:
    2323            f = cu.filename
    24             (fn, executable, missing, mf) = self.coverage.analysis(cu)
     24            try:
     25                (fn, executable, missing, mf) = self.coverage.analysis(cu)
     26            except misc.NoSource:
     27                continue
    2528            code_linenumbers = executable
    2629            uncovered_code = missing
    2730            covered_linenumbers = sorted(set(executable) - set(missing))
  • misc/simulators/sizes.py

    diff --git a/misc/simulators/sizes.py b/misc/simulators/sizes.py
    index d9c230a..7910946 100644
    a b class Sizes: 
    6060            self.block_arity = 0
    6161            self.block_tree_depth = 0
    6262            self.block_overhead = 0
    63             self.bytes_until_some_data = 20 + share_size
     63            self.bytes_until_some_data = 32 + share_size
    6464            self.share_storage_overhead = 0
    6565            self.share_transmission_overhead = 0
    6666
    6767        elif mode == "beta":
    6868            # k=num_blocks, d=1
    69             # each block has a 20-byte hash
     69            # each block has a 32-byte hash
    7070            self.block_arity = num_blocks
    7171            self.block_tree_depth = 1
    72             self.block_overhead = 20
     72            self.block_overhead = 32
    7373            # the share has a list of hashes, one for each block
    7474            self.share_storage_overhead = (self.block_overhead *
    7575                                           num_blocks)
    7676            # we can get away with not sending the hash of the share that
    7777            # we're sending in full, once
    78             self.share_transmission_overhead = self.share_storage_overhead - 20
     78            self.share_transmission_overhead = self.share_storage_overhead - 32
    7979            # we must get the whole list (so it can be validated) before
    8080            # any data can be validated
    8181            self.bytes_until_some_data = (self.share_transmission_overhead +
    class Sizes: 
    8989            # to make things easier, we make the pessimistic assumption that
    9090            # we have to store hashes for all the empty places in the tree
    9191            # (when the number of shares is not an exact exponent of k)
    92             self.block_overhead = 20
     92            self.block_overhead = 32
    9393            # the block hashes are organized into a k-ary tree, which
    9494            # means storing (and eventually transmitting) more hashes. This
    9595            # count includes all the low-level share hashes and the root.
    class Sizes: 
    9898            #print "num_leaves", num_leaves
    9999            #print "hash_nodes", hash_nodes
    100100            # the storage overhead is this
    101             self.share_storage_overhead = 20 * (hash_nodes - 1)
     101            self.share_storage_overhead = 32 * (hash_nodes - 1)
    102102            # the transmission overhead is smaller: if we actually transmit
    103103            # every block, we don't have to transmit 1/k of the
    104104            # lowest-level block hashes, and we don't have to transmit the
    105105            # root because it was already sent with the share-level hash tree
    106             self.share_transmission_overhead = 20 * (hash_nodes
     106            self.share_transmission_overhead = 32 * (hash_nodes
    107107                                                     - 1 # the root
    108108                                                     - num_leaves / k)
    109109            # we must get a full sibling hash chain before we can validate
    110110            # any data
    111111            sibling_length = d * (k-1)
    112             self.bytes_until_some_data = 20 * sibling_length + block_size
     112            self.bytes_until_some_data = 32 * sibling_length + block_size
    113113           
    114114           
    115115
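
The substance of this hunk is bumping the simulated hash size from 20 bytes (SHA-1 sized) to 32 bytes (SHA-256 sized). A small worked example of how that constant feeds the k-ary-tree formulas above; the parameters (k, depth, block size) and the full-tree node count are illustrative assumptions, not the simulator's defaults:

    HASH_SIZE = 32              # bytes per hash, as changed in this patch
    k = 3                       # tree arity (assumed for the example)
    d = 2                       # tree depth (assumed)
    num_leaves = k ** d         # 9 blocks
    block_size = 4096           # bytes (assumed)

    # pessimistic node count for a full k-ary tree of depth d, including the root
    hash_nodes = sum(k ** i for i in range(d + 1))                    # 1 + 3 + 9 = 13

    share_storage_overhead = HASH_SIZE * (hash_nodes - 1)                         # 384
    share_transmission_overhead = HASH_SIZE * (hash_nodes - 1 - num_leaves // k)  # 288
    bytes_until_some_data = HASH_SIZE * d * (k - 1) + block_size                  # 4224

    print("storage overhead:      %d bytes" % share_storage_overhead)
    print("transmission overhead: %d bytes" % share_transmission_overhead)
    print("bytes until some data: %d bytes" % bytes_until_some_data)
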
  • misc/simulators/storage-overhead.py

    diff --git a/misc/simulators/storage-overhead.py b/misc/simulators/storage-overhead.py
    index 75a0bf6..a294b8d 100644
    a b  
    11#!/usr/bin/env python
    22
    33import sys, math
    4 from allmydata import upload, uri, encode, storage
     4from allmydata import uri, storage
     5from allmydata.immutable import upload
     6from allmydata.interfaces import DEFAULT_MAX_SEGMENT_SIZE
    57from allmydata.util import mathutil
    68
    79def roundup(size, blocksize=4096):
    class BigFakeString: 
    2224    def tell(self):
    2325        return self.fp
    2426
    25 def calc(filesize, params=(3,7,10), segsize=encode.Encoder.MAX_SEGMENT_SIZE):
     27def calc(filesize, params=(3,7,10), segsize=DEFAULT_MAX_SEGMENT_SIZE):
    2628    num_shares = params[2]
    2729    if filesize <= upload.Uploader.URI_LIT_SIZE_THRESHOLD:
    28         urisize = len(uri.pack_lit("A"*filesize))
     30        urisize = len(uri.LiteralFileURI("A"*filesize).to_string())
    2931        sharesize = 0
    3032        sharespace = 0
    3133    else:
    32         u = upload.FileUploader(None)
     34        u = upload.FileUploader(None) # XXX changed
    3335        u.set_params(params)
    3436        # unfortunately, Encoder doesn't currently lend itself to answering
    3537        # this question without measuring a filesize, so we have to give it a
  • src/allmydata/client.py

    diff --git a/src/allmydata/client.py b/src/allmydata/client.py
    index 12e7473..c914ec4 100644
    a b  
    1 import os, stat, time
     1import os, stat, time, weakref
    22from allmydata.interfaces import RIStorageServer
    33from allmydata import node
    44
    55from zope.interface import implements
    66from twisted.internet import reactor, defer
     7from twisted.application import service
    78from twisted.application.internet import TimerService
    89from foolscap.api import Referenceable
    910from pycryptopp.publickey import rsa
    import allmydata 
    1213from allmydata.storage.server import StorageServer
    1314from allmydata import storage_client
    1415from allmydata.immutable.upload import Uploader
    15 from allmydata.immutable.download import Downloader
    1616from allmydata.immutable.offloaded import Helper
    1717from allmydata.control import ControlServer
    1818from allmydata.introducer.client import IntroducerClient
    19 from allmydata.util import hashutil, base32, pollmixin, cachedir, log
     19from allmydata.util import hashutil, base32, pollmixin, log
    2020from allmydata.util.abbreviate import parse_abbreviated_size
    2121from allmydata.util.time_format import parse_duration, parse_date
    2222from allmydata.stats import StatsProvider
    class KeyGenerator: 
    9494            verifier = signer.get_verifying_key()
    9595            return defer.succeed( (verifier, signer) )
    9696
     97class Terminator(service.Service):
     98    def __init__(self):
     99        self._clients = weakref.WeakKeyDictionary()
     100    def register(self, c):
     101        self._clients[c] = None
     102    def stopService(self):
     103        for c in self._clients:
     104            c.stop()
     105        return service.Service.stopService(self)
     106
    97107
    98108class Client(node.Node, pollmixin.PollMixin):
    99109    implements(IStatsProducer)
    class Client(node.Node, pollmixin.PollMixin): 
    278288
    279289        self.init_client_storage_broker()
    280290        self.history = History(self.stats_provider)
     291        self.terminator = Terminator()
     292        self.terminator.setServiceParent(self)
    281293        self.add_service(Uploader(helper_furl, self.stats_provider))
    282         download_cachedir = os.path.join(self.basedir,
    283                                          "private", "cache", "download")
    284         self.download_cache_dirman = cachedir.CacheDirectoryManager(download_cachedir)
    285         self.download_cache_dirman.setServiceParent(self)
    286         self.downloader = Downloader(self.storage_broker, self.stats_provider)
    287294        self.init_stub_client()
    288295        self.init_nodemaker()
    289296
    class Client(node.Node, pollmixin.PollMixin): 
    342349                                   self._secret_holder,
    343350                                   self.get_history(),
    344351                                   self.getServiceNamed("uploader"),
    345                                    self.downloader,
    346                                    self.download_cache_dirman,
     352                                   self.terminator,
    347353                                   self.get_encoding_parameters(),
    348354                                   self._key_generator)
    349355
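
The new Terminator service replaces the per-client Downloader and download-cache plumbing: objects register themselves with it (presumably the filenodes created by the nodemaker, which now receives self.terminator), and when the client's service hierarchy shuts down, every still-live registered object gets stop() called on it. The WeakKeyDictionary means registration does not keep those objects alive. A minimal, self-contained sketch of that contract, assuming only Twisted; Terminator is copied from the hunk above, while FakeFileNode is a hypothetical stand-in for whatever registers itself:

    import weakref
    from twisted.application import service

    class Terminator(service.Service):
        def __init__(self):
            self._clients = weakref.WeakKeyDictionary()
        def register(self, c):
            self._clients[c] = None
        def stopService(self):
            for c in self._clients:
                c.stop()
            return service.Service.stopService(self)

    class FakeFileNode(object):       # hypothetical registrant
        def stop(self):
            print("download machinery shut down")

    parent = service.MultiService()
    terminator = Terminator()
    terminator.setServiceParent(parent)

    node = FakeFileNode()
    terminator.register(node)         # weak reference: no lifecycle coupling

    parent.startService()
    parent.stopService()              # calls node.stop() via Terminator.stopService
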
  • src/allmydata/immutable/checker.py

    diff --git a/src/allmydata/immutable/checker.py b/src/allmydata/immutable/checker.py
    index 2f2d8f1..cd5c556 100644
    a b  
     1from zope.interface import implements
     2from twisted.internet import defer
    13from foolscap.api import DeadReferenceError, RemoteException
     4from allmydata import hashtree, codec, uri
     5from allmydata.interfaces import IValidatedThingProxy, IVerifierURI
    26from allmydata.hashtree import IncompleteHashTree
    37from allmydata.check_results import CheckResults
    4 from allmydata.immutable import download
    58from allmydata.uri import CHKFileVerifierURI
    69from allmydata.util.assertutil import precondition
    7 from allmydata.util import base32, idlib, deferredutil, dictutil, log
     10from allmydata.util import base32, idlib, deferredutil, dictutil, log, mathutil
    811from allmydata.util.hashutil import file_renewal_secret_hash, \
    912     file_cancel_secret_hash, bucket_renewal_secret_hash, \
    10      bucket_cancel_secret_hash
     13     bucket_cancel_secret_hash, uri_extension_hash, CRYPTO_VAL_SIZE, \
     14     block_hash
    1115
    1216from allmydata.immutable import layout
    1317
     18class IntegrityCheckReject(Exception):
     19    pass
     20class BadURIExtension(IntegrityCheckReject):
     21    pass
     22class BadURIExtensionHashValue(IntegrityCheckReject):
     23    pass
     24class BadOrMissingHash(IntegrityCheckReject):
     25    pass
     26class UnsupportedErasureCodec(BadURIExtension):
     27    pass
     28
     29class ValidatedExtendedURIProxy:
     30    implements(IValidatedThingProxy)
     31    """ I am a front-end for a remote UEB (using a local ReadBucketProxy),
     32    responsible for retrieving and validating the elements from the UEB."""
     33
     34    def __init__(self, readbucketproxy, verifycap, fetch_failures=None):
     35        # fetch_failures is for debugging -- see test_encode.py
     36        self._fetch_failures = fetch_failures
     37        self._readbucketproxy = readbucketproxy
     38        precondition(IVerifierURI.providedBy(verifycap), verifycap)
     39        self._verifycap = verifycap
     40
     41        # required
     42        self.segment_size = None
     43        self.crypttext_root_hash = None
     44        self.share_root_hash = None
     45
     46        # computed
     47        self.block_size = None
     48        self.share_size = None
     49        self.num_segments = None
     50        self.tail_data_size = None
     51        self.tail_segment_size = None
     52
     53        # optional
     54        self.crypttext_hash = None
     55
     56    def __str__(self):
     57        return "<%s %s>" % (self.__class__.__name__, self._verifycap.to_string())
     58
     59    def _check_integrity(self, data):
     60        h = uri_extension_hash(data)
     61        if h != self._verifycap.uri_extension_hash:
     62            msg = ("The copy of uri_extension we received from %s was bad: wanted %s, got %s" %
     63                   (self._readbucketproxy,
     64                    base32.b2a(self._verifycap.uri_extension_hash),
     65                    base32.b2a(h)))
     66            if self._fetch_failures is not None:
     67                self._fetch_failures["uri_extension"] += 1
     68            raise BadURIExtensionHashValue(msg)
     69        else:
     70            return data
     71
     72    def _parse_and_validate(self, data):
     73        self.share_size = mathutil.div_ceil(self._verifycap.size,
     74                                            self._verifycap.needed_shares)
     75
     76        d = uri.unpack_extension(data)
     77
     78        # There are several kinds of things that can be found in a UEB.
     79        # First, things that we really need to learn from the UEB in order to
     80        # do this download. Next: things which are optional but not redundant
     81        # -- if they are present in the UEB they will get used. Next, things
     82        # that are optional and redundant. These things are required to be
     83        # consistent: they don't have to be in the UEB, but if they are in
     84        # the UEB then they will be checked for consistency with the
     85        # already-known facts, and if they are inconsistent then an exception
     86        # will be raised. These things aren't actually used -- they are just
     87        # tested for consistency and ignored. Finally: things which are
     88        # deprecated -- they ought not be in the UEB at all, and if they are
     89        # present then a warning will be logged but they are otherwise
     90        # ignored.
     91
     92        # First, things that we really need to learn from the UEB:
     93        # segment_size, crypttext_root_hash, and share_root_hash.
     94        self.segment_size = d['segment_size']
     95
     96        self.block_size = mathutil.div_ceil(self.segment_size,
     97                                            self._verifycap.needed_shares)
     98        self.num_segments = mathutil.div_ceil(self._verifycap.size,
     99                                              self.segment_size)
     100
     101        self.tail_data_size = self._verifycap.size % self.segment_size
     102        if not self.tail_data_size:
     103            self.tail_data_size = self.segment_size
     104        # padding for erasure code
     105        self.tail_segment_size = mathutil.next_multiple(self.tail_data_size,
     106                                                        self._verifycap.needed_shares)
     107
     108        # Ciphertext hash tree root is mandatory, so that there is at most
     109        # one ciphertext that matches this read-cap or verify-cap. The
     110        # integrity check on the shares is not sufficient to prevent the
     111        # original encoder from creating some shares of file A and other
     112        # shares of file B.
     113        self.crypttext_root_hash = d['crypttext_root_hash']
     114
     115        self.share_root_hash = d['share_root_hash']
     116
     117
     118        # Next: things that are optional and not redundant: crypttext_hash
     119        if d.has_key('crypttext_hash'):
     120            self.crypttext_hash = d['crypttext_hash']
     121            if len(self.crypttext_hash) != CRYPTO_VAL_SIZE:
     122                raise BadURIExtension('crypttext_hash is required to be hashutil.CRYPTO_VAL_SIZE bytes, not %s bytes' % (len(self.crypttext_hash),))
     123
     124
     125        # Next: things that are optional, redundant, and required to be
     126        # consistent: codec_name, codec_params, tail_codec_params,
     127        # num_segments, size, needed_shares, total_shares
     128        if d.has_key('codec_name'):
     129            if d['codec_name'] != "crs":
     130                raise UnsupportedErasureCodec(d['codec_name'])
     131
     132        if d.has_key('codec_params'):
     133            ucpss, ucpns, ucpts = codec.parse_params(d['codec_params'])
     134            if ucpss != self.segment_size:
     135                raise BadURIExtension("inconsistent erasure code params: "
     136                                      "ucpss: %s != self.segment_size: %s" %
     137                                      (ucpss, self.segment_size))
     138            if ucpns != self._verifycap.needed_shares:
     139                raise BadURIExtension("inconsistent erasure code params: ucpns: %s != "
     140                                      "self._verifycap.needed_shares: %s" %
     141                                      (ucpns, self._verifycap.needed_shares))
     142            if ucpts != self._verifycap.total_shares:
     143                raise BadURIExtension("inconsistent erasure code params: ucpts: %s != "
     144                                      "self._verifycap.total_shares: %s" %
     145                                      (ucpts, self._verifycap.total_shares))
     146
     147        if d.has_key('tail_codec_params'):
     148            utcpss, utcpns, utcpts = codec.parse_params(d['tail_codec_params'])
     149            if utcpss != self.tail_segment_size:
     150                raise BadURIExtension("inconsistent erasure code params: utcpss: %s != "
     151                                      "self.tail_segment_size: %s, self._verifycap.size: %s, "
     152                                      "self.segment_size: %s, self._verifycap.needed_shares: %s"
     153                                      % (utcpss, self.tail_segment_size, self._verifycap.size,
     154                                         self.segment_size, self._verifycap.needed_shares))
     155            if utcpns != self._verifycap.needed_shares:
     156                raise BadURIExtension("inconsistent erasure code params: utcpns: %s != "
     157                                      "self._verifycap.needed_shares: %s" % (utcpns,
     158                                                                             self._verifycap.needed_shares))
     159            if utcpts != self._verifycap.total_shares:
     160                raise BadURIExtension("inconsistent erasure code params: utcpts: %s != "
     161                                      "self._verifycap.total_shares: %s" % (utcpts,
     162                                                                            self._verifycap.total_shares))
     163
     164        if d.has_key('num_segments'):
     165            if d['num_segments'] != self.num_segments:
     166                raise BadURIExtension("inconsistent num_segments: size: %s, "
     167                                      "segment_size: %s, computed_num_segments: %s, "
     168                                      "ueb_num_segments: %s" % (self._verifycap.size,
     169                                                                self.segment_size,
     170                                                                self.num_segments, d['num_segments']))
     171
     172        if d.has_key('size'):
     173            if d['size'] != self._verifycap.size:
     174                raise BadURIExtension("inconsistent size: URI size: %s, UEB size: %s" %
     175                                      (self._verifycap.size, d['size']))
     176
     177        if d.has_key('needed_shares'):
     178            if d['needed_shares'] != self._verifycap.needed_shares:
     179                raise BadURIExtension("inconsistent needed shares: URI needed shares: %s, UEB "
     180                                      "needed shares: %s" % (self._verifycap.total_shares,
     181                                                             d['needed_shares']))
     182
     183        if d.has_key('total_shares'):
     184            if d['total_shares'] != self._verifycap.total_shares:
     185                raise BadURIExtension("inconsistent total shares: URI total shares: %s, UEB "
     186                                      "total shares: %s" % (self._verifycap.total_shares,
     187                                                            d['total_shares']))
     188
     189        # Finally, things that are deprecated and ignored: plaintext_hash,
     190        # plaintext_root_hash
     191        if d.get('plaintext_hash'):
     192            log.msg("Found plaintext_hash in UEB. This field is deprecated for security reasons "
     193                    "and is no longer used.  Ignoring.  %s" % (self,))
     194        if d.get('plaintext_root_hash'):
     195            log.msg("Found plaintext_root_hash in UEB. This field is deprecated for security "
     196                    "reasons and is no longer used.  Ignoring.  %s" % (self,))
     197
     198        return self
     199
     200    def start(self):
     201        """Fetch the UEB from bucket, compare its hash to the hash from
     202        verifycap, then parse it. Returns a deferred which is called back
     203        with self once the fetch is successful, or is erred back if it
     204        fails."""
     205        d = self._readbucketproxy.get_uri_extension()
     206        d.addCallback(self._check_integrity)
     207        d.addCallback(self._parse_and_validate)
     208        return d
     209
     210class ValidatedReadBucketProxy(log.PrefixingLogMixin):
     211    """I am a front-end for a remote storage bucket, responsible for
     212    retrieving and validating data from that bucket.
     213
     214    My get_block() method is used by BlockDownloaders.
     215    """
     216
     217    def __init__(self, sharenum, bucket, share_hash_tree, num_blocks,
     218                 block_size, share_size):
     219        """ share_hash_tree is required to have already been initialized with
     220        the root hash (the number-0 hash), using the share_root_hash from the
     221        UEB"""
     222        precondition(share_hash_tree[0] is not None, share_hash_tree)
     223        prefix = "%d-%s-%s" % (sharenum, bucket,
     224                               base32.b2a_l(share_hash_tree[0][:8], 60))
     225        log.PrefixingLogMixin.__init__(self,
     226                                       facility="tahoe.immutable.download",
     227                                       prefix=prefix)
     228        self.sharenum = sharenum
     229        self.bucket = bucket
     230        self.share_hash_tree = share_hash_tree
     231        self.num_blocks = num_blocks
     232        self.block_size = block_size
     233        self.share_size = share_size
     234        self.block_hash_tree = hashtree.IncompleteHashTree(self.num_blocks)
     235
     236    def get_all_sharehashes(self):
     237        """Retrieve and validate all the share-hash-tree nodes that are
     238        included in this share, regardless of whether we need them to
     239        validate the share or not. Each share contains a minimal Merkle tree
     240        chain, but there is lots of overlap, so usually we'll be using hashes
     241        from other shares and not reading every single hash from this share.
     242        The Verifier uses this function to read and validate every single
     243        hash from this share.
     244
     245        Call this (and wait for the Deferred it returns to fire) before
     246        calling get_block() for the first time: this lets us check that the
     247        share share contains enough hashes to validate its own data, and
     248        avoids downloading any share hash twice.
     249
     250        I return a Deferred which errbacks upon failure, probably with
     251        BadOrMissingHash."""
     252
     253        d = self.bucket.get_share_hashes()
     254        def _got_share_hashes(sh):
     255            sharehashes = dict(sh)
     256            try:
     257                self.share_hash_tree.set_hashes(sharehashes)
     258            except IndexError, le:
     259                raise BadOrMissingHash(le)
     260            except (hashtree.BadHashError, hashtree.NotEnoughHashesError), le:
     261                raise BadOrMissingHash(le)
     262        d.addCallback(_got_share_hashes)
     263        return d
     264
     265    def get_all_blockhashes(self):
     266        """Retrieve and validate all the block-hash-tree nodes that are
     267        included in this share. Each share contains a full Merkle tree, but
     268        we usually only fetch the minimal subset necessary for any particular
     269        block. This function fetches everything at once. The Verifier uses
     270        this function to validate the block hash tree.
     271
     272        Call this (and wait for the Deferred it returns to fire) after
     273        calling get_all_sharehashes() and before calling get_block() for the
     274        first time: this lets us check that the share contains all block
     275        hashes and avoids downloading them multiple times.
     276
     277        I return a Deferred which errbacks upon failure, probably with
     278        BadOrMissingHash.
     279        """
     280
     281        # get_block_hashes(anything) currently always returns everything
     282        needed = list(range(len(self.block_hash_tree)))
     283        d = self.bucket.get_block_hashes(needed)
     284        def _got_block_hashes(blockhashes):
     285            if len(blockhashes) < len(self.block_hash_tree):
     286                raise BadOrMissingHash()
     287            bh = dict(enumerate(blockhashes))
     288
     289            try:
     290                self.block_hash_tree.set_hashes(bh)
     291            except IndexError, le:
     292                raise BadOrMissingHash(le)
     293            except (hashtree.BadHashError, hashtree.NotEnoughHashesError), le:
     294                raise BadOrMissingHash(le)
     295        d.addCallback(_got_block_hashes)
     296        return d
     297
     298    def get_all_crypttext_hashes(self, crypttext_hash_tree):
     299        """Retrieve and validate all the crypttext-hash-tree nodes that are
     300        in this share. Normally we don't look at these at all: the download
     301        process fetches them incrementally as needed to validate each segment
     302        of ciphertext. But this is a convenient place to give the Verifier a
     303        function to validate all of these at once.
     304
     305        Call this with a new hashtree object for each share, initialized with
     306        the crypttext hash tree root. I return a Deferred which errbacks upon
     307        failure, probably with BadOrMissingHash.
     308        """
     309
     310        # get_crypttext_hashes() always returns everything
     311        d = self.bucket.get_crypttext_hashes()
     312        def _got_crypttext_hashes(hashes):
     313            if len(hashes) < len(crypttext_hash_tree):
     314                raise BadOrMissingHash()
     315            ct_hashes = dict(enumerate(hashes))
     316            try:
     317                crypttext_hash_tree.set_hashes(ct_hashes)
     318            except IndexError, le:
     319                raise BadOrMissingHash(le)
     320            except (hashtree.BadHashError, hashtree.NotEnoughHashesError), le:
     321                raise BadOrMissingHash(le)
     322        d.addCallback(_got_crypttext_hashes)
     323        return d
     324
     325    def get_block(self, blocknum):
     326        # the first time we use this bucket, we need to fetch enough elements
     327        # of the share hash tree to validate it from our share hash up to the
     328        # hashroot.
     329        if self.share_hash_tree.needed_hashes(self.sharenum):
     330            d1 = self.bucket.get_share_hashes()
     331        else:
     332            d1 = defer.succeed([])
     333
     334        # We might need to grab some elements of our block hash tree, to
     335        # validate the requested block up to the share hash.
     336        blockhashesneeded = self.block_hash_tree.needed_hashes(blocknum, include_leaf=True)
     337        # We don't need the root of the block hash tree, as that comes in the
     338        # share tree.
     339        blockhashesneeded.discard(0)
     340        d2 = self.bucket.get_block_hashes(blockhashesneeded)
     341
     342        if blocknum < self.num_blocks-1:
     343            thisblocksize = self.block_size
     344        else:
     345            thisblocksize = self.share_size % self.block_size
     346            if thisblocksize == 0:
     347                thisblocksize = self.block_size
     348        d3 = self.bucket.get_block_data(blocknum,
     349                                        self.block_size, thisblocksize)
     350
     351        dl = deferredutil.gatherResults([d1, d2, d3])
     352        dl.addCallback(self._got_data, blocknum)
     353        return dl
     354
     355    def _got_data(self, results, blocknum):
     356        precondition(blocknum < self.num_blocks,
     357                     self, blocknum, self.num_blocks)
     358        sharehashes, blockhashes, blockdata = results
     359        try:
     360            sharehashes = dict(sharehashes)
     361        except ValueError, le:
     362            le.args = tuple(le.args + (sharehashes,))
     363            raise
     364        blockhashes = dict(enumerate(blockhashes))
     365
     366        candidate_share_hash = None # in case we log it in the except block below
     367        blockhash = None # in case we log it in the except block below
     368
     369        try:
     370            if self.share_hash_tree.needed_hashes(self.sharenum):
     371                # This will raise exception if the values being passed do not
     372                # match the root node of self.share_hash_tree.
     373                try:
     374                    self.share_hash_tree.set_hashes(sharehashes)
     375                except IndexError, le:
     376                    # Weird -- sharehashes contained index numbers outside of
     377                    # the range that fit into this hash tree.
     378                    raise BadOrMissingHash(le)
     379
     380            # To validate a block we need the root of the block hash tree,
     381            # which is also one of the leafs of the share hash tree, and is
     382            # called "the share hash".
     383            if not self.block_hash_tree[0]: # empty -- no root node yet
     384                # Get the share hash from the share hash tree.
     385                share_hash = self.share_hash_tree.get_leaf(self.sharenum)
     386                if not share_hash:
     387                    # No root node in block_hash_tree and also the share hash
     388                    # wasn't sent by the server.
     389                    raise hashtree.NotEnoughHashesError
     390                self.block_hash_tree.set_hashes({0: share_hash})
     391
     392            if self.block_hash_tree.needed_hashes(blocknum):
     393                self.block_hash_tree.set_hashes(blockhashes)
     394
     395            blockhash = block_hash(blockdata)
     396            self.block_hash_tree.set_hashes(leaves={blocknum: blockhash})
     397            #self.log("checking block_hash(shareid=%d, blocknum=%d) len=%d "
     398            #        "%r .. %r: %s" %
     399            #        (self.sharenum, blocknum, len(blockdata),
     400            #         blockdata[:50], blockdata[-50:], base32.b2a(blockhash)))
     401
     402        except (hashtree.BadHashError, hashtree.NotEnoughHashesError), le:
     403            # log.WEIRD: indicates undetected disk/network error, or more
     404            # likely a programming error
     405            self.log("hash failure in block=%d, shnum=%d on %s" %
     406                    (blocknum, self.sharenum, self.bucket))
     407            if self.block_hash_tree.needed_hashes(blocknum):
     408                self.log(""" failure occurred when checking the block_hash_tree.
     409                This suggests that either the block data was bad, or that the
     410                block hashes we received along with it were bad.""")
     411            else:
     412                self.log(""" the failure probably occurred when checking the
     413                share_hash_tree, which suggests that the share hashes we
     414                received from the remote peer were bad.""")
     415            self.log(" have candidate_share_hash: %s" % bool(candidate_share_hash))
     416            self.log(" block length: %d" % len(blockdata))
     417            self.log(" block hash: %s" % base32.b2a_or_none(blockhash))
     418            if len(blockdata) < 100:
     419                self.log(" block data: %r" % (blockdata,))
     420            else:
     421                self.log(" block data start/end: %r .. %r" %
     422                        (blockdata[:50], blockdata[-50:]))
     423            self.log(" share hash tree:\n" + self.share_hash_tree.dump())
     424            self.log(" block hash tree:\n" + self.block_hash_tree.dump())
     425            lines = []
     426            for i,h in sorted(sharehashes.items()):
     427                lines.append("%3d: %s" % (i, base32.b2a_or_none(h)))
     428            self.log(" sharehashes:\n" + "\n".join(lines) + "\n")
     429            lines = []
     430            for i,h in blockhashes.items():
     431                lines.append("%3d: %s" % (i, base32.b2a_or_none(h)))
     432            log.msg(" blockhashes:\n" + "\n".join(lines) + "\n")
     433            raise BadOrMissingHash(le)
     434
     435        # If we made it here, the block is good. If the hash trees didn't
     436        # like what they saw, they would have raised a BadHashError, causing
     437        # our caller to see a Failure and thus ignore this block (as well as
     438        # dropping this bucket).
     439        return blockdata
     440
     441
    14442class Checker(log.PrefixingLogMixin):
    15443    """I query all servers to see if M uniquely-numbered shares are
    16444    available.
    class Checker(log.PrefixingLogMixin): 
    85513            level = log.WEIRD
    86514            if f.check(DeadReferenceError):
    87515                level = log.UNUSUAL
    88             self.log("failure from server on 'get_buckets' the REMOTE failure was:", facility="tahoe.immutable.checker", failure=f, level=level, umid="3uuBUQ")
     516            self.log("failure from server on 'get_buckets' the REMOTE failure was:",
     517                     facility="tahoe.immutable.checker",
     518                     failure=f, level=level, umid="AX7wZQ")
    89519            return ({}, serverid, False)
    90520
    91521        d.addCallbacks(_wrap_results, _trap_errs)
    class Checker(log.PrefixingLogMixin): 
    146576
    147577        vcap = self._verifycap
    148578        b = layout.ReadBucketProxy(bucket, serverid, vcap.get_storage_index())
    149         veup = download.ValidatedExtendedURIProxy(b, vcap)
     579        veup = ValidatedExtendedURIProxy(b, vcap)
    150580        d = veup.start()
    151581
    152582        def _got_ueb(vup):
    153583            share_hash_tree = IncompleteHashTree(vcap.total_shares)
    154584            share_hash_tree.set_hashes({0: vup.share_root_hash})
    155585
    156             vrbp = download.ValidatedReadBucketProxy(sharenum, b,
    157                                                      share_hash_tree,
    158                                                      vup.num_segments,
    159                                                      vup.block_size,
    160                                                      vup.share_size)
     586            vrbp = ValidatedReadBucketProxy(sharenum, b,
     587                                            share_hash_tree,
     588                                            vup.num_segments,
     589                                            vup.block_size,
     590                                            vup.share_size)
    161591
    162592            # note: normal download doesn't use get_all_sharehashes(),
    163593            # because it gets more data than necessary. We've discussed the
    class Checker(log.PrefixingLogMixin): 
    216646                return (False, sharenum, 'incompatible')
    217647            elif f.check(layout.LayoutInvalid,
    218648                         layout.RidiculouslyLargeURIExtensionBlock,
    219                          download.BadOrMissingHash,
    220                          download.BadURIExtensionHashValue):
     649                         BadOrMissingHash,
     650                         BadURIExtensionHashValue):
    221651                return (False, sharenum, 'corrupt')
    222652
    223653            # if it wasn't one of those reasons, re-raise the error
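
The Verifier path above reuses the downloader's Merkle machinery: the root of a share's block hash tree is a leaf ("the share hash") of the share hash tree, so a single block is validated by hashing it and checking a chain of sibling hashes up to that trusted root (see get_block() and _got_data()). The standalone illustration below shows that chain check with plain hashlib.sha256 over a four-block binary tree; Tahoe itself uses tagged SHA-256d hashes from allmydata.util.hashutil and the hashtree module, so treat this as a sketch of the idea rather than the project's actual hash layout.

    import hashlib

    def h(*parts):
        return hashlib.sha256(b"".join(parts)).digest()

    # four blocks -> a tiny binary block hash tree
    blocks = [b"block-0", b"block-1", b"block-2", b"block-3"]
    leaves = [h(b) for b in blocks]
    level1 = [h(leaves[0], leaves[1]), h(leaves[2], leaves[3])]
    root = h(level1[0], level1[1])    # trusted: delivered via the share hash tree

    def validate_block(blocknum, blockdata, sibling, uncle, trusted_root):
        # recompute the path from the received block up to the root and compare
        leaf = h(blockdata)
        node = h(leaf, sibling) if blocknum % 2 == 0 else h(sibling, leaf)
        candidate = h(node, uncle) if blocknum // 2 == 0 else h(uncle, node)
        return candidate == trusted_root

    print(validate_block(2, blocks[2], leaves[3], level1[0], root))     # True
    print(validate_block(2, b"corrupt", leaves[3], level1[0], root))    # False
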
  • deleted file src/allmydata/immutable/download.py

    diff --git a/src/allmydata/immutable/download.py b/src/allmydata/immutable/download.py
    deleted file mode 100644
    index eb02c6a..0000000
    + -  
    1 import random, weakref, itertools, time
    2 from zope.interface import implements
    3 from twisted.internet import defer, reactor
    4 from twisted.internet.interfaces import IPushProducer, IConsumer
    5 from foolscap.api import DeadReferenceError, RemoteException, eventually
    6 
    7 from allmydata.util import base32, deferredutil, hashutil, log, mathutil, idlib
    8 from allmydata.util.assertutil import _assert, precondition
    9 from allmydata import codec, hashtree, uri
    10 from allmydata.interfaces import IDownloadTarget, IDownloader, IVerifierURI, \
    11      IDownloadStatus, IDownloadResults, IValidatedThingProxy, \
    12      IStorageBroker, NotEnoughSharesError, NoSharesError, NoServersError, \
    13      UnableToFetchCriticalDownloadDataError
    14 from allmydata.immutable import layout
    15 from allmydata.monitor import Monitor
    16 from pycryptopp.cipher.aes import AES
    17 
    18 class IntegrityCheckReject(Exception):
    19     pass
    20 
    21 class BadURIExtensionHashValue(IntegrityCheckReject):
    22     pass
    23 class BadURIExtension(IntegrityCheckReject):
    24     pass
    25 class UnsupportedErasureCodec(BadURIExtension):
    26     pass
    27 class BadCrypttextHashValue(IntegrityCheckReject):
    28     pass
    29 class BadOrMissingHash(IntegrityCheckReject):
    30     pass
    31 
    32 class DownloadStopped(Exception):
    33     pass
    34 
    35 class DownloadResults:
    36     implements(IDownloadResults)
    37 
    38     def __init__(self):
    39         self.servers_used = set()
    40         self.server_problems = {}
    41         self.servermap = {}
    42         self.timings = {}
    43         self.file_size = None
    44 
    45 class DecryptingTarget(log.PrefixingLogMixin):
    46     implements(IDownloadTarget, IConsumer)
    47     def __init__(self, target, key, _log_msg_id=None):
    48         precondition(IDownloadTarget.providedBy(target), target)
    49         self.target = target
    50         self._decryptor = AES(key)
    51         prefix = str(target)
    52         log.PrefixingLogMixin.__init__(self, "allmydata.immutable.download", _log_msg_id, prefix=prefix)
    53     # methods to satisfy the IConsumer interface
    54     def registerProducer(self, producer, streaming):
    55         if IConsumer.providedBy(self.target):
    56             self.target.registerProducer(producer, streaming)
    57     def unregisterProducer(self):
    58         if IConsumer.providedBy(self.target):
    59             self.target.unregisterProducer()
    60     def write(self, ciphertext):
    61         plaintext = self._decryptor.process(ciphertext)
    62         self.target.write(plaintext)
    63     def open(self, size):
    64         self.target.open(size)
    65     def close(self):
    66         self.target.close()
    67     def finish(self):
    68         return self.target.finish()
    69     # The following methods is just to pass through to the next target, and
    70     # just because that target might be a repairer.DownUpConnector, and just
    71     # because the current CHKUpload object expects to find the storage index
    72     # in its Uploadable.
    73     def set_storageindex(self, storageindex):
    74         self.target.set_storageindex(storageindex)
    75     def set_encodingparams(self, encodingparams):
    76         self.target.set_encodingparams(encodingparams)
    77 
    78 class ValidatedThingObtainer:
    79     def __init__(self, validatedthingproxies, debugname, log_id):
    80         self._validatedthingproxies = validatedthingproxies
    81         self._debugname = debugname
    82         self._log_id = log_id
    83 
    84     def _bad(self, f, validatedthingproxy):
    85         f.trap(RemoteException, DeadReferenceError,
    86                IntegrityCheckReject, layout.LayoutInvalid,
    87                layout.ShareVersionIncompatible)
    88         level = log.WEIRD
    89         if f.check(DeadReferenceError):
    90             level = log.UNUSUAL
    91         elif f.check(RemoteException):
    92             level = log.WEIRD
    93         else:
    94             level = log.SCARY
    95         log.msg(parent=self._log_id, facility="tahoe.immutable.download",
    96                 format="operation %(op)s from validatedthingproxy %(validatedthingproxy)s failed",
    97                 op=self._debugname, validatedthingproxy=str(validatedthingproxy),
    98                 failure=f, level=level, umid="JGXxBA")
    99         if not self._validatedthingproxies:
    100             raise UnableToFetchCriticalDownloadDataError("ran out of peers, last error was %s" % (f,))
    101         # try again with a different one
    102         d = self._try_the_next_one()
    103         return d
    104 
    105     def _try_the_next_one(self):
    106         vtp = self._validatedthingproxies.pop(0)
    107         # start() obtains, validates, and callsback-with the thing or else
    108         # errbacks
    109         d = vtp.start()
    110         d.addErrback(self._bad, vtp)
    111         return d
    112 
    113     def start(self):
    114         return self._try_the_next_one()
    115 
    116 class ValidatedCrypttextHashTreeProxy:
    117     implements(IValidatedThingProxy)
    118     """ I am a front-end for a remote crypttext hash tree using a local
    119     ReadBucketProxy -- I use its get_crypttext_hashes() method and offer the
    120     Validated Thing protocol (i.e., I have a start() method that fires with
    121     self once I get a valid one)."""
    122     def __init__(self, readbucketproxy, crypttext_hash_tree, num_segments,
    123                  fetch_failures=None):
    124         # fetch_failures is for debugging -- see test_encode.py
    125         self._readbucketproxy = readbucketproxy
    126         self._num_segments = num_segments
    127         self._fetch_failures = fetch_failures
    128         self._crypttext_hash_tree = crypttext_hash_tree
    129 
    130     def _validate(self, proposal):
    131         ct_hashes = dict(list(enumerate(proposal)))
    132         try:
    133             self._crypttext_hash_tree.set_hashes(ct_hashes)
    134         except (hashtree.BadHashError, hashtree.NotEnoughHashesError), le:
    135             if self._fetch_failures is not None:
    136                 self._fetch_failures["crypttext_hash_tree"] += 1
    137             raise BadOrMissingHash(le)
    138         # If we now have enough of the crypttext hash tree to integrity-check
    139         # *any* segment of ciphertext, then we are done. TODO: It would have
    140         # better alacrity if we downloaded only part of the crypttext hash
    141         # tree at a time.
    142         for segnum in range(self._num_segments):
    143             if self._crypttext_hash_tree.needed_hashes(segnum):
    144                 raise BadOrMissingHash("not enough hashes to validate segment number %d" % (segnum,))
    145         return self
    146 
    147     def start(self):
    148         d = self._readbucketproxy.get_crypttext_hashes()
    149         d.addCallback(self._validate)
    150         return d
    151 
    152 class ValidatedExtendedURIProxy:
    153     implements(IValidatedThingProxy)
    154     """ I am a front-end for a remote UEB (using a local ReadBucketProxy),
    155     responsible for retrieving and validating the elements from the UEB."""
    156 
    157     def __init__(self, readbucketproxy, verifycap, fetch_failures=None):
    158         # fetch_failures is for debugging -- see test_encode.py
    159         self._fetch_failures = fetch_failures
    160         self._readbucketproxy = readbucketproxy
    161         precondition(IVerifierURI.providedBy(verifycap), verifycap)
    162         self._verifycap = verifycap
    163 
    164         # required
    165         self.segment_size = None
    166         self.crypttext_root_hash = None
    167         self.share_root_hash = None
    168 
    169         # computed
    170         self.block_size = None
    171         self.share_size = None
    172         self.num_segments = None
    173         self.tail_data_size = None
    174         self.tail_segment_size = None
    175 
    176         # optional
    177         self.crypttext_hash = None
    178 
    179     def __str__(self):
    180         return "<%s %s>" % (self.__class__.__name__, self._verifycap.to_string())
    181 
    182     def _check_integrity(self, data):
    183         h = hashutil.uri_extension_hash(data)
    184         if h != self._verifycap.uri_extension_hash:
    185             msg = ("The copy of uri_extension we received from %s was bad: wanted %s, got %s" %
    186                    (self._readbucketproxy,
    187                     base32.b2a(self._verifycap.uri_extension_hash),
    188                     base32.b2a(h)))
    189             if self._fetch_failures is not None:
    190                 self._fetch_failures["uri_extension"] += 1
    191             raise BadURIExtensionHashValue(msg)
    192         else:
    193             return data
    194 
    195     def _parse_and_validate(self, data):
    196         self.share_size = mathutil.div_ceil(self._verifycap.size,
    197                                             self._verifycap.needed_shares)
    198 
    199         d = uri.unpack_extension(data)
    200 
    201         # There are several kinds of things that can be found in a UEB.
    202         # First, things that we really need to learn from the UEB in order to
    203         # do this download. Next: things which are optional but not redundant
    204         # -- if they are present in the UEB they will get used. Next, things
    205         # that are optional and redundant. These things are required to be
    206         # consistent: they don't have to be in the UEB, but if they are in
    207         # the UEB then they will be checked for consistency with the
    208         # already-known facts, and if they are inconsistent then an exception
    209         # will be raised. These things aren't actually used -- they are just
    210         # tested for consistency and ignored. Finally: things which are
    211         # deprecated -- they ought not be in the UEB at all, and if they are
    212         # present then a warning will be logged but they are otherwise
    213         # ignored.
    214 
    215         # First, things that we really need to learn from the UEB:
    216         # segment_size, crypttext_root_hash, and share_root_hash.
    217         self.segment_size = d['segment_size']
    218 
    219         self.block_size = mathutil.div_ceil(self.segment_size,
    220                                             self._verifycap.needed_shares)
    221         self.num_segments = mathutil.div_ceil(self._verifycap.size,
    222                                               self.segment_size)
    223 
    224         self.tail_data_size = self._verifycap.size % self.segment_size
    225         if not self.tail_data_size:
    226             self.tail_data_size = self.segment_size
    227         # padding for erasure code
    228         self.tail_segment_size = mathutil.next_multiple(self.tail_data_size,
    229                                                         self._verifycap.needed_shares)
    230 
    231         # Ciphertext hash tree root is mandatory, so that there is at most
    232         # one ciphertext that matches this read-cap or verify-cap. The
    233         # integrity check on the shares is not sufficient to prevent the
    234         # original encoder from creating some shares of file A and other
    235         # shares of file B.
    236         self.crypttext_root_hash = d['crypttext_root_hash']
    237 
    238         self.share_root_hash = d['share_root_hash']
    239 
    240 
    241         # Next: things that are optional and not redundant: crypttext_hash
    242         if d.has_key('crypttext_hash'):
    243             self.crypttext_hash = d['crypttext_hash']
    244             if len(self.crypttext_hash) != hashutil.CRYPTO_VAL_SIZE:
    245                 raise BadURIExtension('crypttext_hash is required to be hashutil.CRYPTO_VAL_SIZE bytes, not %s bytes' % (len(self.crypttext_hash),))
    246 
    247 
    248         # Next: things that are optional, redundant, and required to be
    249         # consistent: codec_name, codec_params, tail_codec_params,
    250         # num_segments, size, needed_shares, total_shares
    251         if d.has_key('codec_name'):
    252             if d['codec_name'] != "crs":
    253                 raise UnsupportedErasureCodec(d['codec_name'])
    254 
    255         if d.has_key('codec_params'):
    256             ucpss, ucpns, ucpts = codec.parse_params(d['codec_params'])
    257             if ucpss != self.segment_size:
    258                 raise BadURIExtension("inconsistent erasure code params: "
    259                                       "ucpss: %s != self.segment_size: %s" %
    260                                       (ucpss, self.segment_size))
    261             if ucpns != self._verifycap.needed_shares:
    262                 raise BadURIExtension("inconsistent erasure code params: ucpns: %s != "
    263                                       "self._verifycap.needed_shares: %s" %
    264                                       (ucpns, self._verifycap.needed_shares))
    265             if ucpts != self._verifycap.total_shares:
    266                 raise BadURIExtension("inconsistent erasure code params: ucpts: %s != "
    267                                       "self._verifycap.total_shares: %s" %
    268                                       (ucpts, self._verifycap.total_shares))
    269 
    270         if d.has_key('tail_codec_params'):
    271             utcpss, utcpns, utcpts = codec.parse_params(d['tail_codec_params'])
    272             if utcpss != self.tail_segment_size:
    273                 raise BadURIExtension("inconsistent erasure code params: utcpss: %s != "
    274                                       "self.tail_segment_size: %s, self._verifycap.size: %s, "
    275                                       "self.segment_size: %s, self._verifycap.needed_shares: %s"
    276                                       % (utcpss, self.tail_segment_size, self._verifycap.size,
    277                                          self.segment_size, self._verifycap.needed_shares))
    278             if utcpns != self._verifycap.needed_shares:
    279                 raise BadURIExtension("inconsistent erasure code params: utcpns: %s != "
    280                                       "self._verifycap.needed_shares: %s" % (utcpns,
    281                                                                              self._verifycap.needed_shares))
    282             if utcpts != self._verifycap.total_shares:
    283                 raise BadURIExtension("inconsistent erasure code params: utcpts: %s != "
    284                                       "self._verifycap.total_shares: %s" % (utcpts,
    285                                                                             self._verifycap.total_shares))
    286 
    287         if d.has_key('num_segments'):
    288             if d['num_segments'] != self.num_segments:
    289                 raise BadURIExtension("inconsistent num_segments: size: %s, "
    290                                       "segment_size: %s, computed_num_segments: %s, "
    291                                       "ueb_num_segments: %s" % (self._verifycap.size,
    292                                                                 self.segment_size,
    293                                                                 self.num_segments, d['num_segments']))
    294 
    295         if d.has_key('size'):
    296             if d['size'] != self._verifycap.size:
    297                 raise BadURIExtension("inconsistent size: URI size: %s, UEB size: %s" %
    298                                       (self._verifycap.size, d['size']))
    299 
    300         if d.has_key('needed_shares'):
    301             if d['needed_shares'] != self._verifycap.needed_shares:
    302                 raise BadURIExtension("inconsistent needed shares: URI needed shares: %s, UEB "
    303                                       "needed shares: %s" % (self._verifycap.needed_shares,
    304                                                              d['needed_shares']))
    305 
    306         if d.has_key('total_shares'):
    307             if d['total_shares'] != self._verifycap.total_shares:
    308                 raise BadURIExtension("inconsistent total shares: URI total shares: %s, UEB "
    309                                       "total shares: %s" % (self._verifycap.total_shares,
    310                                                             d['total_shares']))
    311 
    312         # Finally, things that are deprecated and ignored: plaintext_hash,
    313         # plaintext_root_hash
    314         if d.get('plaintext_hash'):
    315             log.msg("Found plaintext_hash in UEB. This field is deprecated for security reasons "
    316                     "and is no longer used.  Ignoring.  %s" % (self,))
    317         if d.get('plaintext_root_hash'):
    318             log.msg("Found plaintext_root_hash in UEB. This field is deprecated for security "
    319                     "reasons and is no longer used.  Ignoring.  %s" % (self,))
    320 
    321         return self
    322 
    323     def start(self):
    324         """Fetch the UEB from bucket, compare its hash to the hash from
    325         verifycap, then parse it. Returns a deferred which is called back
    326         with self once the fetch is successful, or which errbacks if it
    327         fails."""
    328         d = self._readbucketproxy.get_uri_extension()
    329         d.addCallback(self._check_integrity)
    330         d.addCallback(self._parse_and_validate)
    331         return d
    332 
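
The optional-but-redundant UEB fields above all follow the same rule: if the field is present, it must agree with the value we already trust (from the verify cap or the mandatory fields), otherwise the extension block is rejected. A standalone sketch of that rule, illustrative only (the helper name, exception, and sample values are hypothetical, not from the patch):

    def check_redundant(d, key, expected, exc=ValueError):
        # if the redundant field is present, it must match the trusted value
        if key in d and d[key] != expected:
            raise exc("inconsistent %s: expected %s, UEB gave %s"
                      % (key, expected, d[key]))

    ueb = {"needed_shares": 3, "total_shares": 10}
    check_redundant(ueb, "needed_shares", 3)   # present and consistent: fine
    check_redundant(ueb, "size", 12345)        # absent: nothing to check
    try:
        check_redundant(ueb, "total_shares", 7)
    except ValueError:
        pass                                   # present but inconsistent: rejected
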
    333 class ValidatedReadBucketProxy(log.PrefixingLogMixin):
    334     """I am a front-end for a remote storage bucket, responsible for
    335     retrieving and validating data from that bucket.
    336 
    337     My get_block() method is used by BlockDownloaders.
    338     """
    339 
    340     def __init__(self, sharenum, bucket, share_hash_tree, num_blocks,
    341                  block_size, share_size):
    342         """ share_hash_tree is required to have already been initialized with
    343         the root hash (the number-0 hash), using the share_root_hash from the
    344         UEB"""
    345         precondition(share_hash_tree[0] is not None, share_hash_tree)
    346         prefix = "%d-%s-%s" % (sharenum, bucket,
    347                                base32.b2a_l(share_hash_tree[0][:8], 60))
    348         log.PrefixingLogMixin.__init__(self,
    349                                        facility="tahoe.immutable.download",
    350                                        prefix=prefix)
    351         self.sharenum = sharenum
    352         self.bucket = bucket
    353         self.share_hash_tree = share_hash_tree
    354         self.num_blocks = num_blocks
    355         self.block_size = block_size
    356         self.share_size = share_size
    357         self.block_hash_tree = hashtree.IncompleteHashTree(self.num_blocks)
    358 
    359     def get_all_sharehashes(self):
    360         """Retrieve and validate all the share-hash-tree nodes that are
    361         included in this share, regardless of whether we need them to
    362         validate the share or not. Each share contains a minimal Merkle tree
    363         chain, but there is lots of overlap, so usually we'll be using hashes
    364         from other shares and not reading every single hash from this share.
    365         The Verifier uses this function to read and validate every single
    366         hash from this share.
    367 
    368         Call this (and wait for the Deferred it returns to fire) before
    369         calling get_block() for the first time: this lets us check that the
    370         share contains enough hashes to validate its own data, and
    371         avoids downloading any share hash twice.
    372 
    373         I return a Deferred which errbacks upon failure, probably with
    374         BadOrMissingHash."""
    375 
    376         d = self.bucket.get_share_hashes()
    377         def _got_share_hashes(sh):
    378             sharehashes = dict(sh)
    379             try:
    380                 self.share_hash_tree.set_hashes(sharehashes)
    381             except IndexError, le:
    382                 raise BadOrMissingHash(le)
    383             except (hashtree.BadHashError, hashtree.NotEnoughHashesError), le:
    384                 raise BadOrMissingHash(le)
    385         d.addCallback(_got_share_hashes)
    386         return d
    387 
    388     def get_all_blockhashes(self):
    389         """Retrieve and validate all the block-hash-tree nodes that are
    390         included in this share. Each share contains a full Merkle tree, but
    391         we usually only fetch the minimal subset necessary for any particular
    392         block. This function fetches everything at once. The Verifier uses
    393         this function to validate the block hash tree.
    394 
    395         Call this (and wait for the Deferred it returns to fire) after
    396         calling get_all_sharehashes() and before calling get_block() for the
    397         first time: this lets us check that the share contains all block
    398         hashes and avoids downloading them multiple times.
    399 
    400         I return a Deferred which errbacks upon failure, probably with
    401         BadOrMissingHash.
    402         """
    403 
    404         # get_block_hashes(anything) currently always returns everything
    405         needed = list(range(len(self.block_hash_tree)))
    406         d = self.bucket.get_block_hashes(needed)
    407         def _got_block_hashes(blockhashes):
    408             if len(blockhashes) < len(self.block_hash_tree):
    409                 raise BadOrMissingHash()
    410             bh = dict(enumerate(blockhashes))
    411 
    412             try:
    413                 self.block_hash_tree.set_hashes(bh)
    414             except IndexError, le:
    415                 raise BadOrMissingHash(le)
    416             except (hashtree.BadHashError, hashtree.NotEnoughHashesError), le:
    417                 raise BadOrMissingHash(le)
    418         d.addCallback(_got_block_hashes)
    419         return d
    420 
    421     def get_all_crypttext_hashes(self, crypttext_hash_tree):
    422         """Retrieve and validate all the crypttext-hash-tree nodes that are
    423         in this share. Normally we don't look at these at all: the download
    424         process fetches them incrementally as needed to validate each segment
    425         of ciphertext. But this is a convenient place to give the Verifier a
    426         function to validate all of these at once.
    427 
    428         Call this with a new hashtree object for each share, initialized with
    429         the crypttext hash tree root. I return a Deferred which errbacks upon
    430         failure, probably with BadOrMissingHash.
    431         """
    432 
    433         # get_crypttext_hashes() always returns everything
    434         d = self.bucket.get_crypttext_hashes()
    435         def _got_crypttext_hashes(hashes):
    436             if len(hashes) < len(crypttext_hash_tree):
    437                 raise BadOrMissingHash()
    438             ct_hashes = dict(enumerate(hashes))
    439             try:
    440                 crypttext_hash_tree.set_hashes(ct_hashes)
    441             except IndexError, le:
    442                 raise BadOrMissingHash(le)
    443             except (hashtree.BadHashError, hashtree.NotEnoughHashesError), le:
    444                 raise BadOrMissingHash(le)
    445         d.addCallback(_got_crypttext_hashes)
    446         return d
    447 
    448     def get_block(self, blocknum):
    449         # the first time we use this bucket, we need to fetch enough elements
    450         # of the share hash tree to validate it from our share hash up to the
    451         # hashroot.
    452         if self.share_hash_tree.needed_hashes(self.sharenum):
    453             d1 = self.bucket.get_share_hashes()
    454         else:
    455             d1 = defer.succeed([])
    456 
    457         # We might need to grab some elements of our block hash tree, to
    458         # validate the requested block up to the share hash.
    459         blockhashesneeded = self.block_hash_tree.needed_hashes(blocknum, include_leaf=True)
    460         # We don't need the root of the block hash tree, as that comes in the
    461         # share tree.
    462         blockhashesneeded.discard(0)
    463         d2 = self.bucket.get_block_hashes(blockhashesneeded)
    464 
    465         if blocknum < self.num_blocks-1:
    466             thisblocksize = self.block_size
    467         else:
    468             thisblocksize = self.share_size % self.block_size
    469             if thisblocksize == 0:
    470                 thisblocksize = self.block_size
    471         d3 = self.bucket.get_block_data(blocknum,
    472                                         self.block_size, thisblocksize)
    473 
    474         dl = deferredutil.gatherResults([d1, d2, d3])
    475         dl.addCallback(self._got_data, blocknum)
    476         return dl
    477 
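
The tail-block computation in get_block() above is just "whatever is left over after the full-sized blocks, or a full block if the share divides evenly." A tiny standalone sketch with hypothetical numbers (illustrative only, not part of the patch):

    def tail_block_size(share_size, block_size):
        # the last block holds the remainder; an exact multiple means the
        # last block is a full block, not an empty one
        leftover = share_size % block_size
        if leftover == 0:
            return block_size
        return leftover

    assert tail_block_size(2500, 1024) == 452    # 1024 + 1024 + 452 = 2500
    assert tail_block_size(2048, 1024) == 1024   # exact multiple: full last block
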
    478     def _got_data(self, results, blocknum):
    479         precondition(blocknum < self.num_blocks,
    480                      self, blocknum, self.num_blocks)
    481         sharehashes, blockhashes, blockdata = results
    482         try:
    483             sharehashes = dict(sharehashes)
    484         except ValueError, le:
    485             le.args = tuple(le.args + (sharehashes,))
    486             raise
    487         blockhashes = dict(enumerate(blockhashes))
    488 
    489         candidate_share_hash = None # in case we log it in the except block below
    490         blockhash = None # in case we log it in the except block below
    491 
    492         try:
    493             if self.share_hash_tree.needed_hashes(self.sharenum):
    494                 # This will raise exception if the values being passed do not
    495                 # match the root node of self.share_hash_tree.
    496                 try:
    497                     self.share_hash_tree.set_hashes(sharehashes)
    498                 except IndexError, le:
    499                     # Weird -- sharehashes contained index numbers outside of
    500                     # the range that fit into this hash tree.
    501                     raise BadOrMissingHash(le)
    502 
    503             # To validate a block we need the root of the block hash tree,
    504             # which is also one of the leafs of the share hash tree, and is
    505             # called "the share hash".
    506             if not self.block_hash_tree[0]: # empty -- no root node yet
    507                 # Get the share hash from the share hash tree.
    508                 share_hash = self.share_hash_tree.get_leaf(self.sharenum)
    509                 if not share_hash:
    510                     # No root node in block_hash_tree and also the share hash
    511                     # wasn't sent by the server.
    512                     raise hashtree.NotEnoughHashesError
    513                 self.block_hash_tree.set_hashes({0: share_hash})
    514 
    515             if self.block_hash_tree.needed_hashes(blocknum):
    516                 self.block_hash_tree.set_hashes(blockhashes)
    517 
    518             blockhash = hashutil.block_hash(blockdata)
    519             self.block_hash_tree.set_hashes(leaves={blocknum: blockhash})
    520             #self.log("checking block_hash(shareid=%d, blocknum=%d) len=%d "
    521             #        "%r .. %r: %s" %
    522             #        (self.sharenum, blocknum, len(blockdata),
    523             #         blockdata[:50], blockdata[-50:], base32.b2a(blockhash)))
    524 
    525         except (hashtree.BadHashError, hashtree.NotEnoughHashesError), le:
    526             # log.WEIRD: indicates undetected disk/network error, or more
    527             # likely a programming error
    528             self.log("hash failure in block=%d, shnum=%d on %s" %
    529                     (blocknum, self.sharenum, self.bucket))
    530             if self.block_hash_tree.needed_hashes(blocknum):
    531                 self.log(""" failure occurred when checking the block_hash_tree.
    532                 This suggests that either the block data was bad, or that the
    533                 block hashes we received along with it were bad.""")
    534             else:
    535                 self.log(""" the failure probably occurred when checking the
    536                 share_hash_tree, which suggests that the share hashes we
    537                 received from the remote peer were bad.""")
    538             self.log(" have candidate_share_hash: %s" % bool(candidate_share_hash))
    539             self.log(" block length: %d" % len(blockdata))
    540             self.log(" block hash: %s" % base32.b2a_or_none(blockhash))
    541             if len(blockdata) < 100:
    542                 self.log(" block data: %r" % (blockdata,))
    543             else:
    544                 self.log(" block data start/end: %r .. %r" %
    545                         (blockdata[:50], blockdata[-50:]))
    546             self.log(" share hash tree:\n" + self.share_hash_tree.dump())
    547             self.log(" block hash tree:\n" + self.block_hash_tree.dump())
    548             lines = []
    549             for i,h in sorted(sharehashes.items()):
    550                 lines.append("%3d: %s" % (i, base32.b2a_or_none(h)))
    551             self.log(" sharehashes:\n" + "\n".join(lines) + "\n")
    552             lines = []
    553             for i,h in blockhashes.items():
    554                 lines.append("%3d: %s" % (i, base32.b2a_or_none(h)))
    555             log.msg(" blockhashes:\n" + "\n".join(lines) + "\n")
    556             raise BadOrMissingHash(le)
    557 
    558         # If we made it here, the block is good. If the hash trees didn't
    559         # like what they saw, they would have raised a BadHashError, causing
    560         # our caller to see a Failure and thus ignore this block (as well as
    561         # dropping this bucket).
    562         return blockdata
    563 
    564 
    565 
    566 class BlockDownloader(log.PrefixingLogMixin):
    567     """I am responsible for downloading a single block (from a single bucket)
    568     for a single segment.
    569 
    570     I am a child of the SegmentDownloader.
    571     """
    572 
    573     def __init__(self, vbucket, blocknum, parent, results):
    574         precondition(isinstance(vbucket, ValidatedReadBucketProxy), vbucket)
    575         prefix = "%s-%d" % (vbucket, blocknum)
    576         log.PrefixingLogMixin.__init__(self, facility="tahoe.immutable.download", prefix=prefix)
    577         self.vbucket = vbucket
    578         self.blocknum = blocknum
    579         self.parent = parent
    580         self.results = results
    581 
    582     def start(self, segnum):
    583         self.log("get_block(segnum=%d)" % segnum)
    584         started = time.time()
    585         d = self.vbucket.get_block(segnum)
    586         d.addCallbacks(self._hold_block, self._got_block_error,
    587                        callbackArgs=(started,))
    588         return d
    589 
    590     def _hold_block(self, data, started):
    591         if self.results:
    592             elapsed = time.time() - started
    593             peerid = self.vbucket.bucket.get_peerid()
    594             if peerid not in self.results.timings["fetch_per_server"]:
    595                 self.results.timings["fetch_per_server"][peerid] = []
    596             self.results.timings["fetch_per_server"][peerid].append(elapsed)
    597         self.log("got block")
    598         self.parent.hold_block(self.blocknum, data)
    599 
    600     def _got_block_error(self, f):
    601         f.trap(RemoteException, DeadReferenceError,
    602                IntegrityCheckReject, layout.LayoutInvalid,
    603                layout.ShareVersionIncompatible)
    604         if f.check(RemoteException, DeadReferenceError):
    605             level = log.UNUSUAL
    606         else:
    607             level = log.WEIRD
    608         self.log("failure to get block", level=level, umid="5Z4uHQ")
    609         if self.results:
    610             peerid = self.vbucket.bucket.get_peerid()
    611             self.results.server_problems[peerid] = str(f)
    612         self.parent.bucket_failed(self.vbucket)
    613 
    614 class SegmentDownloader:
    615     """I am responsible for downloading all the blocks for a single segment
    616     of data.
    617 
    618     I am a child of the CiphertextDownloader.
    619     """
    620 
    621     def __init__(self, parent, segmentnumber, needed_shares, results):
    622         self.parent = parent
    623         self.segmentnumber = segmentnumber
    624         self.needed_blocks = needed_shares
    625         self.blocks = {} # k: blocknum, v: data
    626         self.results = results
    627         self._log_number = self.parent.log("starting segment %d" %
    628                                            segmentnumber)
    629 
    630     def log(self, *args, **kwargs):
    631         if "parent" not in kwargs:
    632             kwargs["parent"] = self._log_number
    633         return self.parent.log(*args, **kwargs)
    634 
    635     def start(self):
    636         return self._download()
    637 
    638     def _download(self):
    639         d = self._try()
    640         def _done(res):
    641             if len(self.blocks) >= self.needed_blocks:
    642                 # we only need self.needed_blocks blocks
    643                 # we want to get the smallest blockids, because they are
    644                 # more likely to be fast "primary blocks"
    645                 blockids = sorted(self.blocks.keys())[:self.needed_blocks]
    646                 blocks = []
    647                 for blocknum in blockids:
    648                     blocks.append(self.blocks[blocknum])
    649                 return (blocks, blockids)
    650             else:
    651                 return self._download()
    652         d.addCallback(_done)
    653         return d
    654 
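
Once _download() has at least needed_blocks blocks, it keeps only the lowest-numbered ones, since (as the comment above says) small blockids are more likely to be fast "primary blocks". A short illustrative sketch with hypothetical data (not part of the patch):

    blocks = {7: "g", 0: "a", 5: "e", 3: "c"}    # hypothetical blocknum -> data
    needed_blocks = 3
    blockids = sorted(blocks.keys())[:needed_blocks]
    assert blockids == [0, 3, 5]
    assert [blocks[b] for b in blockids] == ["a", "c", "e"]
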
    655     def _try(self):
    656         # fill our set of active buckets, maybe raising NotEnoughSharesError
    657         active_buckets = self.parent._activate_enough_buckets()
    658         # Now we have enough buckets, in self.parent.active_buckets.
    659 
    660         # in test cases, bd.start might mutate active_buckets right away, so
    661         # we need to put off calling start() until we've iterated all the way
    662         # through it.
    663         downloaders = []
    664         for blocknum, vbucket in active_buckets.iteritems():
    665             assert isinstance(vbucket, ValidatedReadBucketProxy), vbucket
    666             bd = BlockDownloader(vbucket, blocknum, self, self.results)
    667             downloaders.append(bd)
    668             if self.results:
    669                 self.results.servers_used.add(vbucket.bucket.get_peerid())
    670         l = [bd.start(self.segmentnumber) for bd in downloaders]
    671         return defer.DeferredList(l, fireOnOneErrback=True)
    672 
    673     def hold_block(self, blocknum, data):
    674         self.blocks[blocknum] = data
    675 
    676     def bucket_failed(self, vbucket):
    677         self.parent.bucket_failed(vbucket)
    678 
    679 class DownloadStatus:
    680     implements(IDownloadStatus)
    681     statusid_counter = itertools.count(0)
    682 
    683     def __init__(self):
    684         self.storage_index = None
    685         self.size = None
    686         self.helper = False
    687         self.status = "Not started"
    688         self.progress = 0.0
    689         self.paused = False
    690         self.stopped = False
    691         self.active = True
    692         self.results = None
    693         self.counter = self.statusid_counter.next()
    694         self.started = time.time()
    695 
    696     def get_started(self):
    697         return self.started
    698     def get_storage_index(self):
    699         return self.storage_index
    700     def get_size(self):
    701         return self.size
    702     def using_helper(self):
    703         return self.helper
    704     def get_status(self):
    705         status = self.status
    706         if self.paused:
    707             status += " (output paused)"
    708         if self.stopped:
    709             status += " (output stopped)"
    710         return status
    711     def get_progress(self):
    712         return self.progress
    713     def get_active(self):
    714         return self.active
    715     def get_results(self):
    716         return self.results
    717     def get_counter(self):
    718         return self.counter
    719 
    720     def set_storage_index(self, si):
    721         self.storage_index = si
    722     def set_size(self, size):
    723         self.size = size
    724     def set_helper(self, helper):
    725         self.helper = helper
    726     def set_status(self, status):
    727         self.status = status
    728     def set_paused(self, paused):
    729         self.paused = paused
    730     def set_stopped(self, stopped):
    731         self.stopped = stopped
    732     def set_progress(self, value):
    733         self.progress = value
    734     def set_active(self, value):
    735         self.active = value
    736     def set_results(self, value):
    737         self.results = value
    738 
    739 class CiphertextDownloader(log.PrefixingLogMixin):
    740     """ I download shares, check their integrity, then decode them, check the
    741     integrity of the resulting ciphertext, and write it to my target.
    742     Before I send any new request to a server, I always ask the 'monitor'
    743     object that was passed into my constructor whether this task has been
    744     cancelled (by invoking its raise_if_cancelled() method)."""
    745     implements(IPushProducer)
    746     _status = None
    747 
    748     def __init__(self, storage_broker, v, target, monitor):
    749 
    750         precondition(IStorageBroker.providedBy(storage_broker), storage_broker)
    751         precondition(IVerifierURI.providedBy(v), v)
    752         precondition(IDownloadTarget.providedBy(target), target)
    753 
    754         self._storage_broker = storage_broker
    755         self._verifycap = v
    756         self._storage_index = v.get_storage_index()
    757         self._uri_extension_hash = v.uri_extension_hash
    758 
    759         prefix=base32.b2a_l(self._storage_index[:8], 60)
    760         log.PrefixingLogMixin.__init__(self, facility="tahoe.immutable.download", prefix=prefix)
    761 
    762         self._started = time.time()
    763         self._status = s = DownloadStatus()
    764         s.set_status("Starting")
    765         s.set_storage_index(self._storage_index)
    766         s.set_size(self._verifycap.size)
    767         s.set_helper(False)
    768         s.set_active(True)
    769 
    770         self._results = DownloadResults()
    771         s.set_results(self._results)
    772         self._results.file_size = self._verifycap.size
    773         self._results.timings["servers_peer_selection"] = {}
    774         self._results.timings["fetch_per_server"] = {}
    775         self._results.timings["cumulative_fetch"] = 0.0
    776         self._results.timings["cumulative_decode"] = 0.0
    777         self._results.timings["cumulative_decrypt"] = 0.0
    778         self._results.timings["paused"] = 0.0
    779 
    780         self._paused = False
    781         self._stopped = False
    782         if IConsumer.providedBy(target):
    783             target.registerProducer(self, True)
    784         self._target = target
    785         # Repairer (uploader) needs the storageindex.
    786         self._target.set_storageindex(self._storage_index)
    787         self._monitor = monitor
    788         self._opened = False
    789 
    790         self.active_buckets = {} # k: shnum, v: bucket
    791         self._share_buckets = {} # k: sharenum, v: list of buckets
    792 
    793         # _download_all_segments() will set this to:
    794         # self._share_vbuckets = {} # k: shnum, v: set of ValidatedBuckets
    795         self._share_vbuckets = None
    796 
    797         self._fetch_failures = {"uri_extension": 0, "crypttext_hash_tree": 0, }
    798 
    799         self._ciphertext_hasher = hashutil.crypttext_hasher()
    800 
    801         self._bytes_done = 0
    802         self._status.set_progress(float(self._bytes_done)/self._verifycap.size)
    803 
    804         # _got_uri_extension() will create the following:
    805         # self._crypttext_hash_tree
    806         # self._share_hash_tree
    807         # self._current_segnum = 0
    808         # self._vup # ValidatedExtendedURIProxy
    809 
    810         # _get_all_shareholders() will create the following:
    811         # self._total_queries
    812         # self._responses_received = 0
    813         # self._queries_failed = 0
    814 
    815         # This is solely for the use of unit tests. It will be triggered when
    816         # we start downloading shares.
    817         self._stage_4_d = defer.Deferred()
    818 
    819     def pauseProducing(self):
    820         if self._paused:
    821             return
    822         self._paused = defer.Deferred()
    823         self._paused_at = time.time()
    824         if self._status:
    825             self._status.set_paused(True)
    826 
    827     def resumeProducing(self):
    828         if self._paused:
    829             paused_for = time.time() - self._paused_at
    830             self._results.timings['paused'] += paused_for
    831             p = self._paused
    832             self._paused = None
    833             eventually(p.callback, None)
    834             if self._status:
    835                 self._status.set_paused(False)
    836 
    837     def stopProducing(self):
    838         self.log("Download.stopProducing")
    839         self._stopped = True
    840         self.resumeProducing()
    841         if self._status:
    842             self._status.set_stopped(True)
    843             self._status.set_active(False)
    844 
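
The pause/resume logic above gates the segment pipeline on a Deferred: pauseProducing() parks a Deferred in self._paused, _check_for_pause() chains the next pipeline step onto it, and resumeProducing() fires it. A minimal standalone sketch of that pattern, assuming only Twisted; it omits the timing, status, and stop handling, and all names here are hypothetical:

    from twisted.internet import defer

    class PauseGate:
        def __init__(self):
            self._paused = None
        def pause(self):
            if self._paused is None:
                self._paused = defer.Deferred()
        def resume(self):
            if self._paused is not None:
                p, self._paused = self._paused, None
                p.callback(None)
        def checkpoint(self, res):
            # insert between pipeline steps: while paused, hold the result
            # until resume() fires the gate, then pass it through unchanged
            if self._paused is not None:
                d = defer.Deferred()
                self._paused.addCallback(lambda _ign: d.callback(res))
                return d
            return res
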
    845     def start(self):
    846         self.log("starting download")
    847 
    848         # first step: who should we download from?
    849         d = defer.maybeDeferred(self._get_all_shareholders)
    850         d.addBoth(self._got_all_shareholders)
    851         # now get the uri_extension block from somebody and integrity check
    852         # it and parse and validate its contents
    853         d.addCallback(self._obtain_uri_extension)
    854         d.addCallback(self._get_crypttext_hash_tree)
    855         # once we know that, we can download blocks from everybody
    856         d.addCallback(self._download_all_segments)
    857         def _finished(res):
    858             if self._status:
    859                 self._status.set_status("Finished")
    860                 self._status.set_active(False)
    861                 self._status.set_paused(False)
    862             if IConsumer.providedBy(self._target):
    863                 self._target.unregisterProducer()
    864             return res
    865         d.addBoth(_finished)
    866         def _failed(why):
    867             if self._status:
    868                 self._status.set_status("Failed")
    869                 self._status.set_active(False)
    870             if why.check(DownloadStopped):
    871                 # DownloadStopped just means the consumer aborted the
    872                 # download; not so scary.
    873                 self.log("download stopped", level=log.UNUSUAL)
    874             else:
    875                 # This is really unusual, and deserves maximum forensics.
    876                 self.log("download failed!", failure=why, level=log.SCARY,
    877                          umid="lp1vaQ")
    878             return why
    879         d.addErrback(_failed)
    880         d.addCallback(self._done)
    881         return d
    882 
    883     def _get_all_shareholders(self):
    884         """ Once the number of buckets that I know about is >= K then I
    885         callback the Deferred that I return.
    886 
    887         If all of the get_buckets deferreds have fired (whether callback
    888         or errback) and I still don't have enough buckets then I'll also
    889         callback -- not errback -- the Deferred that I return.
    890         """
    891         wait_for_enough_buckets_d = defer.Deferred()
    892         self._wait_for_enough_buckets_d = wait_for_enough_buckets_d
    893 
    894         sb = self._storage_broker
    895         servers = sb.get_servers_for_index(self._storage_index)
    896         if not servers:
    897             raise NoServersError("broker gave us no servers!")
    898 
    899         self._total_queries = len(servers)
    900         self._responses_received = 0
    901         self._queries_failed = 0
    902         for (peerid,ss) in servers:
    903             self.log(format="sending DYHB to [%(peerid)s]",
    904                      peerid=idlib.shortnodeid_b2a(peerid),
    905                      level=log.NOISY, umid="rT03hg")
    906             d = ss.callRemote("get_buckets", self._storage_index)
    907             d.addCallbacks(self._got_response, self._got_error,
    908                            callbackArgs=(peerid,))
    909             d.addBoth(self._check_got_all_responses)
    910 
    911         if self._status:
    912             self._status.set_status("Locating Shares (%d/%d)" %
    913                                     (self._responses_received,
    914                                      self._total_queries))
    915         return wait_for_enough_buckets_d
    916 
    917     def _check_got_all_responses(self, ignored=None):
    918         assert (self._responses_received+self._queries_failed) <= self._total_queries
    919         if self._wait_for_enough_buckets_d and (self._responses_received+self._queries_failed) == self._total_queries:
    920             reactor.callLater(0, self._wait_for_enough_buckets_d.callback, False)
    921             self._wait_for_enough_buckets_d = None
    922 
    923     def _got_response(self, buckets, peerid):
    924         # Note that this can continue to receive responses after _wait_for_enough_buckets_d
    925         # has fired.
    926         self._responses_received += 1
    927         self.log(format="got results from [%(peerid)s]: shnums %(shnums)s",
    928                  peerid=idlib.shortnodeid_b2a(peerid),
    929                  shnums=sorted(buckets.keys()),
    930                  level=log.NOISY, umid="o4uwFg")
    931         if self._results:
    932             elapsed = time.time() - self._started
    933             self._results.timings["servers_peer_selection"][peerid] = elapsed
    934         if self._status:
    935             self._status.set_status("Locating Shares (%d/%d)" %
    936                                     (self._responses_received,
    937                                      self._total_queries))
    938         for sharenum, bucket in buckets.iteritems():
    939             b = layout.ReadBucketProxy(bucket, peerid, self._storage_index)
    940             self.add_share_bucket(sharenum, b)
    941             # If we just got enough buckets for the first time, then fire the
    942             # deferred. Then remove it from self so that we don't fire it
    943             # again.
    944             if self._wait_for_enough_buckets_d and len(self._share_buckets) >= self._verifycap.needed_shares:
    945                 reactor.callLater(0, self._wait_for_enough_buckets_d.callback, True)
    946                 self._wait_for_enough_buckets_d = None
    947 
    948             if self._share_vbuckets is not None:
    949                 vbucket = ValidatedReadBucketProxy(sharenum, b, self._share_hash_tree, self._vup.num_segments, self._vup.block_size, self._vup.share_size)
    950                 self._share_vbuckets.setdefault(sharenum, set()).add(vbucket)
    951 
    952             if self._results:
    953                 if peerid not in self._results.servermap:
    954                     self._results.servermap[peerid] = set()
    955                 self._results.servermap[peerid].add(sharenum)
    956 
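
The three methods above implement a fire-once decision: the waiting Deferred fires with True as soon as K distinct share numbers have buckets, or with False once every get_buckets query has either answered or failed; late responses are still processed afterwards. A standalone sketch of just that tally (illustrative only, without the Deferred/reactor plumbing; class and method names are hypothetical):

    class Tally:
        def __init__(self, total_queries, k):
            self.total, self.k = total_queries, k
            self.answered = self.failed = 0
            self.shnums = set()
            self.decision = None            # None until the decision is made
        def _maybe_decide(self, value):
            if self.decision is None:
                self.decision = value
        def got_response(self, shnums):
            self.answered += 1
            self.shnums.update(shnums)
            if len(self.shnums) >= self.k:
                self._maybe_decide(True)    # enough distinct shares
            self._check_all_done()
        def got_error(self):
            self.failed += 1
            self._check_all_done()
        def _check_all_done(self):
            if self.answered + self.failed == self.total:
                self._maybe_decide(False)   # everyone replied, still short

    t = Tally(total_queries=4, k=3)
    t.got_error(); t.got_response([0]); t.got_response([1, 2])
    assert t.decision is True    # K shares found before the last query returned
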
    957     def add_share_bucket(self, sharenum, bucket):
    958         # this is split out for the benefit of test_encode.py
    959         self._share_buckets.setdefault(sharenum, []).append(bucket)
    960 
    961     def _got_error(self, f):
    962         self._queries_failed += 1
    963         level = log.WEIRD
    964         if f.check(DeadReferenceError):
    965             level = log.UNUSUAL
    966         self.log("Error during get_buckets", failure=f, level=level,
    967                          umid="3uuBUQ")
    968 
    969     def bucket_failed(self, vbucket):
    970         shnum = vbucket.sharenum
    971         del self.active_buckets[shnum]
    972         s = self._share_vbuckets[shnum]
    973         # s is a set of ValidatedReadBucketProxy instances
    974         s.remove(vbucket)
    975         # ... which might now be empty
    976         if not s:
    977             # there are no more buckets which can provide this share, so
    978             # remove the key. This may prompt us to use a different share.
    979             del self._share_vbuckets[shnum]
    980 
    981     def _got_all_shareholders(self, res):
    982         if self._results:
    983             now = time.time()
    984             self._results.timings["peer_selection"] = now - self._started
    985 
    986         if len(self._share_buckets) < self._verifycap.needed_shares:
    987             msg = "Failed to get enough shareholders: have %d, need %d" \
    988                   % (len(self._share_buckets), self._verifycap.needed_shares)
    989             if self._share_buckets:
    990                 raise NotEnoughSharesError(msg)
    991             else:
    992                 raise NoSharesError(msg)
    993 
    994         #for s in self._share_vbuckets.values():
    995         #    for vb in s:
    996         #        assert isinstance(vb, ValidatedReadBucketProxy), \
    997         #               "vb is %s but should be a ValidatedReadBucketProxy" % (vb,)
    998 
    999     def _obtain_uri_extension(self, ignored):
    1000         # all shareholders are supposed to have a copy of uri_extension, and
    1001         # all are supposed to be identical. We compute the hash of the data
    1002         # that comes back, and compare it against the version in our URI. If
    1003         # they don't match, ignore their data and try someone else.
    1004         if self._status:
    1005             self._status.set_status("Obtaining URI Extension")
    1006 
    1007         uri_extension_fetch_started = time.time()
    1008 
    1009         vups = []
    1010         for sharenum, buckets in self._share_buckets.iteritems():
    1011             for bucket in buckets:
    1012                 vups.append(ValidatedExtendedURIProxy(bucket, self._verifycap, self._fetch_failures))
    1013         vto = ValidatedThingObtainer(vups, debugname="vups", log_id=self._parentmsgid)
    1014         d = vto.start()
    1015 
    1016         def _got_uri_extension(vup):
    1017             precondition(isinstance(vup, ValidatedExtendedURIProxy), vup)
    1018             if self._results:
    1019                 elapsed = time.time() - uri_extension_fetch_started
    1020                 self._results.timings["uri_extension"] = elapsed
    1021 
    1022             self._vup = vup
    1023             self._codec = codec.CRSDecoder()
    1024             self._codec.set_params(self._vup.segment_size, self._verifycap.needed_shares, self._verifycap.total_shares)
    1025             self._tail_codec = codec.CRSDecoder()
    1026             self._tail_codec.set_params(self._vup.tail_segment_size, self._verifycap.needed_shares, self._verifycap.total_shares)
    1027 
    1028             self._current_segnum = 0
    1029 
    1030             self._share_hash_tree = hashtree.IncompleteHashTree(self._verifycap.total_shares)
    1031             self._share_hash_tree.set_hashes({0: vup.share_root_hash})
    1032 
    1033             self._crypttext_hash_tree = hashtree.IncompleteHashTree(self._vup.num_segments)
    1034             self._crypttext_hash_tree.set_hashes({0: self._vup.crypttext_root_hash})
    1035 
    1036             # Repairer (uploader) needs the encodingparams.
    1037             self._target.set_encodingparams((
    1038                 self._verifycap.needed_shares,
    1039                 0, # see ticket #778 for why this is
    1040                 self._verifycap.total_shares,
    1041                 self._vup.segment_size
    1042                 ))
    1043         d.addCallback(_got_uri_extension)
    1044         return d
    1045 
    1046     def _get_crypttext_hash_tree(self, res):
    1047         vchtps = []
    1048         for sharenum, buckets in self._share_buckets.iteritems():
    1049             for bucket in buckets:
    1050                 vchtp = ValidatedCrypttextHashTreeProxy(bucket, self._crypttext_hash_tree, self._vup.num_segments, self._fetch_failures)
    1051                 vchtps.append(vchtp)
    1052 
    1053         _get_crypttext_hash_tree_started = time.time()
    1054         if self._status:
    1055             self._status.set_status("Retrieving crypttext hash tree")
    1056 
    1057         vto = ValidatedThingObtainer(vchtps, debugname="vchtps",
    1058                                      log_id=self._parentmsgid)
    1059         d = vto.start()
    1060 
    1061         def _got_crypttext_hash_tree(res):
    1062             # Good -- the self._crypttext_hash_tree that we passed to vchtp
    1063             # is now populated with hashes.
    1064             if self._results:
    1065                 elapsed = time.time() - _get_crypttext_hash_tree_started
    1066                 self._results.timings["hashtrees"] = elapsed
    1067         d.addCallback(_got_crypttext_hash_tree)
    1068         return d
    1069 
    1070     def _activate_enough_buckets(self):
    1071         """either return a mapping from shnum to a ValidatedReadBucketProxy
    1072         that can provide data for that share, or raise NotEnoughSharesError"""
    1073 
    1074         while len(self.active_buckets) < self._verifycap.needed_shares:
    1075             # need some more
    1076             handled_shnums = set(self.active_buckets.keys())
    1077             available_shnums = set(self._share_vbuckets.keys())
    1078             potential_shnums = list(available_shnums - handled_shnums)
    1079             if len(potential_shnums) < (self._verifycap.needed_shares
    1080                                         - len(self.active_buckets)):
    1081                 have = len(potential_shnums) + len(self.active_buckets)
    1082                 msg = "Unable to activate enough shares: have %d, need %d" \
    1083                       % (have, self._verifycap.needed_shares)
    1084                 if have:
    1085                     raise NotEnoughSharesError(msg)
    1086                 else:
    1087                     raise NoSharesError(msg)
    1088             # For the next share, choose a primary share if available, else a
    1089             # randomly chosen secondary share.
    1090             potential_shnums.sort()
    1091             if potential_shnums[0] < self._verifycap.needed_shares:
    1092                 shnum = potential_shnums[0]
    1093             else:
    1094                 shnum = random.choice(potential_shnums)
    1095             # and a random bucket that will provide it
    1096             validated_bucket = random.choice(list(self._share_vbuckets[shnum]))
    1097             self.active_buckets[shnum] = validated_bucket
    1098         return self.active_buckets
    1099 
    1100 
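
The selection rule in _activate_enough_buckets() is: take a primary share (shnum < k) whenever one is still unhandled, otherwise pick a random secondary share. A short illustrative sketch with hypothetical numbers (not part of the patch):

    import random

    def choose_next_shnum(potential_shnums, k):
        # prefer a primary share if one is available, else a random secondary
        potential_shnums = sorted(potential_shnums)
        if potential_shnums[0] < k:
            return potential_shnums[0]
        return random.choice(potential_shnums)

    # with k=3 and shares 2, 5, 7 on offer: 2 is a primary share, so take it
    assert choose_next_shnum([7, 2, 5], k=3) == 2
    # with only secondary shares on offer, any of them may be chosen
    assert choose_next_shnum([7, 5], k=3) in (5, 7)
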
    1101     def _download_all_segments(self, res):
    1102         # From now on if new buckets are received then I will notice that
    1103         # self._share_vbuckets is not None and generate a vbucket for that new
    1104         # bucket and add it to _share_vbuckets. (We had to wait because we
    1105         # didn't have self._vup and self._share_hash_tree earlier. We didn't
    1106         # need validated buckets until now -- now that we are ready to download
    1107         # shares.)
    1108         self._share_vbuckets = {}
    1109         for sharenum, buckets in self._share_buckets.iteritems():
    1110             for bucket in buckets:
    1111                 vbucket = ValidatedReadBucketProxy(sharenum, bucket, self._share_hash_tree, self._vup.num_segments, self._vup.block_size, self._vup.share_size)
    1112                 self._share_vbuckets.setdefault(sharenum, set()).add(vbucket)
    1113 
    1114         # after the above code, self._share_vbuckets contains enough
    1115         # buckets to complete the download, and some extra ones to
    1116         # tolerate some buckets dropping out or having
    1117         # errors. self._share_vbuckets is a dictionary that maps from
    1118         # shnum to a set of ValidatedBuckets, which themselves are
    1119         # wrappers around RIBucketReader references.
    1120         self.active_buckets = {} # k: shnum, v: ValidatedReadBucketProxy instance
    1121 
    1122         self._started_fetching = time.time()
    1123 
    1124         d = defer.succeed(None)
    1125         for segnum in range(self._vup.num_segments):
    1126             d.addCallback(self._download_segment, segnum)
    1127             # this pause, at the end of write, prevents pre-fetch from
    1128             # happening until the consumer is ready for more data.
    1129             d.addCallback(self._check_for_pause)
    1130 
    1131         self._stage_4_d.callback(None)
    1132         return d
    1133 
    1134     def _check_for_pause(self, res):
    1135         if self._paused:
    1136             d = defer.Deferred()
    1137             self._paused.addCallback(lambda ignored: d.callback(res))
    1138             return d
    1139         if self._stopped:
    1140             raise DownloadStopped("our Consumer called stopProducing()")
    1141         self._monitor.raise_if_cancelled()
    1142         return res
    1143 
    1144     def _download_segment(self, res, segnum):
    1145         if self._status:
    1146             self._status.set_status("Downloading segment %d of %d" %
    1147                                     (segnum+1, self._vup.num_segments))
    1148         self.log("downloading seg#%d of %d (%d%%)"
    1149                  % (segnum, self._vup.num_segments,
    1150                     100.0 * segnum / self._vup.num_segments))
    1151         # memory footprint: when the SegmentDownloader finishes pulling down
    1152         # all shares, we have 1*segment_size of usage.
    1153         segmentdler = SegmentDownloader(self, segnum,
    1154                                         self._verifycap.needed_shares,
    1155                                         self._results)
    1156         started = time.time()
    1157         d = segmentdler.start()
    1158         def _finished_fetching(res):
    1159             elapsed = time.time() - started
    1160             self._results.timings["cumulative_fetch"] += elapsed
    1161             return res
    1162         if self._results:
    1163             d.addCallback(_finished_fetching)
    1164         # pause before using more memory
    1165         d.addCallback(self._check_for_pause)
    1166         # while the codec does its job, we hit 2*segment_size
    1167         def _started_decode(res):
    1168             self._started_decode = time.time()
    1169             return res
    1170         if self._results:
    1171             d.addCallback(_started_decode)
    1172         if segnum + 1 == self._vup.num_segments:
    1173             codec = self._tail_codec
    1174         else:
    1175             codec = self._codec
    1176         d.addCallback(lambda (shares, shareids): codec.decode(shares, shareids))
    1177         # once the codec is done, we drop back to 1*segment_size, because
    1178         # 'shares' goes out of scope. The memory usage is all in the
    1179         # plaintext now, spread out into a bunch of tiny buffers.
    1180         def _finished_decode(res):
    1181             elapsed = time.time() - self._started_decode
    1182             self._results.timings["cumulative_decode"] += elapsed
    1183             return res
    1184         if self._results:
    1185             d.addCallback(_finished_decode)
    1186 
    1187         # pause/check-for-stop just before writing, to honor stopProducing
    1188         d.addCallback(self._check_for_pause)
    1189         d.addCallback(self._got_segment)
    1190         return d
    1191 
    1192     def _got_segment(self, buffers):
    1193         precondition(self._crypttext_hash_tree)
    1194         started_decrypt = time.time()
    1195         self._status.set_progress(float(self._current_segnum)/self._vup.num_segments)
    1196 
    1197         if self._current_segnum + 1 == self._vup.num_segments:
    1198             # This is the last segment.
    1199             # Trim off any padding added by the upload side. We never send
    1200             # empty segments. If the data was an exact multiple of the
    1201             # segment size, the last segment will be full.
    1202             tail_buf_size = mathutil.div_ceil(self._vup.tail_segment_size, self._verifycap.needed_shares)
    1203             num_buffers_used = mathutil.div_ceil(self._vup.tail_data_size, tail_buf_size)
    1204             # Remove buffers which don't contain any part of the tail.
    1205             del buffers[num_buffers_used:]
    1206             # Remove the past-the-tail-part of the last buffer.
    1207             tail_in_last_buf = self._vup.tail_data_size % tail_buf_size
    1208             if tail_in_last_buf == 0:
    1209                 tail_in_last_buf = tail_buf_size
    1210             buffers[-1] = buffers[-1][:tail_in_last_buf]
    1211 
    1212         # First compute the hash of this segment and check that it fits.
    1213         ch = hashutil.crypttext_segment_hasher()
    1214         for buffer in buffers:
    1215             self._ciphertext_hasher.update(buffer)
    1216             ch.update(buffer)
    1217         self._crypttext_hash_tree.set_hashes(leaves={self._current_segnum: ch.digest()})
    1218 
    1219         # Then write this segment to the target.
    1220         if not self._opened:
    1221             self._opened = True
    1222             self._target.open(self._verifycap.size)
    1223 
    1224         for buffer in buffers:
    1225             self._target.write(buffer)
    1226             self._bytes_done += len(buffer)
    1227 
    1228         self._status.set_progress(float(self._bytes_done)/self._verifycap.size)
    1229         self._current_segnum += 1
    1230 
    1231         if self._results:
    1232             elapsed = time.time() - started_decrypt
    1233             self._results.timings["cumulative_decrypt"] += elapsed
    1234 
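
A worked example of the tail-trimming arithmetic in _got_segment(), with made-up numbers: assume k = needed_shares = 3, a 10-byte tail, and a tail segment padded up to 12 bytes before encoding (the padding value is my assumption, chosen only to make the numbers concrete; div_ceil mirrors allmydata.util.mathutil.div_ceil):

    def div_ceil(n, d):
        # same result as mathutil.div_ceil: ceiling division
        return (n + d - 1) // d

    k = 3
    tail_data_size = 10       # real plaintext-length of the tail (assumed)
    tail_segment_size = 12    # padded tail segment fed to the codec (assumed)

    tail_buf_size = div_ceil(tail_segment_size, k)                  # 4 bytes/buffer
    num_buffers_used = div_ceil(tail_data_size, tail_buf_size)      # keep 3 buffers
    tail_in_last_buf = tail_data_size % tail_buf_size or tail_buf_size  # 2 bytes
    assert (tail_buf_size, num_buffers_used, tail_in_last_buf) == (4, 3, 2)
    # the decoded buffers are cut to 4 + 4 + 2 = 10 bytes: exactly the tail data
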
    1235     def _done(self, res):
    1236         self.log("download done")
    1237         if self._results:
    1238             now = time.time()
    1239             self._results.timings["total"] = now - self._started
    1240             self._results.timings["segments"] = now - self._started_fetching
    1241         if self._vup.crypttext_hash:
    1242             _assert(self._vup.crypttext_hash == self._ciphertext_hasher.digest(),
    1243                     "bad crypttext_hash: computed=%s, expected=%s" %
    1244                     (base32.b2a(self._ciphertext_hasher.digest()),
    1245                      base32.b2a(self._vup.crypttext_hash)))
    1246         _assert(self._bytes_done == self._verifycap.size, self._bytes_done, self._verifycap.size)
    1247         self._status.set_progress(1)
    1248         self._target.close()
    1249         return self._target.finish()
    1250     def get_download_status(self):
    1251         return self._status
    1252 
    1253 
    1254 class ConsumerAdapter:
    1255     implements(IDownloadTarget, IConsumer)
    1256     def __init__(self, consumer):
    1257         self._consumer = consumer
    1258 
    1259     def registerProducer(self, producer, streaming):
    1260         self._consumer.registerProducer(producer, streaming)
    1261     def unregisterProducer(self):
    1262         self._consumer.unregisterProducer()
    1263 
    1264     def open(self, size):
    1265         pass
    1266     def write(self, data):
    1267         self._consumer.write(data)
    1268     def close(self):
    1269         pass
    1270 
    1271     def fail(self, why):
    1272         pass
    1273     def register_canceller(self, cb):
    1274         pass
    1275     def finish(self):
    1276         return self._consumer
    1277     # The following methods exist only because the target might be a
    1278     # repairer.DownUpConnector, and because the current CHKUpload object
    1279     # expects to find the storage index and encoding parameters in its
    1280     # Uploadable.
    1281     def set_storageindex(self, storageindex):
    1282         pass
    1283     def set_encodingparams(self, encodingparams):
    1284         pass
    1285 
    1286 
    1287 class Downloader:
    1288     """I am a service that allows file downloading.
    1289     """
    1290     # TODO: in fact, this service only downloads immutable files (URI:CHK:).
    1291     # It is scheduled to go away, to be replaced by filenode.download()
    1292     implements(IDownloader)
    1293 
    1294     def __init__(self, storage_broker, stats_provider):
    1295         self.storage_broker = storage_broker
    1296         self.stats_provider = stats_provider
    1297         self._all_downloads = weakref.WeakKeyDictionary() # for debugging
    1298 
    1299     def download(self, u, t, _log_msg_id=None, monitor=None, history=None):
    1300         assert isinstance(u, uri.CHKFileURI)
    1301         t = IDownloadTarget(t)
    1302         assert t.write
    1303         assert t.close
    1304 
    1305         if self.stats_provider:
    1306             # these counters are meant for network traffic, and don't
    1307             # include LIT files
    1308             self.stats_provider.count('downloader.files_downloaded', 1)
    1309             self.stats_provider.count('downloader.bytes_downloaded', u.get_size())
    1310 
    1311         target = DecryptingTarget(t, u.key, _log_msg_id=_log_msg_id)
    1312         if not monitor:
    1313             monitor=Monitor()
    1314         dl = CiphertextDownloader(self.storage_broker,
    1315                                   u.get_verify_cap(), target,
    1316                                   monitor=monitor)
    1317         self._all_downloads[dl] = None
    1318         if history:
    1319             history.add_download(dl.get_download_status())
    1320         d = dl.start()
    1321         return d
  • new file src/allmydata/immutable/downloader/common.py

    diff --git a/src/allmydata/immutable/downloader/__init__.py b/src/allmydata/immutable/downloader/__init__.py
    new file mode 100644
    index 0000000..e69de29
    diff --git a/src/allmydata/immutable/downloader/common.py b/src/allmydata/immutable/downloader/common.py
    new file mode 100644
    index 0000000..e9dd271
    - +  
     1
     2(AVAILABLE, PENDING, OVERDUE, COMPLETE, CORRUPT, DEAD, BADSEGNUM) = \
     3 ("AVAILABLE", "PENDING", "OVERDUE", "COMPLETE", "CORRUPT", "DEAD", "BADSEGNUM")
     4
     5class BadSegmentNumberError(Exception):
     6    pass
     7class WrongSegmentError(Exception):
     8    pass
     9class BadCiphertextHashError(Exception):
     10    pass
     11
     12class DownloadStopped(Exception):
     13    pass
  • new file src/allmydata/immutable/downloader/fetcher.py

    diff --git a/src/allmydata/immutable/downloader/fetcher.py b/src/allmydata/immutable/downloader/fetcher.py
    new file mode 100644
    index 0000000..3918f65
    - +  
     1
     2from twisted.python.failure import Failure
     3from foolscap.api import eventually
     4from allmydata.interfaces import NotEnoughSharesError, NoSharesError
     5from allmydata.util import log
     6from allmydata.util.dictutil import DictOfSets
     7from common import AVAILABLE, PENDING, OVERDUE, COMPLETE, CORRUPT, DEAD, \
     8     BADSEGNUM, BadSegmentNumberError
     9
     10class SegmentFetcher:
     11    """I am responsible for acquiring blocks for a single segment. I will use
     12    the Share instances passed to my add_shares() method to locate, retrieve,
     13    and validate those blocks. I expect my parent node to call my
     14    no_more_shares() method when there are no more shares available. I will
     15    call my parent's want_more_shares() method when I want more: I expect to
     16    see at least one call to add_shares or no_more_shares afterwards.
     17
     18    When I have enough validated blocks, I will call my parent's
     19    process_blocks() method with a dictionary that maps shnum to blockdata.
     20    If I am unable to provide enough blocks, I will call my parent's
     21    fetch_failed() method with (self, f). After either of these events, I
     22    will shut down and do no further work. My parent can also call my stop()
     23    method to have me shut down early."""
     24
     25    def __init__(self, node, segnum, k):
     26        self._node = node # _Node
     27        self.segnum = segnum
     28        self._k = k
     29        self._shares = {} # maps non-dead Share instance to a state, one of
     30                          # (AVAILABLE, PENDING, OVERDUE, COMPLETE, CORRUPT).
     31                          # State transition map is:
     32                          #  AVAILABLE -(send-read)-> PENDING
     33                          #  PENDING -(timer)-> OVERDUE
     34                          #  PENDING -(rx)-> COMPLETE, CORRUPT, DEAD, BADSEGNUM
     35                          #  OVERDUE -(rx)-> COMPLETE, CORRUPT, DEAD, BADSEGNUM
     36                          # If a share becomes DEAD, it is removed from the
     37                          # dict. If it becomes BADSEGNUM, the whole fetch is
     38                          # terminated.
     39        self._share_observers = {} # maps Share to EventStreamObserver for
     40                                   # active ones
     41        self._shnums = DictOfSets() # maps shnum to the shares that provide it
     42        self._blocks = {} # maps shnum to validated block data
     43        self._no_more_shares = False
     44        self._bad_segnum = False
     45        self._last_failure = None
     46        self._running = True
     47
     48    def stop(self):
     49        log.msg("SegmentFetcher(%s).stop" % self._node._si_prefix,
     50                level=log.NOISY, umid="LWyqpg")
     51        self._cancel_all_requests()
     52        self._running = False
     53        self._shares.clear() # let GC work # ??? XXX
     54
     55
     56    # called by our parent _Node
     57
     58    def add_shares(self, shares):
     59        # called when ShareFinder locates a new share, and when a non-initial
     60        # segment fetch is started and we already know about shares from the
     61        # previous segment
     62        for s in shares:
     63            self._shares[s] = AVAILABLE
     64            self._shnums.add(s._shnum, s)
     65        eventually(self.loop)
     66
     67    def no_more_shares(self):
     68        # ShareFinder tells us it's reached the end of its list
     69        self._no_more_shares = True
     70        eventually(self.loop)
     71
     72    # internal methods
     73
     74    def _count_shnums(self, *states):
     75        """Count the shnums with at least one share in one of the given states."""
     76        shnums = []
     77        for shnum,shares in self._shnums.iteritems():
     78            matches = [s for s in shares if self._shares.get(s) in states]
     79            if matches:
     80                shnums.append(shnum)
     81        return len(shnums)
     82
     83    def loop(self):
     84        try:
     85            # if any exception occurs here, kill the download
     86            self._do_loop()
     87        except BaseException:
     88            self._node.fetch_failed(self, Failure())
     89            raise
     90
     91    def _do_loop(self):
     92        k = self._k
     93        if not self._running:
     94            return
     95        if self._bad_segnum:
     96            # oops, we were asking for a segment number beyond the end of the
     97            # file. This is an error.
     98            self.stop()
     99            e = BadSegmentNumberError("segnum=%d, numsegs=%d" %
     100                                      (self.segnum, self._node.num_segments))
     101            f = Failure(e)
     102            self._node.fetch_failed(self, f)
     103            return
     104
     105        # are we done?
     106        if self._count_shnums(COMPLETE) >= k:
     107            # yay!
     108            self.stop()
     109            self._node.process_blocks(self.segnum, self._blocks)
     110            return
     111
     112        # we may have exhausted everything
     113        if (self._no_more_shares and
     114            self._count_shnums(AVAILABLE, PENDING, OVERDUE, COMPLETE) < k):
     115            # no more new shares are coming, and the remaining hopeful shares
     116            # aren't going to be enough. boo!
     117
     118            log.msg("share states: %r" % (self._shares,),
     119                    level=log.NOISY, umid="0ThykQ")
     120            if self._count_shnums(AVAILABLE, PENDING, OVERDUE, COMPLETE) == 0:
     121                format = ("no shares (need %(k)d)."
     122                          " Last failure: %(last_failure)s")
     123                args = { "k": k,
     124                         "last_failure": self._last_failure }
     125                error = NoSharesError
     126            else:
     127                format = ("ran out of shares: %(complete)d complete,"
     128                          " %(pending)d pending, %(overdue)d overdue,"
     129                          " %(unused)d unused, need %(k)d."
     130                          " Last failure: %(last_failure)s")
     131                args = {"complete": self._count_shnums(COMPLETE),
     132                        "pending": self._count_shnums(PENDING),
     133                        "overdue": self._count_shnums(OVERDUE),
     134                        # 'unused' should be zero
     135                        "unused": self._count_shnums(AVAILABLE),
     136                        "k": k,
     137                        "last_failure": self._last_failure,
     138                        }
     139                error = NotEnoughSharesError
     140            log.msg(format=format, level=log.UNUSUAL, umid="1DsnTg", **args)
     141            e = error(format % args)
     142            f = Failure(e)
     143            self.stop()
     144            self._node.fetch_failed(self, f)
     145            return
     146
     147        # nope, not done. Are we "block-hungry" (i.e. do we want to send out
     148        # more read requests, or do we think we have enough in flight
     149        # already?)
     150        while self._count_shnums(PENDING, COMPLETE) < k:
     151            # we're hungry.. are there any unused shares?
     152            sent = self._send_new_request()
     153            if not sent:
     154                break
     155
     156        # ok, now are we "share-hungry" (i.e. do we have enough known shares
     157        # to make us happy, or should we ask the ShareFinder to get us more?)
     158        if self._count_shnums(AVAILABLE, PENDING, COMPLETE) < k:
     159            # we're hungry for more shares
     160            self._node.want_more_shares()
     161            # that will trigger the ShareFinder to keep looking
     162
     163    def _find_one(self, shares, state):
     164        # TODO could choose fastest
     165        for s in shares:
     166            if self._shares[s] == state:
     167                return s
     168        # can never get here, caller has assert in case of code bug
     169
     170    def _send_new_request(self):
     171        for shnum,shares in sorted(self._shnums.iteritems()):
     172            states = [self._shares[s] for s in shares]
     173            if COMPLETE in states or PENDING in states:
     174                # don't send redundant requests
     175                continue
     176            if AVAILABLE not in states:
     177                # no candidates for this shnum, move on
     178                continue
     179            # here's a candidate. Send a request.
     180            s = self._find_one(shares, AVAILABLE)
     181            assert s
     182            self._shares[s] = PENDING
     183            self._share_observers[s] = o = s.get_block(self.segnum)
     184            o.subscribe(self._block_request_activity, share=s, shnum=shnum)
     185            # TODO: build up a list of candidates, then walk through the
     186            # list, sending requests to the most desireable servers,
     187            # re-checking our block-hunger each time. For non-initial segment
     188            # fetches, this would let us stick with faster servers.
     189            return True
     190        # nothing was sent: don't call us again until you have more shares to
     191        # work with, or one of the existing shares has been declared OVERDUE
     192        return False
     193
     194    def _cancel_all_requests(self):
     195        for o in self._share_observers.values():
     196            o.cancel()
     197        self._share_observers = {}
     198
     199    def _block_request_activity(self, share, shnum, state, block=None, f=None):
     200        # called by Shares, in response to our s.send_request() calls.
     201        if not self._running:
     202            return
     203        log.msg("SegmentFetcher(%s)._block_request_activity:"
     204                " Share(sh%d-on-%s) -> %s" %
     205                (self._node._si_prefix, shnum, share._peerid_s, state),
     206                level=log.NOISY, umid="vilNWA")
     207        # COMPLETE, CORRUPT, DEAD, BADSEGNUM are terminal.
     208        if state in (COMPLETE, CORRUPT, DEAD, BADSEGNUM):
     209            self._share_observers.pop(share, None)
     210        if state is COMPLETE:
     211            # 'block' is fully validated
     212            self._shares[share] = COMPLETE
     213            self._blocks[shnum] = block
     214        elif state is OVERDUE:
     215            self._shares[share] = OVERDUE
     216            # OVERDUE is not terminal: it will eventually transition to
     217            # COMPLETE, CORRUPT, or DEAD.
     218        elif state is CORRUPT:
     219            self._shares[share] = CORRUPT
     220        elif state is DEAD:
     221            del self._shares[share]
     222            self._shnums[shnum].remove(share)
     223            self._last_failure = f
     224        elif state is BADSEGNUM:
     225            self._shares[share] = BADSEGNUM # ???
     226            self._bad_segnum = True
     227        eventually(self.loop)
     228
     229
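
The SegmentFetcher docstring and the state-transition comment above describe a small per-share state machine, and _count_shnums() is what decides when the fetcher is "done", "block-hungry", or "share-hungry". The sketch below (not part of the patch; the dict names are hypothetical) shows the same counting logic with plain dicts standing in for Share instances and DictOfSets:

    AVAILABLE, PENDING, OVERDUE, COMPLETE = "AVAILABLE", "PENDING", "OVERDUE", "COMPLETE"

    def count_shnums(shares_by_shnum, share_state, *states):
        """Count the shnums that have at least one share in one of *states."""
        count = 0
        for shnum, shares in shares_by_shnum.items():
            if any(share_state.get(s) in states for s in shares):
                count += 1
        return count

    # with k=3: two COMPLETE shnums is not yet "done", but once
    # PENDING+COMPLETE >= k the fetcher stops being block-hungry
    shares_by_shnum = {0: ["sh0@A"], 1: ["sh1@B"], 2: ["sh2@C"]}
    share_state = {"sh0@A": COMPLETE, "sh1@B": COMPLETE, "sh2@C": PENDING}
    assert count_shnums(shares_by_shnum, share_state, COMPLETE) == 2
    assert count_shnums(shares_by_shnum, share_state, PENDING, COMPLETE) == 3
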
  • new file src/allmydata/immutable/downloader/finder.py

    diff --git a/src/allmydata/immutable/downloader/finder.py b/src/allmydata/immutable/downloader/finder.py
    new file mode 100644
    index 0000000..9adee99
    - +  
     1
     2import time
     3now = time.time
     4from foolscap.api import eventually
     5from allmydata.util import base32, log, idlib
     6from twisted.internet import reactor
     7
     8from share import Share, CommonShare
     9
     10def incidentally(res, f, *args, **kwargs):
     11    """Add me to a Deferred chain like this:
     12     d.addBoth(incidentally, func, arg)
     13    and I'll behave as if you'd added the following function:
     14     def _(res):
     15         func(arg)
     16         return res
     17    This is useful if you want to execute an expression when the Deferred
     18    fires, but don't care about its value.
     19    """
     20    f(*args, **kwargs)
     21    return res
     22
     23class RequestToken:
     24    def __init__(self, peerid):
     25        self.peerid = peerid
     26
     27class ShareFinder:
     28    OVERDUE_TIMEOUT = 10.0
     29
     30    def __init__(self, storage_broker, verifycap, node, download_status,
     31                 logparent=None, max_outstanding_requests=10):
     32        self.running = True # stopped by Share.stop, from Terminator
     33        self.verifycap = verifycap
     34        self._started = False
     35        self._storage_broker = storage_broker
     36        self.share_consumer = self.node = node
     37        self.max_outstanding_requests = max_outstanding_requests
     38
     39        self._hungry = False
     40
     41        self._commonshares = {} # shnum to CommonShare instance
     42        self.undelivered_shares = []
     43        self.pending_requests = set()
     44        self.overdue_requests = set() # subset of pending_requests
     45        self.overdue_timers = {}
     46
     47        self._storage_index = verifycap.storage_index
     48        self._si_prefix = base32.b2a_l(self._storage_index[:8], 60)
     49        self._node_logparent = logparent
     50        self._download_status = download_status
     51        self._lp = log.msg(format="ShareFinder[si=%(si)s] starting",
     52                           si=self._si_prefix,
     53                           level=log.NOISY, parent=logparent, umid="2xjj2A")
     54
     55    def start_finding_servers(self):
     56        # don't get servers until somebody uses us: creating the
     57        # ImmutableFileNode should not cause work to happen yet. Test case is
     58        # test_dirnode, which creates us with storage_broker=None
     59        if not self._started:
     60            si = self.verifycap.storage_index
     61            s = self._storage_broker.get_servers_for_index(si)
     62            self._servers = iter(s)
     63            self._started = True
     64
     65    def log(self, *args, **kwargs):
     66        if "parent" not in kwargs:
     67            kwargs["parent"] = self._lp
     68        return log.msg(*args, **kwargs)
     69
     70    def stop(self):
     71        self.running = False
     72        while self.overdue_timers:
     73            req,t = self.overdue_timers.popitem()
     74            t.cancel()
     75
     76    # called by our parent CiphertextDownloader
     77    def hungry(self):
     78        self.log(format="ShareFinder[si=%(si)s] hungry",
     79                 si=self._si_prefix, level=log.NOISY, umid="NywYaQ")
     80        self.start_finding_servers()
     81        self._hungry = True
     82        eventually(self.loop)
     83
     84    # internal methods
     85    def loop(self):
     86        undelivered_s = ",".join(["sh%d@%s" %
     87                                  (s._shnum, idlib.shortnodeid_b2a(s._peerid))
     88                                  for s in self.undelivered_shares])
     89        pending_s = ",".join([idlib.shortnodeid_b2a(rt.peerid)
     90                              for rt in self.pending_requests]) # sort?
     91        self.log(format="ShareFinder loop: running=%(running)s"
     92                 " hungry=%(hungry)s, undelivered=%(undelivered)s,"
     93                 " pending=%(pending)s",
     94                 running=self.running, hungry=self._hungry,
     95                 undelivered=undelivered_s, pending=pending_s,
     96                 level=log.NOISY, umid="kRtS4Q")
     97        if not self.running:
     98            return
     99        if not self._hungry:
     100            return
     101        if self.undelivered_shares:
     102            sh = self.undelivered_shares.pop(0)
     103            # they will call hungry() again if they want more
     104            self._hungry = False
     105            self.log(format="delivering Share(shnum=%(shnum)d, server=%(peerid)s)",
     106                     shnum=sh._shnum, peerid=sh._peerid_s,
     107                     level=log.NOISY, umid="2n1qQw")
     108            eventually(self.share_consumer.got_shares, [sh])
     109            return
     110
     111        non_overdue = self.pending_requests - self.overdue_requests
     112        if len(non_overdue) >= self.max_outstanding_requests:
     113            # cannot send more requests, must wait for some to retire
     114            return
     115
     116        server = None
     117        try:
     118            if self._servers:
     119                server = self._servers.next()
     120        except StopIteration:
     121            self._servers = None
     122
     123        if server:
     124            self.send_request(server)
     125            # we loop again to get parallel queries. The check above will
     126            # prevent us from looping forever.
     127            eventually(self.loop)
     128            return
     129
     130        if self.pending_requests:
     131            # no server, but there are still requests in flight: maybe one of
     132            # them will make progress
     133            return
     134
     135        self.log(format="ShareFinder.loop: no_more_shares, ever",
     136                 level=log.UNUSUAL, umid="XjQlzg")
     137        # we've run out of servers (so we can't send any more requests), and
     138        # we have nothing in flight. No further progress can be made. Our
     139        # parent is destined to remain hungry.
     140        self.share_consumer.no_more_shares()
     141
     142    def send_request(self, server):
     143        peerid, rref = server
     144        req = RequestToken(peerid)
     145        self.pending_requests.add(req)
     146        lp = self.log(format="sending DYHB to [%(peerid)s]",
     147                      peerid=idlib.shortnodeid_b2a(peerid),
     148                      level=log.NOISY, umid="Io7pyg")
     149        d_ev = self._download_status.add_dyhb_sent(peerid, now())
     150        # TODO: get the timer from a Server object, it knows best
     151        self.overdue_timers[req] = reactor.callLater(self.OVERDUE_TIMEOUT,
     152                                                     self.overdue, req)
     153        d = rref.callRemote("get_buckets", self._storage_index)
     154        d.addBoth(incidentally, self._request_retired, req)
     155        d.addCallbacks(self._got_response, self._got_error,
     156                       callbackArgs=(rref.version, peerid, req, d_ev, lp),
     157                       errbackArgs=(peerid, req, d_ev, lp))
     158        d.addErrback(log.err, format="error in send_request",
     159                     level=log.WEIRD, parent=lp, umid="rpdV0w")
     160        d.addCallback(incidentally, eventually, self.loop)
     161
     162    def _request_retired(self, req):
     163        self.pending_requests.discard(req)
     164        self.overdue_requests.discard(req)
     165        if req in self.overdue_timers:
     166            self.overdue_timers[req].cancel()
     167            del self.overdue_timers[req]
     168
     169    def overdue(self, req):
     170        del self.overdue_timers[req]
     171        assert req in self.pending_requests # paranoia, should never be false
     172        self.overdue_requests.add(req)
     173        eventually(self.loop)
     174
     175    def _got_response(self, buckets, server_version, peerid, req, d_ev, lp):
     176        shnums = sorted([shnum for shnum in buckets])
     177        d_ev.finished(shnums, now())
     178        if buckets:
     179            shnums_s = ",".join([str(shnum) for shnum in shnums])
     180            self.log(format="got shnums [%(shnums)s] from [%(peerid)s]",
     181                     shnums=shnums_s, peerid=idlib.shortnodeid_b2a(peerid),
     182                     level=log.NOISY, parent=lp, umid="0fcEZw")
     183        else:
     184            self.log(format="no shares from [%(peerid)s]",
     185                     peerid=idlib.shortnodeid_b2a(peerid),
     186                     level=log.NOISY, parent=lp, umid="U7d4JA")
     187        if self.node.num_segments is None:
     188            best_numsegs = self.node.guessed_num_segments
     189        else:
     190            best_numsegs = self.node.num_segments
     191        for shnum, bucket in buckets.iteritems():
     192            self._create_share(best_numsegs, shnum, bucket, server_version,
     193                               peerid)
     194
     195    def _create_share(self, best_numsegs, shnum, bucket, server_version,
     196                      peerid):
     197        if shnum in self._commonshares:
     198            cs = self._commonshares[shnum]
     199        else:
     200            cs = CommonShare(best_numsegs, self._si_prefix, shnum,
     201                             self._node_logparent)
     202            # Share._get_satisfaction is responsible for updating
     203            # CommonShare.set_numsegs after we know the UEB. Alternatives:
     204            #  1: d = self.node.get_num_segments()
     205            #     d.addCallback(cs.got_numsegs)
     206            #   the problem is that the OneShotObserverList I was using
     207            #   inserts an eventual-send between _get_satisfaction's
     208            #   _satisfy_UEB and _satisfy_block_hash_tree, and the
     209            #   CommonShare didn't get the num_segs message before
     210            #   being asked to set block hash values. To resolve this
     211            #   would require an immediate ObserverList instead of
     212            #   an eventual-send -based one
     213            #  2: break _get_satisfaction into Deferred-attached pieces.
     214            #     Yuck.
     215            self._commonshares[shnum] = cs
     216        s = Share(bucket, server_version, self.verifycap, cs, self.node,
     217                  self._download_status, peerid, shnum,
     218                  self._node_logparent)
     219        self.undelivered_shares.append(s)
     220
     221    def _got_error(self, f, peerid, req, d_ev, lp):
     222        d_ev.finished("error", now())
     223        self.log(format="got error from [%(peerid)s]",
     224                 peerid=idlib.shortnodeid_b2a(peerid), failure=f,
     225                 level=log.UNUSUAL, parent=lp, umid="zUKdCw")
     226
     227
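
ShareFinder treats a get_buckets (DYHB) query that has been outstanding for more than OVERDUE_TIMEOUT seconds as overdue: the request stays in pending_requests but also joins overdue_requests, so it stops counting against max_outstanding_requests and a replacement query may be sent, while a late response can still retire it normally. A minimal sketch of that bookkeeping (not part of the patch; MiniFinder is a hypothetical stand-in, driven by twisted's task.Clock so the timeout can be exercised synchronously):

    from twisted.internet import task

    OVERDUE_TIMEOUT = 10.0

    class MiniFinder:
        def __init__(self, clock):
            self._clock = clock
            self.pending = set()
            self.overdue = set()   # subset of self.pending
            self._timers = {}

        def send_request(self, req):
            self.pending.add(req)
            self._timers[req] = self._clock.callLater(OVERDUE_TIMEOUT,
                                                      self._went_overdue, req)

        def _went_overdue(self, req):
            del self._timers[req]
            self.overdue.add(req)  # still pending, but no longer counted as "in flight"

        def retire(self, req):
            self.pending.discard(req)
            self.overdue.discard(req)
            if req in self._timers:
                self._timers.pop(req).cancel()

    clock = task.Clock()
    f = MiniFinder(clock)
    f.send_request("dyhb-to-serverA")
    clock.advance(OVERDUE_TIMEOUT + 1)
    assert "dyhb-to-serverA" in f.overdue  # slow server no longer counts against the cap
    f.retire("dyhb-to-serverA")            # a late answer still retires it cleanly
    assert not f.pending and not f.overdue
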
  • new file src/allmydata/immutable/downloader/node.py

    diff --git a/src/allmydata/immutable/downloader/node.py b/src/allmydata/immutable/downloader/node.py
    new file mode 100644
    index 0000000..2991c9e
    - +  
     1
     2import time
     3now = time.time
     4from twisted.python.failure import Failure
     5from twisted.internet import defer
     6from foolscap.api import eventually
     7from allmydata import uri
     8from allmydata.codec import CRSDecoder
     9from allmydata.util import base32, log, hashutil, mathutil, observer
     10from allmydata.interfaces import DEFAULT_MAX_SEGMENT_SIZE
     11from allmydata.hashtree import IncompleteHashTree, BadHashError, \
     12     NotEnoughHashesError
     13
     14# local imports
     15from finder import ShareFinder
     16from fetcher import SegmentFetcher
     17from segmentation import Segmentation
     18from common import BadCiphertextHashError
     19
     20class Cancel:
     21    def __init__(self, f):
     22        self._f = f
     23        self.cancelled = False
     24    def cancel(self):
     25        if not self.cancelled:
     26            self.cancelled = True
     27            self._f(self)
     28
     29class DownloadNode:
     30    """Internal class which manages downloads and holds state. External
     31    callers use CiphertextFileNode instead."""
     32
     33    # Share._node points to me
     34    def __init__(self, verifycap, storage_broker, secret_holder,
     35                 terminator, history, download_status):
     36        assert isinstance(verifycap, uri.CHKFileVerifierURI)
     37        self._verifycap = verifycap
     38        self._storage_broker = storage_broker
     39        self._si_prefix = base32.b2a_l(verifycap.storage_index[:8], 60)
     40        self.running = True
     41        if terminator:
     42            terminator.register(self) # calls self.stop() at stopService()
     43        # the rules are:
     44        # 1: Only send network requests if you're active (self.running is True)
     45        # 2: Use TimerService, not reactor.callLater
     46        # 3: You can do eventual-sends any time.
     47        # These rules should mean that once
     48        # stopService()+flushEventualQueue() fires, everything will be done.
     49        self._secret_holder = secret_holder
     50        self._history = history
     51        self._download_status = download_status
     52
     53        k, N = self._verifycap.needed_shares, self._verifycap.total_shares
     54        self.share_hash_tree = IncompleteHashTree(N)
     55
     56        # we guess the segment size, so Segmentation can pull non-initial
     57        # segments in a single roundtrip. This populates
     58        # .guessed_segment_size, .guessed_num_segments, and
     59        # .ciphertext_hash_tree (with a dummy, to let us guess which hashes
     60        # we'll need)
     61        self._build_guessed_tables(DEFAULT_MAX_SEGMENT_SIZE)
     62
     63        # filled in when we parse a valid UEB
     64        self.have_UEB = False
     65        self.segment_size = None
     66        self.tail_segment_size = None
     67        self.tail_segment_padded = None
     68        self.num_segments = None
     69        self.block_size = None
     70        self.tail_block_size = None
     71
     72        # things to track callers that want data
     73
     74        # _segment_requests can have duplicates
     75        self._segment_requests = [] # (segnum, d, cancel_handle)
     76        self._active_segment = None # a SegmentFetcher, with .segnum
     77
     78        self._segsize_observers = observer.OneShotObserverList()
     79
     80        # we create one top-level logparent for this _Node, and another one
     81        # for each read() call. Segmentation and get_segment() messages are
     82        # associated with the read() call, everything else is tied to the
     83        # _Node's log entry.
     84        lp = log.msg(format="Immutable _Node(%(si)s) created: size=%(size)d,"
     85                     " guessed_segsize=%(guessed_segsize)d,"
     86                     " guessed_numsegs=%(guessed_numsegs)d",
     87                     si=self._si_prefix, size=verifycap.size,
     88                     guessed_segsize=self.guessed_segment_size,
     89                     guessed_numsegs=self.guessed_num_segments,
     90                     level=log.OPERATIONAL, umid="uJ0zAQ")
     91        self._lp = lp
     92
     93        self._sharefinder = ShareFinder(storage_broker, verifycap, self,
     94                                        self._download_status, lp)
     95        self._shares = set()
     96
     97    def _build_guessed_tables(self, max_segment_size):
     98        size = min(self._verifycap.size, max_segment_size)
     99        s = mathutil.next_multiple(size, self._verifycap.needed_shares)
     100        self.guessed_segment_size = s
     101        r = self._calculate_sizes(self.guessed_segment_size)
     102        self.guessed_num_segments = r["num_segments"]
     103        # as with CommonShare, our ciphertext_hash_tree is a stub until we
     104        # get the real num_segments
     105        self.ciphertext_hash_tree = IncompleteHashTree(self.guessed_num_segments)
     106
     107    def __repr__(self):
     108        return "Imm_Node(%s)" % (self._si_prefix,)
     109
     110    def stop(self):
     111        # called by the Terminator at shutdown, mostly for tests
     112        if self._active_segment:
     113            self._active_segment.stop()
     114            self._active_segment = None
     115        self._sharefinder.stop()
     116
     117    # things called by outside callers, via CiphertextFileNode. get_segment()
     118    # may also be called by Segmentation.
     119
     120    def read(self, consumer, offset=0, size=None, read_ev=None):
     121        """I am the main entry point, from which FileNode.read() can get
     122        data. I feed the consumer with the desired range of ciphertext. I
     123        return a Deferred that fires (with the consumer) when the read is
     124        finished.
     125
     126        Note that there is no notion of a 'file pointer': each call to read()
     127        uses an independent offset= value."""
     128        # for concurrent operations: each gets its own Segmentation manager
     129        if size is None:
     130            size = self._verifycap.size
     131        # clip size so offset+size does not go past EOF
     132        size = min(size, self._verifycap.size-offset)
     133        if read_ev is None:
     134            read_ev = self._download_status.add_read_event(offset, size, now())
     135
     136        lp = log.msg(format="imm Node(%(si)s).read(%(offset)d, %(size)d)",
     137                     si=base32.b2a(self._verifycap.storage_index)[:8],
     138                     offset=offset, size=size,
     139                     level=log.OPERATIONAL, parent=self._lp, umid="l3j3Ww")
     140        if self._history:
     141            sp = self._history.stats_provider
     142            sp.count("downloader.files_downloaded", 1) # really read() calls
     143            sp.count("downloader.bytes_downloaded", size)
     144        s = Segmentation(self, offset, size, consumer, read_ev, lp)
     145        # this raises an interesting question: what segments to fetch? if
     146        # offset=0, always fetch the first segment, and then allow
     147        # Segmentation to be responsible for pulling the subsequent ones if
     148        # the first wasn't large enough. If offset>0, we're going to need an
     149        # extra roundtrip to get the UEB (and therefore the segment size)
     150        # before we can figure out which segment to get. TODO: allow the
     151        # offset-table-guessing code (which starts by guessing the segsize)
     152        # to assist the offset>0 process.
     153        d = s.start()
     154        def _done(res):
     155            read_ev.finished(now())
     156            return res
     157        d.addBoth(_done)
     158        return d
     159
     160    def get_segment(self, segnum, logparent=None):
     161        """Begin downloading a segment. I return a tuple (d, c): 'd' is a
     162        Deferred that fires with (offset,data) when the desired segment is
     163        available, and c is an object on which c.cancel() can be called to
     164        disavow interest in the segment (after which 'd' will never fire).
     165
     166        You probably need to know the segment size before calling this,
     167        unless you want the first few bytes of the file. If you ask for a
     168        segment number which turns out to be too large, the Deferred will
     169        errback with BadSegmentNumberError.
     170
     171        The Deferred fires with the offset of the first byte of the data
     172        segment, so that you can call get_segment() before knowing the
     173        segment size, and still know which data you received.
     174
     175        The Deferred can also errback with other fatal problems, such as
     176        NotEnoughSharesError, NoSharesError, or BadCiphertextHashError.
     177        """
     178        log.msg(format="imm Node(%(si)s).get_segment(%(segnum)d)",
     179                si=base32.b2a(self._verifycap.storage_index)[:8],
     180                segnum=segnum,
     181                level=log.OPERATIONAL, parent=logparent, umid="UKFjDQ")
     182        self._download_status.add_segment_request(segnum, now())
     183        d = defer.Deferred()
     184        c = Cancel(self._cancel_request)
     185        self._segment_requests.append( (segnum, d, c) )
     186        self._start_new_segment()
     187        return (d, c)
     188
     189    def get_segsize(self):
     190        """Return a Deferred that fires when we know the real segment size."""
     191        if self.segment_size:
     192            return defer.succeed(self.segment_size)
     193        # TODO: this downloads (and discards) the first segment of the file.
     194        # We could make this more efficient by writing
     195        # fetcher.SegmentSizeFetcher, with the job of finding a single valid
     196        # share and extracting the UEB. We'd add Share.get_UEB() to request
     197        # just the UEB.
     198        (d,c) = self.get_segment(0)
     199        # this ensures that an error during get_segment() will errback the
     200        # caller, so Repair won't wait forever on completely missing files
     201        d.addCallback(lambda ign: self._segsize_observers.when_fired())
     202        return d
     203
     204    # things called by the Segmentation object used to transform
     205    # arbitrary-sized read() calls into quantized segment fetches
     206
     207    def _start_new_segment(self):
     208        if self._active_segment is None and self._segment_requests:
     209            segnum = self._segment_requests[0][0]
     210            k = self._verifycap.needed_shares
     211            log.msg(format="%(node)s._start_new_segment: segnum=%(segnum)d",
     212                    node=repr(self), segnum=segnum,
     213                    level=log.NOISY, umid="wAlnHQ")
     214            self._active_segment = fetcher = SegmentFetcher(self, segnum, k)
     215            active_shares = [s for s in self._shares if s.is_alive()]
     216            fetcher.add_shares(active_shares) # this triggers the loop
     217
     218
     219    # called by our child ShareFinder
     220    def got_shares(self, shares):
     221        self._shares.update(shares)
     222        if self._active_segment:
     223            self._active_segment.add_shares(shares)
     224    def no_more_shares(self):
     225        self._no_more_shares = True
     226        if self._active_segment:
     227            self._active_segment.no_more_shares()
     228
     229    # things called by our Share instances
     230
     231    def validate_and_store_UEB(self, UEB_s):
     232        log.msg("validate_and_store_UEB",
     233                level=log.OPERATIONAL, parent=self._lp, umid="7sTrPw")
     234        h = hashutil.uri_extension_hash(UEB_s)
     235        if h != self._verifycap.uri_extension_hash:
     236            raise BadHashError
     237        UEB_dict = uri.unpack_extension(UEB_s)
     238        self._parse_and_store_UEB(UEB_dict) # sets self._stuff
     239        # TODO: a malformed (but authentic) UEB could throw an assertion in
     240        # _parse_and_store_UEB, and we should abandon the download.
     241        self.have_UEB = True
     242
     243    def _parse_and_store_UEB(self, d):
     244        # Note: the UEB contains needed_shares and total_shares. These are
     245        # redundant and inferior (the filecap contains the authoritative
     246        # values). However, because it is possible to encode the same file in
     247        # multiple ways, and the encoders might choose (poorly) to use the
     248        # same key for both (therefore getting the same SI), we might
     249        # encounter shares for both types. The UEB hashes will be different,
     250        # however, and we'll disregard the "other" encoding's shares as
     251        # corrupted.
     252
     253        # therefore, we ignore d['total_shares'] and d['needed_shares'].
     254
     255        log.msg(format="UEB=%(ueb)s, vcap=%(vcap)s",
     256                ueb=repr(d), vcap=self._verifycap.to_string(),
     257                level=log.NOISY, parent=self._lp, umid="cVqZnA")
     258
     259        k, N = self._verifycap.needed_shares, self._verifycap.total_shares
     260
     261        self.segment_size = d['segment_size']
     262        self._segsize_observers.fire(self.segment_size)
     263
     264        r = self._calculate_sizes(self.segment_size)
     265        self.tail_segment_size = r["tail_segment_size"]
     266        self.tail_segment_padded = r["tail_segment_padded"]
     267        self.num_segments = r["num_segments"]
     268        self.block_size = r["block_size"]
     269        self.tail_block_size = r["tail_block_size"]
     270        log.msg("actual sizes: %s" % (r,),
     271                level=log.NOISY, parent=self._lp, umid="PY6P5Q")
     272        if (self.segment_size == self.guessed_segment_size
     273            and self.num_segments == self.guessed_num_segments):
     274            log.msg("my guess was right!",
     275                    level=log.NOISY, parent=self._lp, umid="x340Ow")
     276        else:
     277            log.msg("my guess was wrong! Extra round trips for me.",
     278                    level=log.NOISY, parent=self._lp, umid="tb7RJw")
     279
     280        # zfec.Decode() instantiation is fast, but still, let's use the same
     281        # codec instance for all but the last segment. 3-of-10 takes 15us on
     282        # my laptop, 25-of-100 is 900us, 3-of-255 is 97us, 25-of-255 is
     283        # 2.5ms, worst-case 254-of-255 is 9.3ms
     284        self._codec = CRSDecoder()
     285        self._codec.set_params(self.segment_size, k, N)
     286
     287
     288        # Ciphertext hash tree root is mandatory, so that there is at most
     289        # one ciphertext that matches this read-cap or verify-cap. The
     290        # integrity check on the shares is not sufficient to prevent the
     291        # original encoder from creating some shares of file A and other
     292        # shares of file B. self.ciphertext_hash_tree was a guess before:
     293        # this is where we create it for real.
     294        self.ciphertext_hash_tree = IncompleteHashTree(self.num_segments)
     295        self.ciphertext_hash_tree.set_hashes({0: d['crypttext_root_hash']})
     296
     297        self.share_hash_tree.set_hashes({0: d['share_root_hash']})
     298
     299        # Our job is a fast download, not verification, so we ignore any
     300        # redundant fields. The Verifier uses a different code path which
     301        # does not ignore them.
     302
     303    def _calculate_sizes(self, segment_size):
     304        # segments of ciphertext
     305        size = self._verifycap.size
     306        k = self._verifycap.needed_shares
     307
     308        # this assert matches the one in encode.py:127 inside
     309        # Encoded._got_all_encoding_parameters, where the UEB is constructed
     310        assert segment_size % k == 0
     311
     312        # the last segment is usually short. We don't store a whole segsize,
     313        # but we do pad the segment up to a multiple of k, because the
     314        # encoder requires that.
     315        tail_segment_size = size % segment_size
     316        if tail_segment_size == 0:
     317            tail_segment_size = segment_size
     318        padded = mathutil.next_multiple(tail_segment_size, k)
     319        tail_segment_padded = padded
     320
     321        num_segments = mathutil.div_ceil(size, segment_size)
     322
     323        # each segment is turned into N blocks. All but the last are of size
     324        # block_size, and the last is of size tail_block_size
     325        block_size = segment_size / k
     326        tail_block_size = tail_segment_padded / k
     327
     328        return { "tail_segment_size": tail_segment_size,
     329                 "tail_segment_padded": tail_segment_padded,
     330                 "num_segments": num_segments,
     331                 "block_size": block_size,
     332                 "tail_block_size": tail_block_size,
     333                 }
     334
     335
     336    def process_share_hashes(self, share_hashes):
     337        for hashnum in share_hashes:
     338            if hashnum >= len(self.share_hash_tree):
     339                # "BadHashError" is normally for e.g. a corrupt block. We
     340                # sort of abuse it here to mean a badly numbered hash (which
     341                # indicates corruption in the number bytes, rather than in
     342                # the data bytes).
     343                raise BadHashError("hashnum %d doesn't fit in hashtree(%d)"
     344                                   % (hashnum, len(self.share_hash_tree)))
     345        self.share_hash_tree.set_hashes(share_hashes)
     346
     347    def get_needed_ciphertext_hashes(self, segnum):
     348        cht = self.ciphertext_hash_tree
     349        return cht.needed_hashes(segnum, include_leaf=True)
     350    def process_ciphertext_hashes(self, hashes):
     351        assert self.num_segments is not None
     352        # this may raise BadHashError or NotEnoughHashesError
     353        self.ciphertext_hash_tree.set_hashes(hashes)
     354
     355
     356    # called by our child SegmentFetcher
     357
     358    def want_more_shares(self):
     359        self._sharefinder.hungry()
     360
     361    def fetch_failed(self, sf, f):
     362        assert sf is self._active_segment
     363        self._active_segment = None
     364        # deliver error upwards
     365        for (d,c) in self._extract_requests(sf.segnum):
     366            eventually(self._deliver, d, c, f)
     367
     368    def process_blocks(self, segnum, blocks):
     369        d = defer.maybeDeferred(self._decode_blocks, segnum, blocks)
     370        d.addCallback(self._check_ciphertext_hash, segnum)
     371        def _deliver(result):
     372            ds = self._download_status
     373            if isinstance(result, Failure):
     374                ds.add_segment_error(segnum, now())
     375            else:
     376                (offset, segment, decodetime) = result
     377                ds.add_segment_delivery(segnum, now(),
     378                                        offset, len(segment), decodetime)
     379            log.msg(format="delivering segment(%(segnum)d)",
     380                    segnum=segnum,
     381                    level=log.OPERATIONAL, parent=self._lp,
     382                    umid="j60Ojg")
     383            for (d,c) in self._extract_requests(segnum):
     384                eventually(self._deliver, d, c, result)
     385            self._active_segment = None
     386            self._start_new_segment()
     387        d.addBoth(_deliver)
     388        d.addErrback(lambda f:
     389                     log.err("unhandled error during process_blocks",
     390                             failure=f, level=log.WEIRD,
     391                             parent=self._lp, umid="MkEsCg"))
     392
     393    def _decode_blocks(self, segnum, blocks):
     394        tail = (segnum == self.num_segments-1)
     395        codec = self._codec
     396        block_size = self.block_size
     397        decoded_size = self.segment_size
     398        if tail:
     399            # account for the padding in the last segment
     400            codec = CRSDecoder()
     401            k, N = self._verifycap.needed_shares, self._verifycap.total_shares
     402            codec.set_params(self.tail_segment_padded, k, N)
     403            block_size = self.tail_block_size
     404            decoded_size = self.tail_segment_padded
     405
     406        shares = []
     407        shareids = []
     408        for (shareid, share) in blocks.iteritems():
     409            assert len(share) == block_size
     410            shareids.append(shareid)
     411            shares.append(share)
     412        del blocks
     413
     414        start = now()
     415        d = codec.decode(shares, shareids)   # segment
     416        del shares
     417        def _process(buffers):
     418            decodetime = now() - start
     419            segment = "".join(buffers)
     420            assert len(segment) == decoded_size
     421            del buffers
     422            if tail:
     423                segment = segment[:self.tail_segment_size]
     424            return (segment, decodetime)
     425        d.addCallback(_process)
     426        return d
     427
     428    def _check_ciphertext_hash(self, (segment, decodetime), segnum):
     429        assert self._active_segment.segnum == segnum
     430        assert self.segment_size is not None
     431        offset = segnum * self.segment_size
     432
     433        h = hashutil.crypttext_segment_hash(segment)
     434        try:
     435            self.ciphertext_hash_tree.set_hashes(leaves={segnum: h})
     436            return (offset, segment, decodetime)
     437        except (BadHashError, NotEnoughHashesError):
     438            format = ("hash failure in ciphertext_hash_tree:"
     439                      " segnum=%(segnum)d, SI=%(si)s")
     440            log.msg(format=format, segnum=segnum, si=self._si_prefix,
     441                    failure=Failure(),
     442                    level=log.WEIRD, parent=self._lp, umid="MTwNnw")
     443            # this is especially weird, because we made it past the share
     444            # hash tree. It implies that we're using the wrong encoding, or
     445            # that the uploader deliberately constructed a bad UEB.
     446            msg = format % {"segnum": segnum, "si": self._si_prefix}
     447            raise BadCiphertextHashError(msg)
     448
     449    def _deliver(self, d, c, result):
     450        # this method exists to handle cancel() that occurs between
     451        # _got_segment and _deliver
     452        if not c.cancelled:
     453            d.callback(result) # might actually be an errback
     454
     455    def _extract_requests(self, segnum):
     456        """Remove matching requests and return their (d,c) tuples so that the
     457        caller can retire them."""
     458        retire = [(d,c) for (segnum0, d, c) in self._segment_requests
     459                  if segnum0 == segnum]
     460        self._segment_requests = [t for t in self._segment_requests
     461                                  if t[0] != segnum]
     462        return retire
     463
     464    def _cancel_request(self, c):
     465        self._segment_requests = [t for t in self._segment_requests
     466                                  if t[2] != c]
     467        segnums = [segnum for (segnum,d,c) in self._segment_requests]
     468        if self._active_segment.segnum not in segnums:
     469            self._active_segment.stop()
     470            self._active_segment = None
     471            self._start_new_segment()
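
Most of the geometry in node.py comes out of _calculate_sizes(), both for the initial guess made in _build_guessed_tables() and for the real values parsed from the UEB. A worked example (not part of the patch) with the mathutil helpers inlined; the concrete numbers assume k=3 and a 128 KiB maximum segment size, and are illustrative only:

    def div_ceil(n, d):
        return (n + d - 1) // d

    def next_multiple(n, k):
        return div_ceil(n, k) * k

    size = 1000000                                        # ciphertext size from the verify-cap
    k = 3                                                 # needed_shares
    segment_size = next_multiple(min(size, 131072), k)    # 131073: guess, padded to a multiple of k

    num_segments = div_ceil(size, segment_size)               # 8
    tail_segment_size = size % segment_size or segment_size   # 82489
    tail_segment_padded = next_multiple(tail_segment_size, k) # 82491
    block_size = segment_size // k                            # 43691
    tail_block_size = tail_segment_padded // k                # 27497
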
  • new file src/allmydata/immutable/downloader/segmentation.py

    diff --git a/src/allmydata/immutable/downloader/segmentation.py b/src/allmydata/immutable/downloader/segmentation.py
    new file mode 100644
    index 0000000..4890195
    - +  
     1
     2import time
     3now = time.time
     4from zope.interface import implements
     5from twisted.internet import defer
     6from twisted.internet.interfaces import IPushProducer
     7from foolscap.api import eventually
     8from allmydata.util import log
     9from allmydata.util.spans import overlap
     10
     11from common import BadSegmentNumberError, WrongSegmentError, DownloadStopped
     12
     13class Segmentation:
     14    """I am responsible for a single offset+size read of the file. I handle
     15    segmentation: I figure out which segments are necessary, request them
     16    (from my CiphertextDownloader) in order, and trim the segments down to
     17    match the offset+size span. I use the Producer/Consumer interface to only
     18    request one segment at a time.
     19    """
     20    implements(IPushProducer)
     21    def __init__(self, node, offset, size, consumer, read_ev, logparent=None):
     22        self._node = node
     23        self._hungry = True
     24        self._active_segnum = None
     25        self._cancel_segment_request = None
     26        # these are updated as we deliver data. At any given time, we still
     27        # want to download file[offset:offset+size]
     28        self._offset = offset
     29        self._size = size
     30        assert offset+size <= node._verifycap.size
     31        self._consumer = consumer
     32        self._read_ev = read_ev
     33        self._start_pause = None
     34        self._lp = logparent
     35
     36    def start(self):
     37        self._alive = True
     38        self._deferred = defer.Deferred()
     39        self._consumer.registerProducer(self, True)
     40        self._maybe_fetch_next()
     41        return self._deferred
     42
     43    def _maybe_fetch_next(self):
     44        if not self._alive or not self._hungry:
     45            return
     46        if self._active_segnum is not None:
     47            return
     48        self._fetch_next()
     49
     50    def _fetch_next(self):
     51        if self._size == 0:
     52            # done!
     53            self._alive = False
     54            self._hungry = False
     55            self._consumer.unregisterProducer()
     56            self._deferred.callback(self._consumer)
     57            return
     58        n = self._node
     59        have_actual_segment_size = n.segment_size is not None
     60        guess_s = ""
     61        if not have_actual_segment_size:
     62            guess_s = "probably "
     63        segment_size = n.segment_size or n.guessed_segment_size
     64        if self._offset == 0:
     65            # great! we want segment0 for sure
     66            wanted_segnum = 0
     67        else:
     68            # this might be a guess
     69            wanted_segnum = self._offset // segment_size
     70        log.msg(format="_fetch_next(offset=%(offset)d) %(guess)swants segnum=%(segnum)d",
     71                offset=self._offset, guess=guess_s, segnum=wanted_segnum,
     72                level=log.NOISY, parent=self._lp, umid="5WfN0w")
     73        self._active_segnum = wanted_segnum
     74        d,c = n.get_segment(wanted_segnum, self._lp)
     75        self._cancel_segment_request = c
     76        d.addBoth(self._request_retired)
     77        d.addCallback(self._got_segment, wanted_segnum)
     78        if not have_actual_segment_size:
     79            # we can retry once
     80            d.addErrback(self._retry_bad_segment)
     81        d.addErrback(self._error)
     82
     83    def _request_retired(self, res):
     84        self._active_segnum = None
     85        self._cancel_segment_request = None
     86        return res
     87
     88    def _got_segment(self, (segment_start,segment,decodetime), wanted_segnum):
     89        self._cancel_segment_request = None
     90        # we got file[segment_start:segment_start+len(segment)]
     91        # we want file[self._offset:self._offset+self._size]
     92        log.msg(format="Segmentation got data:"
     93                " want [%(wantstart)d-%(wantend)d),"
     94                " given [%(segstart)d-%(segend)d), for segnum=%(segnum)d",
     95                wantstart=self._offset, wantend=self._offset+self._size,
     96                segstart=segment_start, segend=segment_start+len(segment),
     97                segnum=wanted_segnum,
     98                level=log.OPERATIONAL, parent=self._lp, umid="32dHcg")
     99
     100        o = overlap(segment_start, len(segment),  self._offset, self._size)
     101        # the overlap is file[o[0]:o[0]+o[1]]
     102        if not o or o[0] != self._offset:
     103            # we didn't get the first byte, so we can't use this segment
     104            log.msg("Segmentation handed wrong data:"
     105                    " want [%d-%d), given [%d-%d), for segnum=%d,"
     106                    " for si=%s"
     107                    % (self._offset, self._offset+self._size,
     108                       segment_start, segment_start+len(segment),
     109                       wanted_segnum, self._node._si_prefix),
     110                    level=log.UNUSUAL, parent=self._lp, umid="STlIiA")
     111            # we may retry if the segnum we asked was based on a guess
     112            raise WrongSegmentError("I was given the wrong data.")
     113        offset_in_segment = self._offset - segment_start
     114        desired_data = segment[offset_in_segment:offset_in_segment+o[1]]
     115
     116        self._offset += len(desired_data)
     117        self._size -= len(desired_data)
     118        self._consumer.write(desired_data)
     119        # the consumer might call our .pauseProducing() inside that write()
     120        # call, setting self._hungry=False
     121        self._read_ev.update(len(desired_data), 0, 0)
     122        self._maybe_fetch_next()
     123
     124    def _retry_bad_segment(self, f):
     125        f.trap(WrongSegmentError, BadSegmentNumberError)
     126        # we guessed the segnum wrong: either one that doesn't overlap with
     127        # the start of our desired region, or one that's beyond the end of
     128        # the world. Now that we have the right information, we're allowed to
     129        # retry once.
     130        assert self._node.segment_size is not None
     131        return self._maybe_fetch_next()
     132
     133    def _error(self, f):
     134        log.msg("Error in Segmentation", failure=f,
     135                level=log.WEIRD, parent=self._lp, umid="EYlXBg")
     136        self._alive = False
     137        self._hungry = False
     138        self._consumer.unregisterProducer()
     139        self._deferred.errback(f)
     140
     141    def stopProducing(self):
     142        self._hungry = False
     143        self._alive = False
     144        # cancel any outstanding segment request
     145        if self._cancel_segment_request:
     146            self._cancel_segment_request.cancel()
     147            self._cancel_segment_request = None
     148        e = DownloadStopped("our Consumer called stopProducing()")
     149        self._deferred.errback(e)
     150
     151    def pauseProducing(self):
     152        self._hungry = False
     153        self._start_pause = now()
     154    def resumeProducing(self):
     155        self._hungry = True
     156        eventually(self._maybe_fetch_next)
     157        if self._start_pause is not None:
     158            paused = now() - self._start_pause
     159            self._read_ev.update(0, 0, paused)
     160            self._start_pause = None
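
Segmentation._got_segment() trims each delivered segment down to the caller's [offset, offset+size) window, and retries once (via WrongSegmentError / BadSegmentNumberError) if the segment number it asked for was based on a wrong guess. The trimming arithmetic, sketched (not part of the patch; overlap() below is a simplified stand-in for allmydata.util.spans.overlap, and the numbers are made up):

    def overlap(start1, length1, start2, length2):
        # return (start, length) of the intersection, or None if disjoint
        start = max(start1, start2)
        end = min(start1 + length1, start2 + length2)
        if start >= end:
            return None
        return (start, end - start)

    segment_start = 131073             # first byte of the segment we were handed
    segment = "X" * 131073             # the decoded segment itself
    offset, size = 150000, 1000        # caller still wants file[150000:151000]

    o = overlap(segment_start, len(segment), offset, size)
    assert o and o[0] == offset        # we did get the first wanted byte
    offset_in_segment = offset - segment_start
    desired = segment[offset_in_segment:offset_in_segment + o[1]]
    assert len(desired) == 1000        # write these bytes, then advance offset/size
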
  • new file src/allmydata/immutable/downloader/share.py

    diff --git a/src/allmydata/immutable/downloader/share.py b/src/allmydata/immutable/downloader/share.py
    new file mode 100644
    index 0000000..e3c9017
    - +  
     1
     2import struct
     3import time
     4now = time.time
     5
     6from twisted.python.failure import Failure
     7from foolscap.api import eventually
     8from allmydata.util import base32, log, hashutil, mathutil
     9from allmydata.util.spans import Spans, DataSpans
     10from allmydata.interfaces import HASH_SIZE
     11from allmydata.hashtree import IncompleteHashTree, BadHashError, \
     12     NotEnoughHashesError
     13
     14from allmydata.immutable.layout import make_write_bucket_proxy
     15from allmydata.util.observer import EventStreamObserver
     16from common import COMPLETE, CORRUPT, DEAD, BADSEGNUM
     17
     18
     19class LayoutInvalid(Exception):
     20    pass
     21class DataUnavailable(Exception):
     22    pass
     23
     24class Share:
     25    """I represent a single instance of a single share (e.g. I reference
     26    shnum 2 of share SI=abcde on server xy12t, not the copy on server ab45q).
     27    I am associated with a CommonShare that remembers data that is held in
     28    common among e.g. SI=abcde/shnum2 across all servers. I am also
     29    associated with a CiphertextFileNode for e.g. SI=abcde (all shares, all
     30    servers).
     31    """
     32    # this is a specific implementation of IShare for tahoe's native storage
     33    # servers. A different backend would use a different class.
     34
     35    def __init__(self, rref, server_version, verifycap, commonshare, node,
     36                 download_status, peerid, shnum, logparent):
     37        self._rref = rref
     38        self._server_version = server_version
     39        self._node = node # holds share_hash_tree and UEB
     40        self.actual_segment_size = node.segment_size # might still be None
     41        # XXX change node.guessed_segment_size to
     42        # node.best_guess_segment_size(), which should give us the real value
     43        # if known, else its guess.
     44        self._guess_offsets(verifycap, node.guessed_segment_size)
     45        self.actual_offsets = None
     46        self._UEB_length = None
     47        self._commonshare = commonshare # holds block_hash_tree
     48        self._download_status = download_status
     49        self._peerid = peerid
     50        self._peerid_s = base32.b2a(peerid)[:5]
     51        self._storage_index = verifycap.storage_index
     52        self._si_prefix = base32.b2a(verifycap.storage_index)[:8]
     53        self._shnum = shnum
     54        # self._alive becomes False upon fatal corruption or server error
     55        self._alive = True
     56        self._lp = log.msg(format="%(share)s created", share=repr(self),
     57                           level=log.NOISY, parent=logparent, umid="P7hv2w")
     58
     59        self._pending = Spans() # request sent but no response received yet
     60        self._received = DataSpans() # ACK response received, with data
     61        self._unavailable = Spans() # NAK response received, no data
     62
     63        # any given byte of the share can be in one of several states:
     64        #  in: _wanted, _requested, _received
     65        #      FALSE    FALSE       FALSE : don't care about it at all
     66        #      TRUE     FALSE       FALSE : want it, haven't yet asked for it
     67        #      TRUE     TRUE        FALSE : request is in-flight
     68        #                                   or didn't get it
     69        #      FALSE    TRUE        TRUE  : got it, haven't used it yet
     70        #      FALSE    TRUE        FALSE : got it and used it
     71        #      FALSE    FALSE       FALSE : block consumed, ready to ask again
     72        #
     73        # when we request data and get a NAK, we leave it in _requested
     74        # to remind ourself to not ask for it again. We don't explicitly
     75        # remove it from anything (maybe this should change).
     76        #
     77        # We retain the hashtrees in the Node, so we leave those spans in
     78        # _requested (and never ask for them again, as long as the Node is
     79        # alive). But we don't retain data blocks (too big), so when we
     80        # consume a data block, we remove it from _requested, so a later
     81        # download can re-fetch it.
     82
     83        self._requested_blocks = [] # (segnum, set(observer2..))
     84        ver = server_version["http://allmydata.org/tahoe/protocols/storage/v1"]
     85        self._overrun_ok = ver["tolerates-immutable-read-overrun"]
     86        # If _overrun_ok and we guess the offsets correctly, we can get
     87        # everything in one RTT. If _overrun_ok and we guess wrong, we might
     88        # need two RTT (but we could get lucky and do it in one). If overrun
     89        # is *not* ok (tahoe-1.3.0 or earlier), we need four RTT: 1=version,
     90        # 2=offset table, 3=UEB_length and everything else (hashes, block),
     91        # 4=UEB.
     92
     93        self.had_corruption = False # for unit tests
     94
     95    def __repr__(self):
     96        return "Share(sh%d-on-%s)" % (self._shnum, self._peerid_s)
     97
     98    def is_alive(self):
     99        # XXX: reconsider. If the share sees a single error, should it remain
     100        # dead for all time? Or should the next segment try again? This DEAD
     101        # state is stored elsewhere too (SegmentFetcher per-share states?)
     102        # and needs to be consistent. We clear _alive in self._fail(), which
     103        # is called upon a network error, or layout failure, or hash failure
     104        # in the UEB or a hash tree. We do not _fail() for a hash failure in
     105        # a block, but of course we still tell our callers about
     106        # state=CORRUPT so they'll find a different share.
     107        return self._alive
     108
     109    def _guess_offsets(self, verifycap, guessed_segment_size):
     110        self.guessed_segment_size = guessed_segment_size
     111        size = verifycap.size
     112        k = verifycap.needed_shares
     113        N = verifycap.total_shares
     114        r = self._node._calculate_sizes(guessed_segment_size)
     115        # num_segments, block_size/tail_block_size
     116        # guessed_segment_size/tail_segment_size/tail_segment_padded
     117        share_size = mathutil.div_ceil(size, k)
     118        # share_size is the amount of block data that will be put into each
     119        # share, summed over all segments. It does not include hashes, the
     120        # UEB, or other overhead.
     121
     122        # use the upload-side code to get this as accurate as possible
     123        ht = IncompleteHashTree(N)
     124        num_share_hashes = len(ht.needed_hashes(0, include_leaf=True))
     125        wbp = make_write_bucket_proxy(None, share_size, r["block_size"],
     126                                      r["num_segments"], num_share_hashes, 0,
     127                                      None)
     128        self._fieldsize = wbp.fieldsize
     129        self._fieldstruct = wbp.fieldstruct
     130        self.guessed_offsets = wbp._offsets
     131
     132    # called by our client, the SegmentFetcher
     133    def get_block(self, segnum):
     134        """Add a block number to the list of requests. This will eventually
     135        result in a fetch of the data necessary to validate the block, then
     136        the block itself. The fetch order is generally
     137        first-come-first-served, but requests may be answered out-of-order if
     138        data becomes available sooner.
     139
     140        I return an EventStreamObserver, which has two uses. The first is to
     141        call o.subscribe(), which gives me a place to send state changes and
     142        eventually the data block. The second is o.cancel(), which removes
     143        the request (if it is still active).
     144
     145        I will distribute the following events through my EventStreamObserver:
     146         - state=OVERDUE: ?? I believe I should have had an answer by now.
     147                          You may want to ask another share instead.
     148         - state=BADSEGNUM: the segnum you asked for is too large. I must
     149                            fetch a valid UEB before I can determine this,
     150                            so the notification is asynchronous
     151         - state=COMPLETE, block=data: here is a valid block
     152         - state=CORRUPT: this share contains corrupted data
     153         - state=DEAD, f=Failure: the server reported an error, this share
     154                                  is unusable
     155        """
     156        log.msg("%s.get_block(%d)" % (repr(self), segnum),
     157                level=log.NOISY, parent=self._lp, umid="RTo9MQ")
     158        assert segnum >= 0
     159        o = EventStreamObserver()
     160        o.set_canceler(self, "_cancel_block_request")
     161        for i,(segnum0,observers) in enumerate(self._requested_blocks):
     162            if segnum0 == segnum:
     163                observers.add(o)
     164                break
     165        else:
     166            self._requested_blocks.append( (segnum, set([o])) )
     167        eventually(self.loop)
     168        return o
     169
     170    def _cancel_block_request(self, o):
     171        new_requests = []
     172        for e in self._requested_blocks:
     173            (segnum0, observers) = e
     174            observers.discard(o)
     175            if observers:
     176                new_requests.append(e)
     177        self._requested_blocks = new_requests
     178
     179    # internal methods
     180    def _active_segnum_and_observers(self):
     181        if self._requested_blocks:
     182            # we only retrieve information for one segment at a time, to
     183            # minimize alacrity (first come, first served)
     184            return self._requested_blocks[0]
     185        return None, []
     186
     187    def loop(self):
     188        try:
     189            # if any exceptions occur here, kill the download
     190            log.msg("%s.loop, reqs=[%s], pending=%s, received=%s,"
     191                    " unavailable=%s" %
     192                    (repr(self),
     193                     ",".join([str(req[0]) for req in self._requested_blocks]),
     194                     self._pending.dump(), self._received.dump(),
     195                     self._unavailable.dump() ),
     196                    level=log.NOISY, parent=self._lp, umid="BaL1zw")
     197            self._do_loop()
     198            # all exception cases call self._fail(), which clears self._alive
     199        except (BadHashError, NotEnoughHashesError, LayoutInvalid), e:
     200            # Abandon this share. We do this if we see corruption in the
     201            # offset table, the UEB, or a hash tree. We don't abandon the
     202            # whole share if we see corruption in a data block (we abandon
     203            # just the one block, and still try to get data from other blocks
     204            # on the same server). In theory, we could get good data from a
     205            # share with a corrupt UEB (by first getting the UEB from some
     206            # other share), or corrupt hash trees, but the logic to decide
     207            # when this is safe is non-trivial. So for now, give up at the
     208            # first sign of corruption.
     209            #
     210            # _satisfy_*() code which detects corruption should first call
     211            # self._signal_corruption(), and then raise the exception.
     212            log.msg(format="corruption detected in %(share)s",
     213                    share=repr(self),
     214                    level=log.UNUSUAL, parent=self._lp, umid="gWspVw")
     215            self._fail(Failure(e), log.UNUSUAL)
     216        except DataUnavailable, e:
     217            # Abandon this share.
     218            log.msg(format="need data that will never be available"
     219                    " from %s: pending=%s, received=%s, unavailable=%s" %
     220                    (repr(self),
     221                     self._pending.dump(), self._received.dump(),
     222                     self._unavailable.dump() ),
     223                    level=log.UNUSUAL, parent=self._lp, umid="F7yJnQ")
     224            self._fail(Failure(e), log.UNUSUAL)
     225        except BaseException:
     226            self._fail(Failure())
     227            raise
     228        log.msg("%s.loop done, reqs=[%s], pending=%s, received=%s,"
     229                " unavailable=%s" %
     230                (repr(self),
     231                 ",".join([str(req[0]) for req in self._requested_blocks]),
     232                 self._pending.dump(), self._received.dump(),
     233                 self._unavailable.dump() ),
     234                level=log.NOISY, parent=self._lp, umid="9lRaRA")
     235
     236    def _do_loop(self):
     237        # we are (eventually) called after all state transitions:
     238        #  new segments added to self._requested_blocks
     239        #  new data received from servers (responses to our read() calls)
     240        #  impatience timer fires (server appears slow)
     241        if not self._alive:
     242            return
     243
     244        # First, consume all of the information that we currently have, for
     245        # all the segments people currently want.
     246        while self._get_satisfaction():
     247            pass
     248
     249        # When we get no satisfaction (from the data we've received so far),
     250        # we determine what data we desire (to satisfy more requests). The
     251        # number of segments is finite, so I can't get no satisfaction
     252        # forever.
     253        wanted, needed = self._desire()
     254
     255        # Finally, send out requests for whatever we need (desire minus
     256        # have). You can't always get what you want, but if you try
     257        # sometimes, you just might find, you get what you need.
     258        self._send_requests(wanted + needed)
     259
     260        # and sometimes you can't even get what you need
     261        disappointment = needed & self._unavailable
     262        if len(disappointment):
     263            self.had_corruption = True
     264            raise DataUnavailable("need %s but will never get it" %
     265                                  disappointment.dump())
     266
     267    def _get_satisfaction(self):
     268        # return True if we retired a data block, and should therefore be
     269        # called again. Return False if we don't retire a data block (even if
     270        # we do retire some other data, like hash chains).
     271
     272        if self.actual_offsets is None:
     273            if not self._satisfy_offsets():
     274                # can't even look at anything without the offset table
     275                return False
     276
     277        if not self._node.have_UEB:
     278            if not self._satisfy_UEB():
     279                # can't check any hashes without the UEB
     280                return False
     281        self.actual_segment_size = self._node.segment_size # might be updated
     282        assert self.actual_segment_size is not None
     283
     284        # knowing the UEB means knowing num_segments. Despite the redundancy,
     285        # this is the best place to set this. CommonShare.set_numsegs will
     286        # ignore duplicate calls.
     287        assert self._node.num_segments is not None
     288        cs = self._commonshare
     289        cs.set_numsegs(self._node.num_segments)
     290
     291        segnum, observers = self._active_segnum_and_observers()
     292        # if segnum is None, we don't really need to do anything (we have no
     293        # outstanding readers right now), but we'll fill in the bits that
     294        # aren't tied to any particular segment.
     295
     296        if segnum is not None and segnum >= self._node.num_segments:
     297            for o in observers:
     298                o.notify(state=BADSEGNUM)
     299            self._requested_blocks.pop(0)
     300            return True
     301
     302        if self._node.share_hash_tree.needed_hashes(self._shnum):
     303            if not self._satisfy_share_hash_tree():
     304                # can't check block_hash_tree without a root
     305                return False
     306
     307        if cs.need_block_hash_root():
     308            block_hash_root = self._node.share_hash_tree.get_leaf(self._shnum)
     309            cs.set_block_hash_root(block_hash_root)
     310
     311        if segnum is None:
     312            return False # we don't want any particular segment right now
     313
     314        # block_hash_tree
     315        needed_hashes = self._commonshare.get_needed_block_hashes(segnum)
     316        if needed_hashes:
     317            if not self._satisfy_block_hash_tree(needed_hashes):
     318                # can't check block without block_hash_tree
     319                return False
     320
     321        # ciphertext_hash_tree
     322        needed_hashes = self._node.get_needed_ciphertext_hashes(segnum)
     323        if needed_hashes:
     324            if not self._satisfy_ciphertext_hash_tree(needed_hashes):
     325                # can't check decoded blocks without ciphertext_hash_tree
     326                return False
     327
     328        # data blocks
     329        return self._satisfy_data_block(segnum, observers)
     330
     331    def _satisfy_offsets(self):
     332        version_s = self._received.get(0, 4)
     333        if version_s is None:
     334            return False
     335        (version,) = struct.unpack(">L", version_s)
     336        if version == 1:
     337            table_start = 0x0c
     338            self._fieldsize = 0x4
     339            self._fieldstruct = "L"
     340        elif version == 2:
     341            table_start = 0x14
     342            self._fieldsize = 0x8
     343            self._fieldstruct = "Q"
     344        else:
     345            self.had_corruption = True
     346            raise LayoutInvalid("unknown version %d (I understand 1 and 2)"
     347                                % version)
     348        offset_table_size = 6 * self._fieldsize
     349        table_s = self._received.pop(table_start, offset_table_size)
     350        if table_s is None:
     351            return False
     352        fields = struct.unpack(">"+6*self._fieldstruct, table_s)
     353        offsets = {}
     354        for i,field in enumerate(['data',
     355                                  'plaintext_hash_tree', # UNUSED
     356                                  'crypttext_hash_tree',
     357                                  'block_hashes',
     358                                  'share_hashes',
     359                                  'uri_extension',
     360                                  ] ):
     361            offsets[field] = fields[i]
     362        self.actual_offsets = offsets
     363        log.msg("actual offsets: data=%d, plaintext_hash_tree=%d, crypttext_hash_tree=%d, block_hashes=%d, share_hashes=%d, uri_extension=%d" % tuple(fields))
     364        self._received.remove(0, 4) # don't need this anymore
     365
     366        # validate the offsets a bit
     367        share_hashes_size = offsets["uri_extension"] - offsets["share_hashes"]
     368        if share_hashes_size < 0 or share_hashes_size % (2+HASH_SIZE) != 0:
     369            # the share hash chain is stored as (hashnum,hash) pairs
     370            self.had_corruption = True
     371            raise LayoutInvalid("share hashes malformed -- should be a"
     372                                " multiple of %d bytes -- not %d" %
     373                                (2+HASH_SIZE, share_hashes_size))
     374        block_hashes_size = offsets["share_hashes"] - offsets["block_hashes"]
     375        if block_hashes_size < 0 or block_hashes_size % (HASH_SIZE) != 0:
     376            # the block hash tree is stored as a list of hashes
     377            self.had_corruption = True
     378            raise LayoutInvalid("block hashes malformed -- should be a"
     379                                " multiple of %d bytes -- not %d" %
     380                                (HASH_SIZE, block_hashes_size))
     381        # we only look at 'crypttext_hash_tree' if the UEB says we're
     382        # actually using it. Same with 'plaintext_hash_tree'. This gives us
     383        # some wiggle room: a place to stash data for later extensions.
     384
     385        return True
     386
     387    def _satisfy_UEB(self):
     388        o = self.actual_offsets
     389        fsize = self._fieldsize
     390        UEB_length_s = self._received.get(o["uri_extension"], fsize)
     391        if not UEB_length_s:
     392            return False
     393        (UEB_length,) = struct.unpack(">"+self._fieldstruct, UEB_length_s)
     394        UEB_s = self._received.pop(o["uri_extension"]+fsize, UEB_length)
     395        if not UEB_s:
     396            return False
     397        self._received.remove(o["uri_extension"], fsize)
     398        try:
     399            self._node.validate_and_store_UEB(UEB_s)
     400            return True
     401        except (LayoutInvalid, BadHashError), e:
     402            # TODO: if this UEB was bad, we'll keep trying to validate it
     403            # over and over again. Only log.err on the first one, or better
     404            # yet skip all but the first
     405            f = Failure(e)
     406            self._signal_corruption(f, o["uri_extension"], fsize+UEB_length)
     407            self.had_corruption = True
     408            raise
     409
     410    def _satisfy_share_hash_tree(self):
     411        # the share hash chain is stored as (hashnum,hash) tuples, so you
     412        # can't fetch just the pieces you need, because you don't know
     413        # exactly where they are. So fetch everything, and parse the results
     414        # later.
     415        o = self.actual_offsets
     416        hashlen = o["uri_extension"] - o["share_hashes"]
     417        assert hashlen % (2+HASH_SIZE) == 0
     418        hashdata = self._received.get(o["share_hashes"], hashlen)
     419        if not hashdata:
     420            return False
     421        share_hashes = {}
     422        for i in range(0, hashlen, 2+HASH_SIZE):
     423            (hashnum,) = struct.unpack(">H", hashdata[i:i+2])
     424            hashvalue = hashdata[i+2:i+2+HASH_SIZE]
     425            share_hashes[hashnum] = hashvalue
     426        # TODO: if they give us an empty set of hashes,
     427        # process_share_hashes() won't fail. We must ensure that this
     428        # situation doesn't allow unverified shares through. Manual testing
     429        # shows that set_block_hash_root() throws an assert because an
     430        # internal node is None instead of an actual hash, but we want
     431        # something better. It's probably best to add a method to
     432        # IncompleteHashTree which takes a leaf number and raises an
     433        # exception unless that leaf is present and fully validated.
     434        try:
     435            self._node.process_share_hashes(share_hashes)
     436            # adds to self._node.share_hash_tree
     437        except (BadHashError, NotEnoughHashesError), e:
     438            f = Failure(e)
     439            self._signal_corruption(f, o["share_hashes"], hashlen)
     440            self.had_corruption = True
     441            raise
     442        self._received.remove(o["share_hashes"], hashlen)
     443        return True
     444
     445    def _signal_corruption(self, f, start, offset):
     446        # there was corruption somewhere in the given range
     447        reason = "corruption in share[%d-%d): %s" % (start, start+offset,
     448                                                     str(f.value))
     449        self._rref.callRemoteOnly("advise_corrupt_share", reason)
     450
     451    def _satisfy_block_hash_tree(self, needed_hashes):
     452        o_bh = self.actual_offsets["block_hashes"]
     453        block_hashes = {}
     454        for hashnum in needed_hashes:
     455            hashdata = self._received.get(o_bh+hashnum*HASH_SIZE, HASH_SIZE)
     456            if hashdata:
     457                block_hashes[hashnum] = hashdata
     458            else:
     459                return False # missing some hashes
     460        # note that we don't submit any hashes to the block_hash_tree until
     461        # we've gotten them all, because the hash tree will throw an
     462        # exception if we only give it a partial set (which it therefore
     463        # cannot validate)
     464        try:
     465            self._commonshare.process_block_hashes(block_hashes)
     466        except (BadHashError, NotEnoughHashesError), e:
     467            f = Failure(e)
     468            hashnums = ",".join([str(n) for n in sorted(block_hashes.keys())])
     469            log.msg(format="hash failure in block_hashes=(%(hashnums)s),"
     470                    " from %(share)s",
     471                    hashnums=hashnums, shnum=self._shnum, share=repr(self),
     472                    failure=f, level=log.WEIRD, parent=self._lp, umid="yNyFdA")
     473            hsize = max(0, max(needed_hashes)) * HASH_SIZE
     474            self._signal_corruption(f, o_bh, hsize)
     475            self.had_corruption = True
     476            raise
     477        for hashnum in needed_hashes:
     478            self._received.remove(o_bh+hashnum*HASH_SIZE, HASH_SIZE)
     479        return True
     480
     481    def _satisfy_ciphertext_hash_tree(self, needed_hashes):
     482        start = self.actual_offsets["crypttext_hash_tree"]
     483        hashes = {}
     484        for hashnum in needed_hashes:
     485            hashdata = self._received.get(start+hashnum*HASH_SIZE, HASH_SIZE)
     486            if hashdata:
     487                hashes[hashnum] = hashdata
     488            else:
     489                return False # missing some hashes
     490        # we don't submit any hashes to the ciphertext_hash_tree until we've
     491        # gotten them all
     492        try:
     493            self._node.process_ciphertext_hashes(hashes)
     494        except (BadHashError, NotEnoughHashesError), e:
     495            f = Failure(e)
     496            hashnums = ",".join([str(n) for n in sorted(hashes.keys())])
     497            log.msg(format="hash failure in ciphertext_hashes=(%(hashnums)s),"
     498                    " from %(share)s",
     499                    hashnums=hashnums, share=repr(self), failure=f,
     500                    level=log.WEIRD, parent=self._lp, umid="iZI0TA")
     501            hsize = max(0, max(needed_hashes))*HASH_SIZE
     502            self._signal_corruption(f, start, hsize)
     503            self.had_corruption = True
     504            raise
     505        for hashnum in needed_hashes:
     506            self._received.remove(start+hashnum*HASH_SIZE, HASH_SIZE)
     507        return True
     508
     509    def _satisfy_data_block(self, segnum, observers):
     510        tail = (segnum == self._node.num_segments-1)
     511        datastart = self.actual_offsets["data"]
     512        blockstart = datastart + segnum * self._node.block_size
     513        blocklen = self._node.block_size
     514        if tail:
     515            blocklen = self._node.tail_block_size
     516
     517        block = self._received.pop(blockstart, blocklen)
     518        if not block:
     519            log.msg("no data for block %s (want [%d:+%d])" % (repr(self),
     520                                                              blockstart, blocklen))
     521            return False
     522        log.msg(format="%(share)s._satisfy_data_block [%(start)d:+%(length)d]",
     523                share=repr(self), start=blockstart, length=blocklen,
     524                level=log.NOISY, parent=self._lp, umid="uTDNZg")
     525        # this block is being retired, either as COMPLETE or CORRUPT, since
     526        # no further data reads will help
     527        assert self._requested_blocks[0][0] == segnum
     528        try:
     529            self._commonshare.check_block(segnum, block)
     530            # hurrah, we have a valid block. Deliver it.
     531            for o in observers:
     532                # goes to SegmentFetcher._block_request_activity
     533                o.notify(state=COMPLETE, block=block)
     534        except (BadHashError, NotEnoughHashesError), e:
     535            # rats, we have a corrupt block. Notify our clients that they
     536            # need to look elsewhere, and advise the server. Unlike
     537            # corruption in other parts of the share, this doesn't cause us
     538            # to abandon the whole share.
     539            f = Failure(e)
     540            log.msg(format="hash failure in block %(segnum)d, from %(share)s",
     541                    segnum=segnum, share=repr(self), failure=f,
     542                    level=log.WEIRD, parent=self._lp, umid="mZjkqA")
     543            for o in observers:
     544                o.notify(state=CORRUPT)
     545            self._signal_corruption(f, blockstart, blocklen)
     546            self.had_corruption = True
     547        # in either case, we've retired this block
     548        self._requested_blocks.pop(0)
     549        # popping the request keeps us from turning around and wanting the
     550        # block again right away
     551        return True # got satisfaction
     552
     553    def _desire(self):
     554        segnum, observers = self._active_segnum_and_observers() # maybe None
     555
     556        # 'want_it' is for data we merely want: we know that we don't really
     557        # need it. This includes speculative reads, like the first 1KB of the
     558        # share (for the offset table) and the first 2KB of the UEB.
     559        #
     560        # 'need_it' is for data that, if we have the real offset table, we'll
     561        # need. If we are only guessing at the offset table, it's merely
     562        # wanted. (The share is abandoned if we can't get data that we really
     563        # need).
     564        #
     565        # 'gotta_gotta_have_it' is for data that we absolutely need,
     566        # independent of whether we're still guessing about the offset table:
     567        # the version number and the offset table itself.
     568        #
     569        # Mr. Popeil, I'm in trouble, need your assistance on the double. Aww..
     570
     571        desire = Spans(), Spans(), Spans()
     572        (want_it, need_it, gotta_gotta_have_it) = desire
     573
     574        self.actual_segment_size = self._node.segment_size # might be updated
     575        o = self.actual_offsets or self.guessed_offsets
     576        segsize = self.actual_segment_size or self.guessed_segment_size
     577        r = self._node._calculate_sizes(segsize)
     578
     579        if not self.actual_offsets:
     580            # all _desire functions add bits to the three desire[] spans
     581            self._desire_offsets(desire)
     582
     583        # we can use guessed offsets as long as this server tolerates
     584        # overrun. Otherwise, we must wait for the offsets to arrive before
     585        # we try to read anything else.
     586        if self.actual_offsets or self._overrun_ok:
     587            if not self._node.have_UEB:
     588                self._desire_UEB(desire, o)
     589            # They might ask for a segment that doesn't look right.
     590            # _satisfy() will catch+reject bad segnums once we know the UEB
     591            # (and therefore segsize and numsegs), so we'll only fail this
     592            # test if we're still guessing. We want to avoid asking the
     593            # hashtrees for needed_hashes() for bad segnums. So don't enter
     594            # _desire_hashes or _desire_data unless the segnum looks
     595            # reasonable.
     596            if segnum < r["num_segments"]:
     597                # XXX somehow we're getting here for sh5. we don't yet know
     598                # the actual_segment_size, we're still working off the guess.
     599                # the ciphertext_hash_tree has been corrected, but the
     600                # commonshare._block_hash_tree is still in the guessed state.
     601                self._desire_share_hashes(desire, o)
     602                if segnum is not None:
     603                    self._desire_block_hashes(desire, o, segnum)
     604                    self._desire_data(desire, o, r, segnum, segsize)
     605            else:
     606                log.msg("_desire: segnum(%d) looks wrong (numsegs=%d)"
     607                        % (segnum, r["num_segments"]),
     608                        level=log.UNUSUAL, parent=self._lp, umid="tuYRQQ")
     609
     610        log.msg("end _desire: want_it=%s need_it=%s gotta=%s"
     611                % (want_it.dump(), need_it.dump(), gotta_gotta_have_it.dump()))
     612        if self.actual_offsets:
     613            return (want_it, need_it+gotta_gotta_have_it)
     614        else:
     615            return (want_it+need_it, gotta_gotta_have_it)
     616
     617    def _desire_offsets(self, desire):
     618        (want_it, need_it, gotta_gotta_have_it) = desire
     619        if self._overrun_ok:
     620            # easy! this includes version number, sizes, and offsets
     621            want_it.add(0, 1024)
     622            return
     623
     624        # v1 has an offset table that lives [0x0,0x24). v2 lives [0x0,0x44).
     625        # To be conservative, only request the data that we know lives there,
     626        # even if that means more roundtrips.
     627
     628        gotta_gotta_have_it.add(0, 4)  # version number, always safe
     629        version_s = self._received.get(0, 4)
     630        if not version_s:
     631            return
     632        (version,) = struct.unpack(">L", version_s)
     633        # The code in _satisfy_offsets will have checked this version
     634        # already. There is no code path to get this far with version>2.
     635        assert 1 <= version <= 2, "can't get here, version=%d" % version
     636        if version == 1:
     637            table_start = 0x0c
     638            fieldsize = 0x4
     639        elif version == 2:
     640            table_start = 0x14
     641            fieldsize = 0x8
     642        offset_table_size = 6 * fieldsize
     643        gotta_gotta_have_it.add(table_start, offset_table_size)
     644
     645    def _desire_UEB(self, desire, o):
     646        (want_it, need_it, gotta_gotta_have_it) = desire
     647
     648        # UEB data is stored as (length,data).
     649        if self._overrun_ok:
     650            # We can pre-fetch 2kb, which should probably cover it. If it
     651            # turns out to be larger, we'll come back here later with a known
     652            # length and fetch the rest.
     653            want_it.add(o["uri_extension"], 2048)
     654            # now, while that is probably enough to fetch the whole UEB, it
     655            # might not be, so we need to do the next few steps as well. In
     656            # most cases, the following steps will not actually add anything
     657            # to need_it
     658
     659        need_it.add(o["uri_extension"], self._fieldsize)
     660        # only use a length if we're sure it's correct, otherwise we'll
     661        # probably fetch a huge number
     662        if not self.actual_offsets:
     663            return
     664        UEB_length_s = self._received.get(o["uri_extension"], self._fieldsize)
     665        if UEB_length_s:
     666            (UEB_length,) = struct.unpack(">"+self._fieldstruct, UEB_length_s)
     667            # we know the length, so make sure we grab everything
     668            need_it.add(o["uri_extension"]+self._fieldsize, UEB_length)
     669
     670    def _desire_share_hashes(self, desire, o):
     671        (want_it, need_it, gotta_gotta_have_it) = desire
     672
     673        if self._node.share_hash_tree.needed_hashes(self._shnum):
     674            hashlen = o["uri_extension"] - o["share_hashes"]
     675            need_it.add(o["share_hashes"], hashlen)
     676
     677    def _desire_block_hashes(self, desire, o, segnum):
     678        (want_it, need_it, gotta_gotta_have_it) = desire
     679
     680        # block hash chain
     681        for hashnum in self._commonshare.get_needed_block_hashes(segnum):
     682            need_it.add(o["block_hashes"]+hashnum*HASH_SIZE, HASH_SIZE)
     683
     684        # ciphertext hash chain
     685        for hashnum in self._node.get_needed_ciphertext_hashes(segnum):
     686            need_it.add(o["crypttext_hash_tree"]+hashnum*HASH_SIZE, HASH_SIZE)
     687
     688    def _desire_data(self, desire, o, r, segnum, segsize):
     689        (want_it, need_it, gotta_gotta_have_it) = desire
     690        tail = (segnum == r["num_segments"]-1)
     691        datastart = o["data"]
     692        blockstart = datastart + segnum * r["block_size"]
     693        blocklen = r["block_size"]
     694        if tail:
     695            blocklen = r["tail_block_size"]
     696        need_it.add(blockstart, blocklen)
     697
     698    def _send_requests(self, desired):
     699        ask = desired - self._pending - self._received.get_spans()
     700        log.msg("%s._send_requests, desired=%s, pending=%s, ask=%s" %
     701                (repr(self), desired.dump(), self._pending.dump(), ask.dump()),
     702                level=log.NOISY, parent=self._lp, umid="E94CVA")
     703        # XXX At one time, this code distinguished between data blocks and
     704        # hashes, and made sure to send (small) requests for hashes before
     705        # sending (big) requests for blocks. The idea was to make sure that
     706        # all hashes arrive before the blocks, so the blocks can be consumed
     707        # and released in a single turn. I removed this for simplicity.
     708        # Reconsider the removal: maybe bring it back.
     709        ds = self._download_status
     710
     711        for (start, length) in ask:
     712            # TODO: quantize to reasonably-large blocks
     713            self._pending.add(start, length)
     714            lp = log.msg(format="%(share)s._send_request"
     715                         " [%(start)d:+%(length)d]",
     716                         share=repr(self),
     717                         start=start, length=length,
     718                         level=log.NOISY, parent=self._lp, umid="sgVAyA")
     719            req_ev = ds.add_request_sent(self._peerid, self._shnum,
     720                                         start, length, now())
     721            d = self._send_request(start, length)
     722            d.addCallback(self._got_data, start, length, req_ev, lp)
     723            d.addErrback(self._got_error, start, length, req_ev, lp)
     724            d.addCallback(self._trigger_loop)
     725            d.addErrback(lambda f:
     726                         log.err(format="unhandled error during send_request",
     727                                 failure=f, parent=self._lp,
     728                                 level=log.WEIRD, umid="qZu0wg"))
     729
     730    def _send_request(self, start, length):
     731        return self._rref.callRemote("read", start, length)
     732
     733    def _got_data(self, data, start, length, req_ev, lp):
     734        req_ev.finished(len(data), now())
     735        if not self._alive:
     736            return
     737        log.msg(format="%(share)s._got_data [%(start)d:+%(length)d] -> %(datalen)d",
     738                share=repr(self), start=start, length=length, datalen=len(data),
     739                level=log.NOISY, parent=lp, umid="5Qn6VQ")
     740        self._pending.remove(start, length)
     741        self._received.add(start, data)
     742
     743        # if we ask for [a:c], and we get back [a:b] (b<c), that means we're
     744        # never going to get [b:c]. If we really need that data, this block
     745        # will never complete. The easiest way to get into this situation is
     746        # to hit a share with a corrupted offset table, or one that's somehow
     747        # been truncated. On the other hand, when overrun_ok is true, we ask
     748        # for data beyond the end of the share all the time (it saves some
     749        # RTT when we don't know the length of the share ahead of time). So
     750        # not every asked-for-but-not-received byte is fatal.
     751        if len(data) < length:
     752            self._unavailable.add(start+len(data), length-len(data))
     753
     754        # XXX if table corruption causes our sections to overlap, then one
     755        # consumer (i.e. block hash tree) will pop/remove the data that
     756        # another consumer (i.e. block data) mistakenly thinks it needs. It
     757        # won't ask for that data again, because the span is in
     758        # self._requested. But that span won't be in self._unavailable
     759        # because we got it back from the server. TODO: handle this properly
     760        # (raise DataUnavailable). Then add sanity-checking
     761        # no-overlaps-allowed tests to the offset-table unpacking code to
     762        # catch this earlier. XXX
     763
     764        # accumulate a wanted/needed span (not as self._x, but passed into
     765        # desire* functions). manage a pending/in-flight list. when the
     766        # requests are sent out, empty/discard the wanted/needed span and
     767        # populate/augment the pending list. when the responses come back,
     768        # augment either received+data or unavailable.
     769
     770        # if a corrupt offset table results in double-usage, we'll send
     771        # double requests.
     772
     773        # the wanted/needed span is only "wanted" for the first pass. Once
     774        # the offset table arrives, it's all "needed".
     775
     776    def _got_error(self, f, start, length, req_ev, lp):
     777        req_ev.finished("error", now())
     778        log.msg(format="error requesting %(start)d+%(length)d"
     779                " from %(server)s for si %(si)s",
     780                start=start, length=length,
     781                server=self._peerid_s, si=self._si_prefix,
     782                failure=f, parent=lp, level=log.UNUSUAL, umid="BZgAJw")
     783        # retire our observers, assuming we won't be able to make any
     784        # further progress
     785        self._fail(f, log.UNUSUAL)
     786
     787    def _trigger_loop(self, res):
     788        if self._alive:
     789            eventually(self.loop)
     790        return res
     791
     792    def _fail(self, f, level=log.WEIRD):
     793        log.msg(format="abandoning %(share)s",
     794                share=repr(self), failure=f,
     795                level=level, parent=self._lp, umid="JKM2Og")
     796        self._alive = False
     797        for (segnum, observers) in self._requested_blocks:
     798            for o in observers:
     799                o.notify(state=DEAD, f=f)
     800
     801
     802class CommonShare:
     803    """I hold data that is common across all instances of a single share,
     804    like sh2 on both servers A and B. This is just the block hash tree.
     805    """
     806    def __init__(self, guessed_numsegs, si_prefix, shnum, logparent):
     807        self.si_prefix = si_prefix
     808        self.shnum = shnum
     809        # in the beginning, before we have the real UEB, we can only guess at
     810        # the number of segments. But we want to ask for block hashes early.
     811        # So if we're asked which block hashes are needed before we know
     812        # numsegs for sure, we return a guess.
     813        self._block_hash_tree = IncompleteHashTree(guessed_numsegs)
     814        self._know_numsegs = False
     815        self._logparent = logparent
     816
     817    def set_numsegs(self, numsegs):
     818        if self._know_numsegs:
     819            return
     820        self._block_hash_tree = IncompleteHashTree(numsegs)
     821        self._know_numsegs = True
     822
     823    def need_block_hash_root(self):
     824        return bool(not self._block_hash_tree[0])
     825
     826    def set_block_hash_root(self, roothash):
     827        assert self._know_numsegs
     828        self._block_hash_tree.set_hashes({0: roothash})
     829
     830    def get_needed_block_hashes(self, segnum):
     831        # XXX: include_leaf=True needs thought: how did the old downloader do
     832        # it? I think it grabbed *all* block hashes and set them all at once.
     833        # Since we want to fetch less data, we either need to fetch the leaf
     834        # too, or wait to set the block hashes until we've also received the
     835        # block itself, so we can hash it too, and set the chain+leaf all at
     836        # the same time.
     837        return self._block_hash_tree.needed_hashes(segnum, include_leaf=True)
     838
     839    def process_block_hashes(self, block_hashes):
     840        assert self._know_numsegs
     841        # this may raise BadHashError or NotEnoughHashesError
     842        self._block_hash_tree.set_hashes(block_hashes)
     843
     844    def check_block(self, segnum, block):
     845        assert self._know_numsegs
     846        h = hashutil.block_hash(block)
     847        # this may raise BadHashError or NotEnoughHashesError
     848        self._block_hash_tree.set_hashes(leaves={segnum: h})
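
The Share class above is driven entirely through get_block() and the
EventStreamObserver it returns. The sketch below is illustration only, not
part of the patch: it assumes EventStreamObserver.subscribe(cb) registers a
callback that is later invoked with the keyword arguments Share passes to
notify() (state=, block=, f=), and that the state constants can be imported
as allmydata.immutable.downloader.common (share.py imports them from the
sibling common module). The helper name fetch_block and the two callbacks
are hypothetical.

    # Hypothetical caller sketch (illustration only, not part of the diff).
    from allmydata.immutable.downloader.common import (COMPLETE, CORRUPT,
                                                       DEAD, BADSEGNUM)

    def fetch_block(share, segnum, on_block, on_problem):
        # get_block() queues the request and returns an EventStreamObserver
        o = share.get_block(segnum)
        def _saw_event(state=None, block=None, f=None):
            if state == COMPLETE:
                on_block(block)          # block was validated by check_block()
            elif state in (CORRUPT, DEAD, BADSEGNUM):
                on_problem(state, f)     # look for a different share
            # state=OVERDUE is only a hint that an answer is late and that
            # asking another share may be worthwhile (see the docstring above)
        o.subscribe(_saw_event)
        return o                         # caller may later call o.cancel()

Note that repeated get_block() calls for the same segnum join the existing
entry in _requested_blocks, so duplicate observers simply share one request.
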
  • new file src/allmydata/immutable/downloader/status.py

    diff --git a/src/allmydata/immutable/downloader/status.py b/src/allmydata/immutable/downloader/status.py
    new file mode 100644
    index 0000000..5d60db0
    - +  
     1
     2import itertools
     3from zope.interface import implements
     4from allmydata.interfaces import IDownloadStatus
     5
     6class RequestEvent:
     7    def __init__(self, download_status, tag):
     8        self._download_status = download_status
     9        self._tag = tag
     10    def finished(self, received, when):
     11        self._download_status.add_request_finished(self._tag, received, when)
     12
     13class DYHBEvent:
     14    def __init__(self, download_status, tag):
     15        self._download_status = download_status
     16        self._tag = tag
     17    def finished(self, shnums, when):
     18        self._download_status.add_dyhb_finished(self._tag, shnums, when)
     19
     20class ReadEvent:
     21    def __init__(self, download_status, tag):
     22        self._download_status = download_status
     23        self._tag = tag
     24    def update(self, bytes, decrypttime, pausetime):
     25        self._download_status.update_read_event(self._tag, bytes,
     26                                                decrypttime, pausetime)
     27    def finished(self, finishtime):
     28        self._download_status.finish_read_event(self._tag, finishtime)
     29
     30class DownloadStatus:
     31    # There is one DownloadStatus for each CiphertextFileNode. The status
     32    # object will keep track of all activity for that node.
     33    implements(IDownloadStatus)
     34    statusid_counter = itertools.count(0)
     35
     36    def __init__(self, storage_index, size):
     37        self.storage_index = storage_index
     38        self.size = size
     39        self.counter = self.statusid_counter.next()
     40        self.helper = False
     41        self.started = None
     42        # self.dyhb_requests tracks "do you have a share" requests and
     43        # responses. It maps serverid to a list of tuples, each of:
     44        #  send time
     45        #  tuple of response shnums (None until the response arrives, "error" on error)
     46        #  response time (None if response hasn't arrived yet)
     47        self.dyhb_requests = {}
     48
     49        # self.requests tracks share-data requests and responses. It maps
     50        # serverid to a list of tuples, each of:
     51        #  shnum,
     52        #  start,length,  (of data requested)
     53        #  send time
     54        #  response length (None if response hasn't arrived yet, or "error")
     55        #  response time (None if response hasn't arrived)
     56        self.requests = {}
     57
     58        # self.segment_events tracks segment requests and delivery. It is a
     59        # list of:
     60        #  type ("request", "delivery", "error")
     61        #  segment number
     62        #  event time
     63        #  segment start (file offset of first byte, None except in "delivery")
     64        #  segment length (only in "delivery")
     65        #  time spent in decode (only in "delivery")
     66        self.segment_events = []
     67
     68        # self.read_events tracks read() requests. It is a list of:
     69        #  start,length  (of data requested)
     70        #  request time
     71        #  finish time (None until finished)
     72        #  bytes returned (starts at 0, grows as segments are delivered)
     73        #  time spent in decrypt (None for ciphertext-only reads)
     74        #  time spent paused
     75        self.read_events = []
     76
     77        self.known_shares = [] # (serverid, shnum)
     78        self.problems = []
     79
     80
     81    def add_dyhb_sent(self, serverid, when):
     82        r = (when, None, None)
     83        if serverid not in self.dyhb_requests:
     84            self.dyhb_requests[serverid] = []
     85        self.dyhb_requests[serverid].append(r)
     86        tag = (serverid, len(self.dyhb_requests[serverid])-1)
     87        return DYHBEvent(self, tag)
     88
     89    def add_dyhb_finished(self, tag, shnums, when):
     90        # received="error" on error, else tuple(shnums)
     91        (serverid, index) = tag
     92        r = self.dyhb_requests[serverid][index]
     93        (sent, _, _) = r
     94        r = (sent, shnums, when)
     95        self.dyhb_requests[serverid][index] = r
     96
     97    def add_request_sent(self, serverid, shnum, start, length, when):
     98        r = (shnum, start, length, when, None, None)
     99        if serverid not in self.requests:
     100            self.requests[serverid] = []
     101        self.requests[serverid].append(r)
     102        tag = (serverid, len(self.requests[serverid])-1)
     103        return RequestEvent(self, tag)
     104
     105    def add_request_finished(self, tag, received, when):
     106        # received="error" on error, else len(data)
     107        (serverid, index) = tag
     108        r = self.requests[serverid][index]
     109        (shnum, start, length, sent, _, _) = r
     110        r = (shnum, start, length, sent, received, when)
     111        self.requests[serverid][index] = r
     112
     113    def add_segment_request(self, segnum, when):
     114        if self.started is None:
     115            self.started = when
     116        r = ("request", segnum, when, None, None, None)
     117        self.segment_events.append(r)
     118    def add_segment_delivery(self, segnum, when, start, length, decodetime):
     119        r = ("delivery", segnum, when, start, length, decodetime)
     120        self.segment_events.append(r)
     121    def add_segment_error(self, segnum, when):
     122        r = ("error", segnum, when, None, None, None)
     123        self.segment_events.append(r)
     124
     125    def add_read_event(self, start, length, when):
     126        if self.started is None:
     127            self.started = when
     128        r = (start, length, when, None, 0, 0, 0)
     129        self.read_events.append(r)
     130        tag = len(self.read_events)-1
     131        return ReadEvent(self, tag)
     132    def update_read_event(self, tag, bytes_d, decrypt_d, paused_d):
     133        r = self.read_events[tag]
     134        (start, length, requesttime, finishtime, bytes, decrypt, paused) = r
     135        bytes += bytes_d
     136        decrypt += decrypt_d
     137        paused += paused_d
     138        r = (start, length, requesttime, finishtime, bytes, decrypt, paused)
     139        self.read_events[tag] = r
     140    def finish_read_event(self, tag, finishtime):
     141        r = self.read_events[tag]
     142        (start, length, requesttime, _, bytes, decrypt, paused) = r
     143        r = (start, length, requesttime, finishtime, bytes, decrypt, paused)
     144        self.read_events[tag] = r
     145
     146    def add_known_share(self, serverid, shnum):
     147        self.known_shares.append( (serverid, shnum) )
     148
     149    def add_problem(self, p):
     150        self.problems.append(p)
     151
     152    # IDownloadStatus methods
     153    def get_counter(self):
     154        return self.counter
     155    def get_storage_index(self):
     156        return self.storage_index
     157    def get_size(self):
     158        return self.size
     159    def get_status(self):
     160        return "not impl yet" # TODO
     161    def get_progress(self):
     162        return 0.1 # TODO
     163    def using_helper(self):
     164        return False
     165    def get_active(self):
     166        return False # TODO
     167    def get_started(self):
     168        return self.started
     169    def get_results(self):
     170        return None # TODO
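
DownloadStatus is purely an event recorder: add_dyhb_sent(),
add_request_sent(), and add_read_event() each append a placeholder tuple and
return a small event object (DYHBEvent, RequestEvent, ReadEvent) whose
finished()/update() methods fill in the remaining fields later. A minimal
sketch of that flow, using only the methods defined above; the serverid,
sizes, and times are made-up values:

    # Illustration only (not part of the diff).
    import time

    ds = DownloadStatus(storage_index="\x00" * 16, size=10000)

    # "do you have a share?" round trip
    dyhb_ev = ds.add_dyhb_sent(serverid="\x01" * 20, when=time.time())
    dyhb_ev.finished((0, 2, 5), time.time())   # server reported sh0, sh2, sh5

    # share-data read round trip, as issued by Share._send_requests
    req_ev = ds.add_request_sent(serverid="\x01" * 20, shnum=2,
                                 start=0, length=1024, when=time.time())
    req_ev.finished(1024, time.time())         # or finished("error", when)

    # a client read() of the plaintext
    read_ev = ds.add_read_event(start=0, length=5000, when=time.time())
    read_ev.update(bytes=5000, decrypttime=0.01, pausetime=0.0)
    read_ev.finished(time.time())

Note that get_status(), get_progress(), get_active(), and get_results() are
still placeholders (marked TODO above), so only the raw event lists carry
real information at this point.
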
  • src/allmydata/immutable/filenode.py

    diff --git a/src/allmydata/immutable/filenode.py b/src/allmydata/immutable/filenode.py
    index 70044a7..1d5be94 100644
    a b  
    1 import copy, os.path, stat
    2 from cStringIO import StringIO
     1
     2import binascii
     3import copy
     4import time
     5now = time.time
    36from zope.interface import implements
    47from twisted.internet import defer
    5 from twisted.internet.interfaces import IPushProducer
    6 from twisted.protocols import basic
    7 from foolscap.api import eventually
    8 from allmydata.interfaces import IImmutableFileNode, ICheckable, \
    9      IDownloadTarget, IUploadResults
    10 from allmydata.util import dictutil, log, base32
    11 from allmydata.uri import CHKFileURI, LiteralFileURI
    12 from allmydata.immutable.checker import Checker
    13 from allmydata.check_results import CheckResults, CheckAndRepairResults
    14 from allmydata.immutable.repairer import Repairer
    15 from allmydata.immutable import download
    16 
    17 class _ImmutableFileNodeBase(object):
    18     implements(IImmutableFileNode, ICheckable)
    19 
    20     def get_write_uri(self):
    21         return None
    22 
    23     def get_readonly_uri(self):
    24         return self.get_uri()
    25 
    26     def is_mutable(self):
    27         return False
    28 
    29     def is_readonly(self):
    30         return True
    31 
    32     def is_unknown(self):
    33         return False
    34 
    35     def is_allowed_in_immutable_directory(self):
    36         return True
    37 
    38     def raise_error(self):
    39         pass
    40 
    41     def __hash__(self):
    42         return self.u.__hash__()
    43     def __eq__(self, other):
    44         if isinstance(other, _ImmutableFileNodeBase):
    45             return self.u.__eq__(other.u)
    46         else:
    47             return False
    48     def __ne__(self, other):
    49         if isinstance(other, _ImmutableFileNodeBase):
    50             return self.u.__eq__(other.u)
    51         else:
    52             return True
    53 
    54 class PortionOfFile:
    55     # like a list slice (things[2:14]), but for a file on disk
    56     def __init__(self, fn, offset=0, size=None):
    57         self.f = open(fn, "rb")
    58         self.f.seek(offset)
    59         self.bytes_left = size
    60 
    61     def read(self, size=None):
    62         # bytes_to_read = min(size, self.bytes_left), but None>anything
    63         if size is None:
    64             bytes_to_read = self.bytes_left
    65         elif self.bytes_left is None:
    66             bytes_to_read = size
    67         else:
    68             bytes_to_read = min(size, self.bytes_left)
    69         data = self.f.read(bytes_to_read)
    70         if self.bytes_left is not None:
    71             self.bytes_left -= len(data)
    72         return data
    73 
    74 class DownloadCache:
    75     implements(IDownloadTarget)
    76 
    77     def __init__(self, filecap, storage_index, downloader,
    78                  cachedirectorymanager):
    79         self._downloader = downloader
    80         self._uri = filecap
    81         self._storage_index = storage_index
    82         self.milestones = set() # of (offset,size,Deferred)
    83         self.cachedirectorymanager = cachedirectorymanager
    84         self.cachefile = None
    85         self.download_in_progress = False
    86         # five states:
    87         #  new ImmutableFileNode, no downloads ever performed
    88         #  new ImmutableFileNode, leftover file (partial)
    89         #  new ImmutableFileNode, leftover file (whole)
    90         #  download in progress, not yet complete
    91         #  download complete
    92 
    93     def when_range_available(self, offset, size):
    94         assert isinstance(offset, (int,long))
    95         assert isinstance(size, (int,long))
    96 
    97         d = defer.Deferred()
    98         self.milestones.add( (offset,size,d) )
    99         self._check_milestones()
    100         if self.milestones and not self.download_in_progress:
    101             self.download_in_progress = True
    102             log.msg(format=("immutable filenode read [%(si)s]: " +
    103                             "starting download"),
    104                     si=base32.b2a(self._storage_index),
    105                     umid="h26Heg", level=log.OPERATIONAL)
    106             d2 = self._downloader.download(self._uri, self)
    107             d2.addBoth(self._download_done)
    108             d2.addErrback(self._download_failed)
    109             d2.addErrback(log.err, umid="cQaM9g")
    110         return d
    111 
    112     def read(self, consumer, offset, size):
    113         assert offset+size <= self.get_filesize()
    114         if not self.cachefile:
    115             self.cachefile = self.cachedirectorymanager.get_file(base32.b2a(self._storage_index))
    116         f = PortionOfFile(self.cachefile.get_filename(), offset, size)
    117         d = basic.FileSender().beginFileTransfer(f, consumer)
    118         d.addCallback(lambda lastSent: consumer)
    119         return d
    120 
    121     def _download_done(self, res):
    122         # clear download_in_progress, so failed downloads can be re-tried
    123         self.download_in_progress = False
    124         return res
    125 
    126     def _download_failed(self, f):
    127         # tell anyone who's waiting that we failed
    128         for m in self.milestones:
    129             (offset,size,d) = m
    130             eventually(d.errback, f)
    131         self.milestones.clear()
    132 
    133     def _check_milestones(self):
    134         current_size = self.get_filesize()
    135         for m in list(self.milestones):
    136             (offset,size,d) = m
    137             if offset+size <= current_size:
    138                 log.msg(format=("immutable filenode read [%(si)s] " +
    139                                 "%(offset)d+%(size)d vs %(filesize)d: " +
    140                                 "done"),
    141                         si=base32.b2a(self._storage_index),
    142                         offset=offset, size=size, filesize=current_size,
    143                         umid="nuedUg", level=log.NOISY)
    144                 self.milestones.discard(m)
    145                 eventually(d.callback, None)
    146             else:
    147                 log.msg(format=("immutable filenode read [%(si)s] " +
    148                                 "%(offset)d+%(size)d vs %(filesize)d: " +
    149                                 "still waiting"),
    150                         si=base32.b2a(self._storage_index),
    151                         offset=offset, size=size, filesize=current_size,
    152                         umid="8PKOhg", level=log.NOISY)
    153 
    154     def get_filesize(self):
    155         if not self.cachefile:
    156             self.cachefile = self.cachedirectorymanager.get_file(base32.b2a(self._storage_index))
    157         try:
    158             filesize = os.stat(self.cachefile.get_filename())[stat.ST_SIZE]
    159         except OSError:
    160             filesize = 0
    161         return filesize
    162 
    163 
    164     def open(self, size):
    165         if not self.cachefile:
    166             self.cachefile = self.cachedirectorymanager.get_file(base32.b2a(self._storage_index))
    167         self.f = open(self.cachefile.get_filename(), "wb")
    168 
    169     def write(self, data):
    170         self.f.write(data)
    171         self._check_milestones()
    172 
    173     def close(self):
    174         self.f.close()
    175         self._check_milestones()
    176 
    177     def fail(self, why):
    178         pass
    179     def register_canceller(self, cb):
    180         pass
    181     def finish(self):
    182         return None
    183     # The following methods are just because the target might be a
    184     # repairer.DownUpConnector, and just because the current CHKUpload object
    185     # expects to find the storage index and encoding parameters in its
    186     # Uploadable.
    187     def set_storageindex(self, storageindex):
    188         pass
    189     def set_encodingparams(self, encodingparams):
    190         pass
     8from twisted.internet.interfaces import IConsumer
    1919
     10from allmydata.interfaces import IImmutableFileNode, IUploadResults
     11from allmydata import uri
     12from allmydata.check_results import CheckResults, CheckAndRepairResults
     13from allmydata.util.dictutil import DictOfSets
     14from pycryptopp.cipher.aes import AES
    19215
    193 class ImmutableFileNode(_ImmutableFileNodeBase, log.PrefixingLogMixin):
    194     def __init__(self, filecap, storage_broker, secret_holder,
    195                  downloader, history, cachedirectorymanager):
    196         assert isinstance(filecap, CHKFileURI)
    197         self.u = filecap
     16# local imports
     17from allmydata.immutable.checker import Checker
     18from allmydata.immutable.repairer import Repairer
     19from allmydata.immutable.downloader.node import DownloadNode
     20from allmydata.immutable.downloader.status import DownloadStatus
     21
     22class CiphertextFileNode:
     23    def __init__(self, verifycap, storage_broker, secret_holder,
     24                 terminator, history, download_status=None):
     25        assert isinstance(verifycap, uri.CHKFileVerifierURI)
     26        self._verifycap = verifycap
    19827        self._storage_broker = storage_broker
    19928        self._secret_holder = secret_holder
    200         self._downloader = downloader
    201         self._history = history
    202         storage_index = self.get_storage_index()
    203         self.download_cache = DownloadCache(filecap, storage_index, downloader,
    204                                             cachedirectorymanager)
    205         prefix = self.u.get_verify_cap().to_string()
    206         log.PrefixingLogMixin.__init__(self, "allmydata.immutable.filenode", prefix=prefix)
    207         self.log("starting", level=log.OPERATIONAL)
     29        if download_status is None:
     30            ds = DownloadStatus(verifycap.storage_index, verifycap.size)
     31            if history:
     32                history.add_download(ds)
     33            download_status = ds
     34        self._node = DownloadNode(verifycap, storage_broker, secret_holder,
     35                                  terminator, history, download_status)
     36
     37    def read(self, consumer, offset=0, size=None, read_ev=None):
     38        """I am the main entry point, from which FileNode.read() can get
     39        data. I feed the consumer with the desired range of ciphertext. I
     40        return a Deferred that fires (with the consumer) when the read is
     41        finished."""
     42        return self._node.read(consumer, offset, size, read_ev)
     43
     44    def get_segment(self, segnum):
     45        """Begin downloading a segment. I return a tuple (d, c): 'd' is a
     46        Deferred that fires with (offset,data) when the desired segment is
     47        available, and c is an object on which c.cancel() can be called to
     48        disavow interest in the segment (after which 'd' will never fire).
     49
     50        You probably need to know the segment size before calling this,
     51        unless you want the first few bytes of the file. If you ask for a
     52        segment number which turns out to be too large, the Deferred will
     53        errback with BadSegmentNumberError.
     54
     55        The Deferred fires with the offset of the first byte of the data
     56        segment, so that you can call get_segment() before knowing the
     57        segment size, and still know which data you received.
     58        """
     59        return self._node.get_segment(segnum)
     60
     61    def get_segment_size(self):
     62        # return a Deferred that fires with the file's real segment size
     63        return self._node.get_segsize()
    20864
    209     def get_size(self):
    210         return self.u.get_size()
    211     def get_current_size(self):
    212         return defer.succeed(self.get_size())
    213 
    214     def get_cap(self):
    215         return self.u
    216     def get_readcap(self):
    217         return self.u.get_readonly()
     65    def get_storage_index(self):
     66        return self._verifycap.storage_index
    21867    def get_verify_cap(self):
    219         return self.u.get_verify_cap()
    220     def get_repair_cap(self):
    221         # CHK files can be repaired with just the verifycap
    222         return self.u.get_verify_cap()
     68        return self._verifycap
     69    def get_size(self):
     70        return self._verifycap.size
    22371
    224     def get_uri(self):
    225         return self.u.to_string()
     72    def raise_error(self):
     73        pass
    22674
    227     def get_storage_index(self):
    228         return self.u.get_storage_index()
    22975
    23076    def check_and_repair(self, monitor, verify=False, add_lease=False):
    231         verifycap = self.get_verify_cap()
     77        verifycap = self._verifycap
     78        storage_index = verifycap.storage_index
    23279        sb = self._storage_broker
    23380        servers = sb.get_all_servers()
    23481        sh = self._secret_holder
    class ImmutableFileNode(_ImmutableFileNodeBase, log.PrefixingLogMixin): 
    23885                    monitor=monitor)
    23986        d = c.start()
    24087        def _maybe_repair(cr):
    241             crr = CheckAndRepairResults(self.u.get_storage_index())
     88            crr = CheckAndRepairResults(storage_index)
    24289            crr.pre_repair_results = cr
    24390            if cr.is_healthy():
    24491                crr.post_repair_results = cr
    class ImmutableFileNode(_ImmutableFileNodeBase, log.PrefixingLogMixin): 
    24895                crr.repair_successful = False # until proven successful
    24996                def _gather_repair_results(ur):
    25097                    assert IUploadResults.providedBy(ur), ur
    251                     # clone the cr -- check results to form the basic of the prr -- post-repair results
     98                    # clone the cr (check results) to form the basis of the
     99                    # prr (post-repair results)
    252100                    prr = CheckResults(cr.uri, cr.storage_index)
    253101                    prr.data = copy.deepcopy(cr.data)
    254102
    255103                    sm = prr.data['sharemap']
    256                     assert isinstance(sm, dictutil.DictOfSets), sm
     104                    assert isinstance(sm, DictOfSets), sm
    257105                    sm.update(ur.sharemap)
    258106                    servers_responding = set(prr.data['servers-responding'])
    259107                    servers_responding.union(ur.sharemap.iterkeys())
    260108                    prr.data['servers-responding'] = list(servers_responding)
    261109                    prr.data['count-shares-good'] = len(sm)
    262110                    prr.data['count-good-share-hosts'] = len(sm)
    263                     is_healthy = bool(len(sm) >= self.u.total_shares)
    264                     is_recoverable = bool(len(sm) >= self.u.needed_shares)
     111                    is_healthy = bool(len(sm) >= verifycap.total_shares)
     112                    is_recoverable = bool(len(sm) >= verifycap.needed_shares)
    265113                    prr.set_healthy(is_healthy)
    266114                    prr.set_recoverable(is_recoverable)
    267115                    crr.repair_successful = is_healthy
    268                     prr.set_needs_rebalancing(len(sm) >= self.u.total_shares)
     116                    prr.set_needs_rebalancing(len(sm) >= verifycap.total_shares)
    269117
    270118                    crr.post_repair_results = prr
    271119                    return crr
    class ImmutableFileNode(_ImmutableFileNodeBase, log.PrefixingLogMixin): 
    275123                    crr.repair_successful = False
    276124                    crr.repair_failure = f
    277125                    return f
    278                 r = Repairer(storage_broker=sb, secret_holder=sh,
    279                              verifycap=verifycap, monitor=monitor)
     126                r = Repairer(self, storage_broker=sb, secret_holder=sh,
     127                             monitor=monitor)
    280128                d = r.start()
    281129                d.addCallbacks(_gather_repair_results, _repair_error)
    282130                return d
    class ImmutableFileNode(_ImmutableFileNodeBase, log.PrefixingLogMixin): 
    285133        return d
    286134
    287135    def check(self, monitor, verify=False, add_lease=False):
    288         verifycap = self.get_verify_cap()
     136        verifycap = self._verifycap
    289137        sb = self._storage_broker
    290138        servers = sb.get_all_servers()
    291139        sh = self._secret_holder
    class ImmutableFileNode(_ImmutableFileNodeBase, log.PrefixingLogMixin): 
    295143                    monitor=monitor)
    296144        return v.start()
    297145
     146
     147class DecryptingConsumer:
     148    """I sit between a CiphertextDownloader (which acts as a Producer) and
     149    the real Consumer, decrypting everything that passes by. The real
     150    Consumer sees the real Producer, but the Producer sees us instead of the
     151    real consumer."""
     152    implements(IConsumer)
     153
     154    def __init__(self, consumer, readkey, offset, read_event):
     155        self._consumer = consumer
     156        self._read_event = read_event
     157        # TODO: pycryptopp CTR-mode needs random-access operations: I want
     158        # either a=AES(readkey, offset) or better yet both of:
     159        #  a=AES(readkey, offset=0)
     160        #  a.process(ciphertext, offset=xyz)
     161        # For now, we fake it with the existing iv= argument.
     162        offset_big = offset // 16
     163        offset_small = offset % 16
     164        iv = binascii.unhexlify("%032x" % offset_big)
     165        self._decryptor = AES(readkey, iv=iv)
     166        self._decryptor.process("\x00"*offset_small)
     167
     168    def registerProducer(self, producer, streaming):
     169        # this passes through, so the real consumer can flow-control the real
     170        # producer. Therefore we don't need to provide any IPushProducer
     171        # methods. We implement all the IConsumer methods as pass-throughs,
     172        # and only intercept write() to perform decryption.
     173        self._consumer.registerProducer(producer, streaming)
     174    def unregisterProducer(self):
     175        self._consumer.unregisterProducer()
     176    def write(self, ciphertext):
     177        started = now()
     178        plaintext = self._decryptor.process(ciphertext)
     179        elapsed = now() - started
     180        self._read_event.update(0, elapsed, 0)
     181        self._consumer.write(plaintext)
     182
     183class ImmutableFileNode:
     184    implements(IImmutableFileNode)
     185
     186    # I wrap a CiphertextFileNode with a decryption key
     187    def __init__(self, filecap, storage_broker, secret_holder, terminator,
     188                 history):
     189        assert isinstance(filecap, uri.CHKFileURI)
     190        verifycap = filecap.get_verify_cap()
     191        ds = DownloadStatus(verifycap.storage_index, verifycap.size)
     192        if history:
     193            history.add_download(ds)
     194        self._download_status = ds
     195        self._cnode = CiphertextFileNode(verifycap, storage_broker,
     196                                         secret_holder, terminator, history, ds)
     197        assert isinstance(filecap, uri.CHKFileURI)
     198        self.u = filecap
     199        self._readkey = filecap.key
     200
     201    # TODO: I'm not sure about this.. what's the use case for node==node? If
     202    # we keep it here, we should also put this on CiphertextFileNode
     203    def __hash__(self):
     204        return self.u.__hash__()
     205    def __eq__(self, other):
     206        if isinstance(other, ImmutableFileNode):
     207            return self.u.__eq__(other.u)
     208        else:
     209            return False
     210    def __ne__(self, other):
     211        if isinstance(other, ImmutableFileNode):
      212            return not self.u.__eq__(other.u)
     213        else:
     214            return True
     215
    298216    def read(self, consumer, offset=0, size=None):
    299         self.log("read", offset=offset, size=size,
    300                  umid="UPP8FA", level=log.OPERATIONAL)
    301         if size is None:
    302             size = self.get_size() - offset
    303         size = min(size, self.get_size() - offset)
    304 
    305         if offset == 0 and size == self.get_size():
    306             # don't use the cache, just do a normal streaming download
    307             self.log("doing normal full download", umid="VRSBwg", level=log.OPERATIONAL)
    308             target = download.ConsumerAdapter(consumer)
    309             return self._downloader.download(self.get_cap(), target,
    310                                              self._parentmsgid,
    311                                              history=self._history)
    312 
    313         d = self.download_cache.when_range_available(offset, size)
    314         d.addCallback(lambda res:
    315                       self.download_cache.read(consumer, offset, size))
     217        actual_size = size
      218        if actual_size is None:
     219            actual_size = self.u.size
     220        actual_size = actual_size - offset
     221        read_ev = self._download_status.add_read_event(offset,actual_size,
     222                                                       now())
     223        decryptor = DecryptingConsumer(consumer, self._readkey, offset, read_ev)
     224        d = self._cnode.read(decryptor, offset, size, read_ev)
     225        d.addCallback(lambda dc: consumer)
    316226        return d
    317227
    318 class LiteralProducer:
    319     implements(IPushProducer)
    320     def resumeProducing(self):
    321         pass
    322     def stopProducing(self):
     228    def raise_error(self):
    323229        pass
    324230
     231    def get_write_uri(self):
     232        return None
    325233
    326 class LiteralFileNode(_ImmutableFileNodeBase):
    327 
    328     def __init__(self, filecap):
    329         assert isinstance(filecap, LiteralFileURI)
    330         self.u = filecap
    331 
    332     def get_size(self):
    333         return len(self.u.data)
    334     def get_current_size(self):
    335         return defer.succeed(self.get_size())
     234    def get_readonly_uri(self):
     235        return self.get_uri()
    336236
     237    def get_uri(self):
     238        return self.u.to_string()
    337239    def get_cap(self):
    338240        return self.u
    339241    def get_readcap(self):
    340         return self.u
     242        return self.u.get_readonly()
    341243    def get_verify_cap(self):
    342         return None
     244        return self.u.get_verify_cap()
    343245    def get_repair_cap(self):
    344         return None
    345 
    346     def get_uri(self):
    347         return self.u.to_string()
     246        # CHK files can be repaired with just the verifycap
     247        return self.u.get_verify_cap()
    348248
    349249    def get_storage_index(self):
    350         return None
     250        return self.u.get_storage_index()
    351251
    352     def check(self, monitor, verify=False, add_lease=False):
    353         return defer.succeed(None)
     252    def get_size(self):
     253        return self.u.get_size()
     254    def get_current_size(self):
     255        return defer.succeed(self.get_size())
    354256
    355     def check_and_repair(self, monitor, verify=False, add_lease=False):
    356         return defer.succeed(None)
     257    def is_mutable(self):
     258        return False
    357259
    358     def read(self, consumer, offset=0, size=None):
    359         if size is None:
    360             data = self.u.data[offset:]
    361         else:
    362             data = self.u.data[offset:offset+size]
    363 
    364         # We use twisted.protocols.basic.FileSender, which only does
    365         # non-streaming, i.e. PullProducer, where the receiver/consumer must
    366         # ask explicitly for each chunk of data. There are only two places in
    367         # the Twisted codebase that can't handle streaming=False, both of
    368         # which are in the upload path for an FTP/SFTP server
    369         # (protocols.ftp.FileConsumer and
    370         # vfs.adapters.ftp._FileToConsumerAdapter), neither of which is
    371         # likely to be used as the target for a Tahoe download.
    372 
    373         d = basic.FileSender().beginFileTransfer(StringIO(data), consumer)
    374         d.addCallback(lambda lastSent: consumer)
    375         return d
     260    def is_readonly(self):
     261        return True
     262
     263    def is_unknown(self):
     264        return False
     265
     266    def is_allowed_in_immutable_directory(self):
     267        return True
     268
     269    def check_and_repair(self, monitor, verify=False, add_lease=False):
     270        return self._cnode.check_and_repair(monitor, verify, add_lease)
     271    def check(self, monitor, verify=False, add_lease=False):
     272        return self._cnode.check(monitor, verify, add_lease)
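
The DecryptingConsumer above fakes random-access CTR decryption by packing the block index into pycryptopp's iv= argument and then discarding offset % 16 bytes of keystream. A minimal sketch of that trick in isolation (ctr_stream_at is an illustrative helper, not part of the patch):

    import binascii
    from pycryptopp.cipher.aes import AES

    def ctr_stream_at(readkey, offset):
        # Seek an AES-CTR stream to an arbitrary byte offset: start the
        # counter at block offset//16, then burn the offset%16 leading bytes.
        block, skip = divmod(offset, 16)
        iv = binascii.unhexlify("%032x" % block)   # 16-byte initial counter
        cipher = AES(readkey, iv=iv)
        cipher.process("\x00" * skip)
        return cipher

    # cipher.process(ciphertext[offset:]) now yields plaintext[offset:]

This mirrors what DecryptingConsumer.__init__ sets up before the first write() arrives.
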
  • src/allmydata/immutable/layout.py

    diff --git a/src/allmydata/immutable/layout.py b/src/allmydata/immutable/layout.py
    index 6e07da7..27fb844 100644
    a b limitations described in #346. 
    7474# they are still provided when writing so that older versions of Tahoe can
    7575# read them.
    7676
     77FORCE_V2 = False # set briefly by unit tests to make small-sized V2 shares
     78
    7779def make_write_bucket_proxy(rref, data_size, block_size, num_segments,
    7880                            num_share_hashes, uri_extension_size_max, nodeid):
    7981    # Use layout v1 for small files, so they'll be readable by older versions
    8082    # (<tahoe-1.3.0). Use layout v2 for large files; they'll only be readable
    8183    # by tahoe-1.3.0 or later.
    8284    try:
     85        if FORCE_V2:
     86            raise FileTooLargeError
    8387        wbp = WriteBucketProxy(rref, data_size, block_size, num_segments,
    8488                               num_share_hashes, uri_extension_size_max, nodeid)
    8589    except FileTooLargeError:
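
FORCE_V2 works by pretending the file is too large for the v1 layout, so the existing FileTooLargeError fallback selects layout v2 even for tiny test files. A hypothetical sketch of how a test might flip the module-level flag for its duration (this mixin is illustrative, not the actual test code):

    from allmydata.immutable import layout

    class ForceV2SharesMixin:
        # Hypothetical: make every upload performed during these tests use
        # the v2 share layout, regardless of file size.
        def setUp(self):
            layout.FORCE_V2 = True
        def tearDown(self):
            layout.FORCE_V2 = False
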
  • new file src/allmydata/immutable/literal.py

    diff --git a/src/allmydata/immutable/literal.py b/src/allmydata/immutable/literal.py
    new file mode 100644
    index 0000000..09466cb
    - +  
     1from cStringIO import StringIO
     2from zope.interface import implements
     3from twisted.internet import defer
     4from twisted.internet.interfaces import IPushProducer
     5from twisted.protocols import basic
     6from allmydata.interfaces import IImmutableFileNode, ICheckable
     7from allmydata.uri import LiteralFileURI
     8
     9class _ImmutableFileNodeBase(object):
     10    implements(IImmutableFileNode, ICheckable)
     11
     12    def get_write_uri(self):
     13        return None
     14
     15    def get_readonly_uri(self):
     16        return self.get_uri()
     17
     18    def is_mutable(self):
     19        return False
     20
     21    def is_readonly(self):
     22        return True
     23
     24    def is_unknown(self):
     25        return False
     26
     27    def is_allowed_in_immutable_directory(self):
     28        return True
     29
     30    def raise_error(self):
     31        pass
     32
     33    def __hash__(self):
     34        return self.u.__hash__()
     35    def __eq__(self, other):
     36        if isinstance(other, _ImmutableFileNodeBase):
     37            return self.u.__eq__(other.u)
     38        else:
     39            return False
     40    def __ne__(self, other):
     41        if isinstance(other, _ImmutableFileNodeBase):
      42            return not self.u.__eq__(other.u)
     43        else:
     44            return True
     45
     46
     47class LiteralProducer:
     48    implements(IPushProducer)
     49    def resumeProducing(self):
     50        pass
     51    def stopProducing(self):
     52        pass
     53
     54
     55class LiteralFileNode(_ImmutableFileNodeBase):
     56
     57    def __init__(self, filecap):
     58        assert isinstance(filecap, LiteralFileURI)
     59        self.u = filecap
     60
     61    def get_size(self):
     62        return len(self.u.data)
     63    def get_current_size(self):
     64        return defer.succeed(self.get_size())
     65
     66    def get_cap(self):
     67        return self.u
     68    def get_readcap(self):
     69        return self.u
     70    def get_verify_cap(self):
     71        return None
     72    def get_repair_cap(self):
     73        return None
     74
     75    def get_uri(self):
     76        return self.u.to_string()
     77
     78    def get_storage_index(self):
     79        return None
     80
     81    def check(self, monitor, verify=False, add_lease=False):
     82        return defer.succeed(None)
     83
     84    def check_and_repair(self, monitor, verify=False, add_lease=False):
     85        return defer.succeed(None)
     86
     87    def read(self, consumer, offset=0, size=None):
     88        if size is None:
     89            data = self.u.data[offset:]
     90        else:
     91            data = self.u.data[offset:offset+size]
     92
     93        # We use twisted.protocols.basic.FileSender, which only does
     94        # non-streaming, i.e. PullProducer, where the receiver/consumer must
     95        # ask explicitly for each chunk of data. There are only two places in
     96        # the Twisted codebase that can't handle streaming=False, both of
     97        # which are in the upload path for an FTP/SFTP server
     98        # (protocols.ftp.FileConsumer and
     99        # vfs.adapters.ftp._FileToConsumerAdapter), neither of which is
     100        # likely to be used as the target for a Tahoe download.
     101
     102        d = basic.FileSender().beginFileTransfer(StringIO(data), consumer)
     103        d.addCallback(lambda lastSent: consumer)
     104        return d
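
As the comment in LiteralFileNode.read() notes, FileSender is a non-streaming (pull) producer, so whatever consumer receives the data must keep calling resumeProducing() until the transfer completes. A minimal consumer of that kind, modeled on the MemoryConsumer pattern used elsewhere in the tree (the class name is illustrative):

    from cStringIO import StringIO
    from twisted.protocols import basic

    class PullingMemoryConsumer:
        # Collects everything a producer writes; drives pull producers itself.
        def __init__(self):
            self.chunks = []
            self.done = False
        def registerProducer(self, producer, streaming):
            if streaming:
                producer.resumeProducing()   # one nudge to start a push producer
            else:
                while not self.done:         # pull producers must be asked repeatedly
                    producer.resumeProducing()
        def write(self, data):
            self.chunks.append(data)
        def unregisterProducer(self):
            self.done = True

    c = PullingMemoryConsumer()
    d = basic.FileSender().beginFileTransfer(StringIO("some literal data"), c)
    d.addCallback(lambda lastByte: "".join(c.chunks))
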
  • src/allmydata/immutable/repairer.py

    diff --git a/src/allmydata/immutable/repairer.py b/src/allmydata/immutable/repairer.py
    index fa6a604..64fb9a1 100644
    a b  
    11from zope.interface import implements
    22from twisted.internet import defer
    33from allmydata.storage.server import si_b2a
    4 from allmydata.util import log, observer
    5 from allmydata.util.assertutil import precondition, _assert
    6 from allmydata.uri import CHKFileVerifierURI
    7 from allmydata.interfaces import IEncryptedUploadable, IDownloadTarget
    8 from twisted.internet.interfaces import IConsumer
     4from allmydata.util import log, consumer
     5from allmydata.util.assertutil import precondition
     6from allmydata.interfaces import IEncryptedUploadable
    97
    10 from allmydata.immutable import download, upload
    11 
    12 import collections
     8from allmydata.immutable import upload
    139
    1410class Repairer(log.PrefixingLogMixin):
     11    implements(IEncryptedUploadable)
    1512    """I generate any shares which were not available and upload them to
    1613    servers.
    1714
    class Repairer(log.PrefixingLogMixin): 
    4340    cancelled (by invoking its raise_if_cancelled() method).
    4441    """
    4542
    46     def __init__(self, storage_broker, secret_holder, verifycap, monitor):
    47         assert precondition(isinstance(verifycap, CHKFileVerifierURI))
    48 
    49         logprefix = si_b2a(verifycap.get_storage_index())[:5]
     43    def __init__(self, filenode, storage_broker, secret_holder, monitor):
     44        logprefix = si_b2a(filenode.get_storage_index())[:5]
    5045        log.PrefixingLogMixin.__init__(self, "allmydata.immutable.repairer",
    5146                                       prefix=logprefix)
    52 
     47        self._filenode = filenode
    5348        self._storage_broker = storage_broker
    5449        self._secret_holder = secret_holder
    55         self._verifycap = verifycap
    5650        self._monitor = monitor
     51        self._offset = 0
    5752
    5853    def start(self):
    5954        self.log("starting repair")
    60         duc = DownUpConnector()
    61         dl = download.CiphertextDownloader(self._storage_broker,
    62                                            self._verifycap, target=duc,
    63                                            monitor=self._monitor)
    64         ul = upload.CHKUploader(self._storage_broker, self._secret_holder)
    65 
    66         d = defer.Deferred()
    67 
    68         # If the upload or the download fails or is stopped, then the repair
    69         # failed.
    70         def _errb(f):
    71             d.errback(f)
    72             return None
    73 
    74         # If the upload succeeds, then the repair has succeeded.
    75         def _cb(res):
    76             d.callback(res)
    77         ul.start(duc).addCallbacks(_cb, _errb)
    78 
    79         # If the download fails or is stopped, then the repair failed.
    80         d2 = dl.start()
    81         d2.addErrback(_errb)
    82 
    83         # We ignore the callback from d2.  Is this right?  Ugh.
    84 
     55        d = self._filenode.get_segment_size()
     56        def _got_segsize(segsize):
     57            vcap = self._filenode.get_verify_cap()
     58            k = vcap.needed_shares
     59            N = vcap.total_shares
     60            happy = upload.BaseUploadable.default_encoding_param_happy
     61            self._encodingparams = (k, happy, N, segsize)
     62            ul = upload.CHKUploader(self._storage_broker, self._secret_holder)
     63            return ul.start(self) # I am the IEncryptedUploadable
     64        d.addCallback(_got_segsize)
    8565        return d
    8666
    87 class DownUpConnector(log.PrefixingLogMixin):
    88     implements(IEncryptedUploadable, IDownloadTarget, IConsumer)
    89     """I act like an 'encrypted uploadable' -- something that a local
    90     uploader can read ciphertext from in order to upload the ciphertext.
    91     However, unbeknownst to the uploader, I actually download the ciphertext
    92     from a CiphertextDownloader instance as it is needed.
    93 
    94     On the other hand, I act like a 'download target' -- something that a
    95     local downloader can write ciphertext to as it downloads the ciphertext.
    96     That downloader doesn't realize, of course, that I'm just turning around
    97     and giving the ciphertext to the uploader."""
    98 
    99     # The theory behind this class is nice: just satisfy two separate
    100     # interfaces. The implementation is slightly horrible, because of
    101     # "impedance mismatch" -- the downloader expects to be able to
    102     # synchronously push data in, and the uploader expects to be able to read
    103     # data out with a "read(THIS_SPECIFIC_LENGTH)" which returns a deferred.
    104     # The two interfaces have different APIs for pausing/unpausing. The
    105     # uploader requests metadata like size and encodingparams which the
    106     # downloader provides either eventually or not at all (okay I just now
    107     # extended the downloader to provide encodingparams). Most of this
    108     # slightly horrible code would disappear if CiphertextDownloader just
    109     # used this object as an IConsumer (plus maybe a couple of other methods)
    110     # and if the Uploader simply expected to be treated as an IConsumer (plus
    111     # maybe a couple of other things).
    112 
    113     def __init__(self, buflim=2**19):
    114         """If we're already holding at least buflim bytes, then tell the
    115         downloader to pause until we have less than buflim bytes."""
    116         log.PrefixingLogMixin.__init__(self, "allmydata.immutable.repairer")
    117         self.buflim = buflim
    118         self.bufs = collections.deque() # list of strings
    119         self.bufsiz = 0 # how many bytes total in bufs
    120 
    121         # list of deferreds which will fire with the requested ciphertext
    122         self.next_read_ds = collections.deque()
    123 
    124         # how many bytes of ciphertext were requested by each deferred
    125         self.next_read_lens = collections.deque()
    126 
    127         self._size_osol = observer.OneShotObserverList()
    128         self._encodingparams_osol = observer.OneShotObserverList()
    129         self._storageindex_osol = observer.OneShotObserverList()
    130         self._closed_to_pusher = False
    131 
    132         # once seg size is available, the following attribute will be created
    133         # to hold it:
    134 
    135         # self.encodingparams # (provided by the object which is pushing data
    136         # into me, required by the object which is pulling data out of me)
    137 
    138         # open() will create the following attribute:
    139         # self.size # size of the whole file (provided by the object which is
    140         # pushing data into me, required by the object which is pulling data
    141         # out of me)
    142 
    143         # set_upload_status() will create the following attribute:
    144 
    145         # self.upload_status # XXX do we need to actually update this? Is
    146         # anybody watching the results during a repair?
    147 
    148     def _satisfy_reads_if_possible(self):
    149         assert bool(self.next_read_ds) == bool(self.next_read_lens)
    150         while self.next_read_ds and ((self.bufsiz >= self.next_read_lens[0])
    151                                      or self._closed_to_pusher):
    152             nrd = self.next_read_ds.popleft()
    153             nrl = self.next_read_lens.popleft()
    154 
    155             # Pick out the requested number of bytes from self.bufs, turn it
    156             # into a string, and callback the deferred with that.
    157             res = []
    158             ressize = 0
    159             while ressize < nrl and self.bufs:
    160                 nextbuf = self.bufs.popleft()
    161                 res.append(nextbuf)
    162                 ressize += len(nextbuf)
    163                 if ressize > nrl:
    164                     extra = ressize - nrl
    165                     self.bufs.appendleft(nextbuf[:-extra])
    166                     res[-1] = nextbuf[:-extra]
    167             assert _assert(sum(len(x) for x in res) <= nrl, [len(x) for x in res], nrl)
    168             assert _assert(sum(len(x) for x in res) == nrl or self._closed_to_pusher, [len(x) for x in res], nrl)
    169             self.bufsiz -= nrl
    170             if self.bufsiz < self.buflim and self.producer:
    171                 self.producer.resumeProducing()
    172             nrd.callback(res)
    173 
    174     # methods to satisfy the IConsumer and IDownloadTarget interfaces. (From
    175     # the perspective of a downloader I am an IDownloadTarget and an
    176     # IConsumer.)
    177     def registerProducer(self, producer, streaming):
    178         assert streaming # We know how to handle only streaming producers.
    179         self.producer = producer # the downloader
    180     def unregisterProducer(self):
    181         self.producer = None
    182     def open(self, size):
    183         self.size = size
    184         self._size_osol.fire(self.size)
    185     def set_encodingparams(self, encodingparams):
    186         self.encodingparams = encodingparams
    187         self._encodingparams_osol.fire(self.encodingparams)
    188     def set_storageindex(self, storageindex):
    189         self.storageindex = storageindex
    190         self._storageindex_osol.fire(self.storageindex)
    191     def write(self, data):
    192         precondition(data) # please don't write empty strings
    193         self.bufs.append(data)
    194         self.bufsiz += len(data)
    195         self._satisfy_reads_if_possible()
    196         if self.bufsiz >= self.buflim and self.producer:
    197             self.producer.pauseProducing()
    198     def finish(self):
    199         pass
    200     def close(self):
    201         self._closed_to_pusher = True
    202         # Any reads which haven't been satisfied by now are going to
    203         # have to be satisfied with short reads.
    204         self._satisfy_reads_if_possible()
    20567
    20668    # methods to satisfy the IEncryptedUploader interface
    20769    # (From the perspective of an uploader I am an IEncryptedUploadable.)
    20870    def set_upload_status(self, upload_status):
    20971        self.upload_status = upload_status
    21072    def get_size(self):
    211         if hasattr(self, 'size'): # attribute created by self.open()
    212             return defer.succeed(self.size)
    213         else:
    214             return self._size_osol.when_fired()
     73        size = self._filenode.get_size()
     74        assert size is not None
     75        return defer.succeed(size)
    21576    def get_all_encoding_parameters(self):
    216         # We have to learn the encoding params from pusher.
    217         if hasattr(self, 'encodingparams'):
    218             # attribute created by self.set_encodingparams()
    219             return defer.succeed(self.encodingparams)
    220         else:
    221             return self._encodingparams_osol.when_fired()
     77        return defer.succeed(self._encodingparams)
    22278    def read_encrypted(self, length, hash_only):
    223         """Returns a deferred which eventually fired with the requested
    224         ciphertext."""
     79        """Returns a deferred which eventually fires with the requested
     80        ciphertext, as a list of strings."""
    22581        precondition(length) # please don't ask to read 0 bytes
    226         d = defer.Deferred()
    227         self.next_read_ds.append(d)
    228         self.next_read_lens.append(length)
    229         self._satisfy_reads_if_possible()
     82        mc = consumer.MemoryConsumer()
     83        d = self._filenode.read(mc, self._offset, length)
     84        self._offset += length
     85        d.addCallback(lambda ign: mc.chunks)
    23086        return d
    23187    def get_storage_index(self):
    232         # We have to learn the storage index from pusher.
    233         if hasattr(self, 'storageindex'):
    234             # attribute created by self.set_storageindex()
    235             return defer.succeed(self.storageindex)
    236         else:
    237             return self._storageindex.when_fired()
     88        return self._filenode.get_storage_index()
     89    def close(self):
     90        pass
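
With DownUpConnector gone, read_encrypted() just pulls the next 'length' bytes of ciphertext out of the CiphertextFileNode on demand, via a MemoryConsumer and a running offset. The same pull pattern in isolation, assuming only the filenode.read(consumer, offset, size) interface shown above (read_ciphertext_range is an illustrative name):

    from allmydata.util.consumer import MemoryConsumer

    def read_ciphertext_range(filenode, offset, length):
        # Fire with filenode[offset:offset+length] joined into one string.
        mc = MemoryConsumer()
        d = filenode.read(mc, offset, length)
        d.addCallback(lambda ign: "".join(mc.chunks))
        return d

read_encrypted() itself skips the join and returns mc.chunks directly, since IEncryptedUploadable accepts the ciphertext as a list of strings.
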
  • src/allmydata/immutable/upload.py

    diff --git a/src/allmydata/immutable/upload.py b/src/allmydata/immutable/upload.py
    index dc46800..a3f8c92 100644
    a b from allmydata.util.assertutil import precondition 
    2020from allmydata.util.rrefutil import add_version_to_remote_reference
    2121from allmydata.interfaces import IUploadable, IUploader, IUploadResults, \
    2222     IEncryptedUploadable, RIEncryptedUploadable, IUploadStatus, \
    23      NoServersError, InsufficientVersionError, UploadUnhappinessError
     23     NoServersError, InsufficientVersionError, UploadUnhappinessError, \
     24     DEFAULT_MAX_SEGMENT_SIZE
    2425from allmydata.immutable import layout
    2526from pycryptopp.cipher.aes import AES
    2627
    class AssistedUploader: 
    12051206        return self._upload_status
    12061207
    12071208class BaseUploadable:
    1208     default_max_segment_size = 128*KiB # overridden by max_segment_size
     1209    # this is overridden by max_segment_size
     1210    default_max_segment_size = DEFAULT_MAX_SEGMENT_SIZE
    12091211    default_encoding_param_k = 3 # overridden by encoding_parameters
    12101212    default_encoding_param_happy = 7
    12111213    default_encoding_param_n = 10
  • src/allmydata/interfaces.py

    diff --git a/src/allmydata/interfaces.py b/src/allmydata/interfaces.py
    index 4cfe9c9..3a7fa7f 100644
    a b WriteEnablerSecret = Hash # used to protect mutable bucket modifications 
    2424LeaseRenewSecret = Hash # used to protect bucket lease renewal requests
    2525LeaseCancelSecret = Hash # used to protect bucket lease cancellation requests
    2626
     27KiB = 1024
     28DEFAULT_MAX_SEGMENT_SIZE = 128*KiB
     29
    2730class RIStubClient(RemoteInterface):
    2831    """Each client publishes a service announcement for a dummy object called
    2932    the StubClient. This object doesn't actually offer any services, but the
  • src/allmydata/nodemaker.py

    diff --git a/src/allmydata/nodemaker.py b/src/allmydata/nodemaker.py
    index c852f68..3b74d90 100644
    a b  
    11import weakref
    22from zope.interface import implements
    33from allmydata.interfaces import INodeMaker
    4 from allmydata.immutable.filenode import ImmutableFileNode, LiteralFileNode
     4from allmydata.immutable.literal import LiteralFileNode
     5from allmydata.immutable.filenode import ImmutableFileNode, CiphertextFileNode
    56from allmydata.immutable.upload import Data
    67from allmydata.mutable.filenode import MutableFileNode
    78from allmydata.dirnode import DirectoryNode, pack_children
    class NodeMaker: 
    1213    implements(INodeMaker)
    1314
    1415    def __init__(self, storage_broker, secret_holder, history,
    15                  uploader, downloader, download_cache_dirman,
     16                 uploader, terminator,
    1617                 default_encoding_parameters, key_generator):
    1718        self.storage_broker = storage_broker
    1819        self.secret_holder = secret_holder
    1920        self.history = history
    2021        self.uploader = uploader
    21         self.downloader = downloader
    22         self.download_cache_dirman = download_cache_dirman
     22        self.terminator = terminator
    2323        self.default_encoding_parameters = default_encoding_parameters
    2424        self.key_generator = key_generator
    2525
    class NodeMaker: 
    2929        return LiteralFileNode(cap)
    3030    def _create_immutable(self, cap):
    3131        return ImmutableFileNode(cap, self.storage_broker, self.secret_holder,
    32                                  self.downloader, self.history,
    33                                  self.download_cache_dirman)
     32                                 self.terminator, self.history)
     33    def _create_immutable_verifier(self, cap):
     34        return CiphertextFileNode(cap, self.storage_broker, self.secret_holder,
     35                                  self.terminator, self.history)
    3436    def _create_mutable(self, cap):
    3537        n = MutableFileNode(self.storage_broker, self.secret_holder,
    3638                            self.default_encoding_parameters,
    class NodeMaker: 
    7375            return self._create_lit(cap)
    7476        if isinstance(cap, uri.CHKFileURI):
    7577            return self._create_immutable(cap)
     78        if isinstance(cap, uri.CHKFileVerifierURI):
     79            return self._create_immutable_verifier(cap)
    7680        if isinstance(cap, (uri.ReadonlySSKFileURI, uri.WriteableSSKFileURI)):
    7781            return self._create_mutable(cap)
    7882        if isinstance(cap, (uri.DirectoryURI,
  • src/allmydata/test/no_network.py

    diff --git a/src/allmydata/test/no_network.py b/src/allmydata/test/no_network.py
    index 771dffd..a1c475d 100644
    a b class NoNetworkGrid(service.MultiService): 
    223223        fileutil.make_dirs(serverdir)
    224224        ss = StorageServer(serverdir, serverid, stats_provider=SimpleStats(),
    225225                           readonly_storage=readonly)
     226        ss._no_network_server_number = i
    226227        return ss
    227228
    228229    def add_server(self, i, ss):
    class GridTestMixin: 
    319320                    pass
    320321        return sorted(shares)
    321322
     323    def copy_shares(self, uri):
     324        shares = {}
     325        for (shnum, serverid, sharefile) in self.find_shares(uri):
     326            shares[sharefile] = open(sharefile, "rb").read()
     327        return shares
     328
     329    def restore_all_shares(self, shares):
     330        for sharefile, data in shares.items():
     331            open(sharefile, "wb").write(data)
     332
    322333    def delete_share(self, (shnum, serverid, sharefile)):
    323334        os.unlink(sharefile)
    324335
    class GridTestMixin: 
    339350                corruptdata = corruptor(sharedata, debug=debug)
    340351                open(i_sharefile, "wb").write(corruptdata)
    341352
     353    def corrupt_all_shares(self, uri, corruptor, debug=False):
     354        for (i_shnum, i_serverid, i_sharefile) in self.find_shares(uri):
     355            sharedata = open(i_sharefile, "rb").read()
     356            corruptdata = corruptor(sharedata, debug=debug)
     357            open(i_sharefile, "wb").write(corruptdata)
     358
    342359    def GET(self, urlpath, followRedirect=False, return_response=False,
    343360            method="GET", clientnum=0, **kwargs):
    344361        # if return_response=True, this fires with (data, statuscode,
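
The new copy_shares()/restore_all_shares()/corrupt_all_shares() helpers let a test damage every share on the grid and later put the pristine copies back. A hypothetical test sketch using them (the test class and the _flip_bit corruptor are illustrative, not part of the patch):

    from twisted.trial import unittest
    from allmydata.immutable import upload
    from allmydata.util.consumer import download_to_data
    from allmydata.test.no_network import GridTestMixin

    class CorruptAndRestoreExample(GridTestMixin, unittest.TestCase):
        def test_corrupt_then_restore(self):
            self.basedir = self.mktemp()
            self.set_up_grid()
            c0 = self.g.clients[0]
            DATA = "data to be corrupted and then restored\n" * 10
            d = c0.upload(upload.Data(DATA, convergence=None))
            def _uploaded(ur):
                self.uri = ur.uri
                self.stash = self.copy_shares(self.uri)  # pristine copies
            d.addCallback(_uploaded)
            def _flip_bit(sharedata, debug=False):       # corrupt one byte
                return (sharedata[:200] +
                        chr(ord(sharedata[200]) ^ 0x01) +
                        sharedata[201:])
            d.addCallback(lambda ign:
                          self.corrupt_all_shares(self.uri, _flip_bit))
            d.addCallback(lambda ign: self.restore_all_shares(self.stash))
            d.addCallback(lambda ign:
                          download_to_data(c0.create_node_from_uri(self.uri)))
            d.addCallback(lambda data: self.failUnlessEqual(data, DATA))
            return d
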
  • src/allmydata/test/test_cli.py

    diff --git a/src/allmydata/test/test_cli.py b/src/allmydata/test/test_cli.py
    index cec32e4..1e88053 100644
    a b class Errors(GridTestMixin, CLITestMixin, unittest.TestCase): 
    23002300            self.delete_shares_numbered(ur.uri, range(1,10))
    23012301        d.addCallback(_stash_bad)
    23022302
     2303        # the download is abandoned as soon as it's clear that we won't get
     2304        # enough shares. The one remaining share might be in either the
     2305        # COMPLETE or the PENDING state.
     2306        in_complete_msg = "ran out of shares: 1 complete, 0 pending, 0 overdue, 0 unused, need 3"
     2307        in_pending_msg = "ran out of shares: 0 complete, 1 pending, 0 overdue, 0 unused, need 3"
     2308
    23032309        d.addCallback(lambda ign: self.do_cli("get", self.uri_1share))
    23042310        def _check1((rc, out, err)):
    23052311            self.failIfEqual(rc, 0)
    23062312            self.failUnless("410 Gone" in err, err)
    23072313            self.failUnlessIn("NotEnoughSharesError: ", err)
    2308             self.failUnlessIn("Failed to get enough shareholders: have 1, need 3", err)
     2314            self.failUnless(in_complete_msg in err or in_pending_msg in err,
     2315                            err)
    23092316        d.addCallback(_check1)
    23102317
    23112318        targetf = os.path.join(self.basedir, "output")
    class Errors(GridTestMixin, CLITestMixin, unittest.TestCase): 
    23142321            self.failIfEqual(rc, 0)
    23152322            self.failUnless("410 Gone" in err, err)
    23162323            self.failUnlessIn("NotEnoughSharesError: ", err)
    2317             self.failUnlessIn("Failed to get enough shareholders: have 1, need 3", err)
     2324            self.failUnless(in_complete_msg in err or in_pending_msg in err,
     2325                            err)
    23182326            self.failIf(os.path.exists(targetf))
    23192327        d.addCallback(_check2)
    23202328
  • src/allmydata/test/test_dirnode.py

    diff --git a/src/allmydata/test/test_dirnode.py b/src/allmydata/test/test_dirnode.py
    index 7d8d66d..8122def 100644
    a b class Packing(testutil.ReallyEqualMixin, unittest.TestCase): 
    12021202    def test_unpack_and_pack_behavior(self):
    12031203        known_tree = b32decode(self.known_tree)
    12041204        nodemaker = NodeMaker(None, None, None,
    1205                               None, None, None,
     1205                              None, None,
    12061206                              {"k": 3, "n": 10}, None)
    12071207        write_uri = "URI:SSK-RO:e3mdrzfwhoq42hy5ubcz6rp3o4:ybyibhnp3vvwuq2vaw2ckjmesgkklfs6ghxleztqidihjyofgw7q"
    12081208        filenode = nodemaker.create_from_cap(write_uri)
    class Packing(testutil.ReallyEqualMixin, unittest.TestCase): 
    12641264        return kids
    12651265
    12661266    def test_deep_immutable(self):
    1267         nm = NodeMaker(None, None, None, None, None, None, {"k": 3, "n": 10},
    1268                        None)
     1267        nm = NodeMaker(None, None, None, None, None, {"k": 3, "n": 10}, None)
    12691268        fn = MinimalFakeMutableFile()
    12701269
    12711270        kids = self._make_kids(nm, ["imm", "lit", "write", "read",
    class FakeNodeMaker(NodeMaker): 
    13591358class FakeClient2(Client):
    13601359    def __init__(self):
    13611360        self.nodemaker = FakeNodeMaker(None, None, None,
    1362                                        None, None, None,
     1361                                       None, None,
    13631362                                       {"k":3,"n":10}, None)
    13641363    def create_node_from_uri(self, rwcap, rocap):
    13651364        return self.nodemaker.create_from_cap(rwcap, rocap)
    class Deleter(GridTestMixin, testutil.ReallyEqualMixin, unittest.TestCase): 
    16431642        def _do_delete(ignored):
    16441643            nm = UCWEingNodeMaker(c0.storage_broker, c0._secret_holder,
    16451644                                  c0.get_history(), c0.getServiceNamed("uploader"),
    1646                                   c0.downloader,
    1647                                   c0.download_cache_dirman,
     1645                                  c0.terminator,
    16481646                                  c0.get_encoding_parameters(),
    16491647                                  c0._key_generator)
    16501648            n = nm.create_from_cap(self.root_uri)
  • src/allmydata/test/test_download.py

    diff --git a/src/allmydata/test/test_download.py b/src/allmydata/test/test_download.py
    index b54bf01..520eaf2 100644
    a b  
    55
    66import os
    77from twisted.trial import unittest
     8from twisted.internet import defer, reactor
    89from allmydata import uri
    910from allmydata.storage.server import storage_index_to_dir
    10 from allmydata.util import base32, fileutil
    11 from allmydata.util.consumer import download_to_data
    12 from allmydata.immutable import upload
     11from allmydata.util import base32, fileutil, spans, log
     12from allmydata.util.consumer import download_to_data, MemoryConsumer
     13from allmydata.immutable import upload, layout
    1314from allmydata.test.no_network import GridTestMixin
     15from allmydata.test.common import ShouldFailMixin
     16from allmydata.interfaces import NotEnoughSharesError, NoSharesError
     17from allmydata.immutable.downloader.common import BadSegmentNumberError, \
     18     BadCiphertextHashError, DownloadStopped
     19from allmydata.codec import CRSDecoder
     20from foolscap.eventual import fireEventually, flushEventualQueue
    1421
    1522plaintext = "This is a moderate-sized file.\n" * 10
    1623mutable_plaintext = "This is a moderate-sized mutable file.\n" * 10
    mutable_shares = { 
    6875}
    6976#--------- END stored_shares.py ----------------
    7077
    71 class DownloadTest(GridTestMixin, unittest.TestCase):
    72     timeout = 2400 # It takes longer than 240 seconds on Zandr's ARM box.
    73     def test_download(self):
    74         self.basedir = self.mktemp()
    75         self.set_up_grid()
    76         self.c0 = self.g.clients[0]
    77 
    78         # do this to create the shares
    79         #return self.create_shares()
    80 
    81         self.load_shares()
    82         d = self.download_immutable()
    83         d.addCallback(self.download_mutable)
    84         return d
     78class _Base(GridTestMixin, ShouldFailMixin):
    8579
    8680    def create_shares(self, ignored=None):
    8781        u = upload.Data(plaintext, None)
    class DownloadTest(GridTestMixin, unittest.TestCase): 
    178172        def _got_data(data):
    179173            self.failUnlessEqual(data, plaintext)
    180174        d.addCallback(_got_data)
     175        # make sure we can use the same node twice
     176        d.addCallback(lambda ign: download_to_data(n))
     177        d.addCallback(_got_data)
    181178        return d
    182179
    183180    def download_mutable(self, ignored=None):
    class DownloadTest(GridTestMixin, unittest.TestCase): 
    188185        d.addCallback(_got_data)
    189186        return d
    190187
     188class DownloadTest(_Base, unittest.TestCase):
     189    timeout = 2400 # It takes longer than 240 seconds on Zandr's ARM box.
     190    def test_download(self):
     191        self.basedir = self.mktemp()
     192        self.set_up_grid()
     193        self.c0 = self.g.clients[0]
     194
     195        # do this to create the shares
     196        #return self.create_shares()
     197
     198        self.load_shares()
     199        d = self.download_immutable()
     200        d.addCallback(self.download_mutable)
     201        return d
     202
     203    def test_download_failover(self):
     204        self.basedir = self.mktemp()
     205        self.set_up_grid()
     206        self.c0 = self.g.clients[0]
     207
     208        self.load_shares()
     209        si = uri.from_string(immutable_uri).get_storage_index()
     210        si_dir = storage_index_to_dir(si)
     211
     212        n = self.c0.create_node_from_uri(immutable_uri)
     213        d = download_to_data(n)
     214        def _got_data(data):
     215            self.failUnlessEqual(data, plaintext)
     216        d.addCallback(_got_data)
     217
     218        def _clobber_some_shares(ign):
     219            # find the three shares that were used, and delete them. Then
     220            # download again, forcing the downloader to fail over to other
     221            # shares
     222            for s in n._cnode._node._shares:
     223                for clientnum in immutable_shares:
     224                    for shnum in immutable_shares[clientnum]:
     225                        if s._shnum == shnum:
     226                            fn = os.path.join(self.get_serverdir(clientnum),
     227                                              "shares", si_dir, str(shnum))
     228                            os.unlink(fn)
     229        d.addCallback(_clobber_some_shares)
     230        d.addCallback(lambda ign: download_to_data(n))
     231        d.addCallback(_got_data)
     232
     233        def _clobber_most_shares(ign):
     234            # delete all but one of the shares that are still alive
     235            live_shares = [s for s in n._cnode._node._shares if s.is_alive()]
     236            save_me = live_shares[0]._shnum
     237            for clientnum in immutable_shares:
     238                for shnum in immutable_shares[clientnum]:
     239                    if shnum == save_me:
     240                        continue
     241                    fn = os.path.join(self.get_serverdir(clientnum),
     242                                      "shares", si_dir, str(shnum))
     243                    if os.path.exists(fn):
     244                        os.unlink(fn)
     245            # now the download should fail with NotEnoughSharesError
     246            return self.shouldFail(NotEnoughSharesError, "1shares", None,
     247                                   download_to_data, n)
     248        d.addCallback(_clobber_most_shares)
     249
     250        def _clobber_all_shares(ign):
     251            # delete the last remaining share
     252            for clientnum in immutable_shares:
     253                for shnum in immutable_shares[clientnum]:
     254                    fn = os.path.join(self.get_serverdir(clientnum),
     255                                      "shares", si_dir, str(shnum))
     256                    if os.path.exists(fn):
     257                        os.unlink(fn)
     258            # now a new download should fail with NoSharesError. We want a
     259            # new ImmutableFileNode so it will forget about the old shares.
     260            # If we merely called create_node_from_uri() without first
     261            # dereferencing the original node, the NodeMaker's _node_cache
     262            # would give us back the old one.
     263            n = None
     264            n = self.c0.create_node_from_uri(immutable_uri)
     265            return self.shouldFail(NoSharesError, "0shares", None,
     266                                   download_to_data, n)
     267        d.addCallback(_clobber_all_shares)
     268        return d
     269
     270    def test_lost_servers(self):
     271        # while downloading a file (after seg[0], before seg[1]), lose the
     272        # three servers that we were using. The download should switch over
     273        # to other servers.
     274        self.basedir = self.mktemp()
     275        self.set_up_grid()
     276        self.c0 = self.g.clients[0]
     277
     278        # upload a file with multiple segments, so we can catch the download
     279        # in the middle.
     280        u = upload.Data(plaintext, None)
     281        u.max_segment_size = 70 # 5 segs
     282        d = self.c0.upload(u)
     283        def _uploaded(ur):
     284            self.uri = ur.uri
     285            self.n = self.c0.create_node_from_uri(self.uri)
     286            return download_to_data(self.n)
     287        d.addCallback(_uploaded)
     288        def _got_data(data):
     289            self.failUnlessEqual(data, plaintext)
     290        d.addCallback(_got_data)
     291        def _kill_some_servers():
      292            # find the servers holding the three shares that were used,
      293            # and break the connections to them, forcing the downloader to
      294            # fail over to other servers and shares
     295            servers = []
     296            shares = sorted([s._shnum for s in self.n._cnode._node._shares])
     297            self.failUnlessEqual(shares, [0,1,2])
     298            # break the RIBucketReader references
     299            for s in self.n._cnode._node._shares:
     300                s._rref.broken = True
     301                for servernum in immutable_shares:
     302                    for shnum in immutable_shares[servernum]:
     303                        if s._shnum == shnum:
     304                            ss = self.g.servers_by_number[servernum]
     305                            servers.append(ss)
     306            # and, for good measure, break the RIStorageServer references
     307            # too, just in case the downloader gets more aggressive in the
     308            # future and tries to re-fetch the same share.
     309            for ss in servers:
     310                wrapper = self.g.servers_by_id[ss.my_nodeid]
     311                wrapper.broken = True
     312        def _download_again(ign):
     313            c = StallingConsumer(_kill_some_servers)
     314            return self.n.read(c)
     315        d.addCallback(_download_again)
     316        def _check_failover(c):
     317            self.failUnlessEqual("".join(c.chunks), plaintext)
     318            shares = sorted([s._shnum for s in self.n._cnode._node._shares])
     319            # we should now be using more shares than we were before
     320            self.failIfEqual(shares, [0,1,2])
     321        d.addCallback(_check_failover)
     322        return d
     323
     324    def test_badguess(self):
     325        self.basedir = self.mktemp()
     326        self.set_up_grid()
     327        self.c0 = self.g.clients[0]
     328        self.load_shares()
     329        n = self.c0.create_node_from_uri(immutable_uri)
     330
     331        # Cause the downloader to guess a segsize that's too low, so it will
     332        # ask for a segment number that's too high (beyond the end of the
     333        # real list, causing BadSegmentNumberError), to exercise
     334        # Segmentation._retry_bad_segment
     335
     336        con1 = MemoryConsumer()
     337        n._cnode._node._build_guessed_tables(90)
     338        # plaintext size of 310 bytes, wrong-segsize of 90 bytes, will make
     339        # us think that file[180:200] is in the third segment (segnum=2), but
     340        # really there's only one segment
     341        d = n.read(con1, 180, 20)
     342        def _done(res):
     343            self.failUnlessEqual("".join(con1.chunks), plaintext[180:200])
     344        d.addCallback(_done)
     345        return d
     346
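(Illustration, not part of the patch: the arithmetic behind test_badguess
above, assuming the guessed segment number is plain integer division of the
read offset by the guessed segsize, which matches the 90-byte / segnum=2
example in the comment.)

    # hypothetical standalone sketch of the bad-guess arithmetic
    guessed_segsize = 90                 # what _build_guessed_tables(90) installs
    read_offset, read_length = 180, 20   # the n.read(con1, 180, 20) call above
    guessed_segnum = read_offset // guessed_segsize   # == 2
    real_numsegs = 1                     # the 310-byte plaintext is one segment
    # guessed_segnum >= real_numsegs, so the first fetch raises
    # BadSegmentNumberError, Segmentation._retry_bad_segment gets exercised,
    # and the read still ends up returning plaintext[180:200].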
     347    def test_simultaneous_badguess(self):
     348        self.basedir = self.mktemp()
     349        self.set_up_grid()
     350        self.c0 = self.g.clients[0]
     351
     352        # upload a file with multiple segments, and a non-default segsize, to
     353        # exercise the offset-guessing code. Because we don't tell the
     354        # downloader about the unusual segsize, it will guess wrong, and have
     355        # to do extra roundtrips to get the correct data.
     356        u = upload.Data(plaintext, None)
     357        u.max_segment_size = 70 # 5 segs, 8-wide hashtree
     358        con1 = MemoryConsumer()
     359        con2 = MemoryConsumer()
     360        d = self.c0.upload(u)
     361        def _uploaded(ur):
     362            n = self.c0.create_node_from_uri(ur.uri)
     363            d1 = n.read(con1, 70, 20)
     364            d2 = n.read(con2, 140, 20)
     365            return defer.gatherResults([d1,d2])
     366        d.addCallback(_uploaded)
     367        def _done(res):
     368            self.failUnlessEqual("".join(con1.chunks), plaintext[70:90])
     369            self.failUnlessEqual("".join(con2.chunks), plaintext[140:160])
     370        d.addCallback(_done)
     371        return d
     372
     373    def test_simultaneous_goodguess(self):
     374        self.basedir = self.mktemp()
     375        self.set_up_grid()
     376        self.c0 = self.g.clients[0]
     377
     378        # upload a file with multiple segments, and a non-default segsize, to
     379        # exercise the offset-guessing code. This time we *do* tell the
     380        # downloader about the unusual segsize, so it can guess right.
     381        u = upload.Data(plaintext, None)
     382        u.max_segment_size = 70 # 5 segs, 8-wide hashtree
     383        con1 = MemoryConsumer()
     384        con2 = MemoryConsumer()
     385        d = self.c0.upload(u)
     386        def _uploaded(ur):
     387            n = self.c0.create_node_from_uri(ur.uri)
     388            n._cnode._node._build_guessed_tables(u.max_segment_size)
     389            d1 = n.read(con1, 70, 20)
     390            #d2 = n.read(con2, 140, 20) # XXX
     391            d2 = defer.succeed(None)
     392            return defer.gatherResults([d1,d2])
     393        d.addCallback(_uploaded)
     394        def _done(res):
     395            self.failUnlessEqual("".join(con1.chunks), plaintext[70:90])
     396            self.failUnlessEqual("".join(con2.chunks), plaintext[140:160])
     397        #d.addCallback(_done)
     398        return d
     399
     400    def test_sequential_goodguess(self):
     401        self.basedir = self.mktemp()
     402        self.set_up_grid()
     403        self.c0 = self.g.clients[0]
     404        data = (plaintext*100)[:30000] # multiple of k
     405
     406        # upload a file with multiple segments, and a non-default segsize, to
     407        # exercise the offset-guessing code. This time we *do* tell the
     408        # downloader about the unusual segsize, so it can guess right.
     409        u = upload.Data(data, None)
     410        u.max_segment_size = 6000 # 5 segs, 8-wide hashtree
     411        con1 = MemoryConsumer()
     412        con2 = MemoryConsumer()
     413        d = self.c0.upload(u)
     414        def _uploaded(ur):
     415            n = self.c0.create_node_from_uri(ur.uri)
     416            n._cnode._node._build_guessed_tables(u.max_segment_size)
     417            d = n.read(con1, 12000, 20)
     418            def _read1(ign):
     419                self.failUnlessEqual("".join(con1.chunks), data[12000:12020])
     420                return n.read(con2, 24000, 20)
     421            d.addCallback(_read1)
     422            def _read2(ign):
     423                self.failUnlessEqual("".join(con2.chunks), data[24000:24020])
     424            d.addCallback(_read2)
     425            return d
     426        d.addCallback(_uploaded)
     427        return d
     428
     429
     430    def test_simultaneous_get_blocks(self):
     431        self.basedir = self.mktemp()
     432        self.set_up_grid()
     433        self.c0 = self.g.clients[0]
     434
     435        self.load_shares()
     436        stay_empty = []
     437
     438        n = self.c0.create_node_from_uri(immutable_uri)
     439        d = download_to_data(n)
     440        def _use_shares(ign):
     441            shares = list(n._cnode._node._shares)
     442            s0 = shares[0]
     443            # make sure .cancel works too
     444            o0 = s0.get_block(0)
     445            o0.subscribe(lambda **kwargs: stay_empty.append(kwargs))
     446            o1 = s0.get_block(0)
     447            o2 = s0.get_block(0)
     448            o0.cancel()
     449            o3 = s0.get_block(1) # state=BADSEGNUM
     450            d1 = defer.Deferred()
     451            d2 = defer.Deferred()
     452            d3 = defer.Deferred()
     453            o1.subscribe(lambda **kwargs: d1.callback(kwargs))
     454            o2.subscribe(lambda **kwargs: d2.callback(kwargs))
     455            o3.subscribe(lambda **kwargs: d3.callback(kwargs))
     456            return defer.gatherResults([d1,d2,d3])
     457        d.addCallback(_use_shares)
     458        def _done(res):
     459            r1,r2,r3 = res
     460            self.failUnlessEqual(r1["state"], "COMPLETE")
     461            self.failUnlessEqual(r2["state"], "COMPLETE")
     462            self.failUnlessEqual(r3["state"], "BADSEGNUM")
     463            self.failUnless("block" in r1)
     464            self.failUnless("block" in r2)
     465            self.failIf(stay_empty)
     466        d.addCallback(_done)
     467        return d
     468
     469    def test_download_no_overrun(self):
     470        self.basedir = self.mktemp()
     471        self.set_up_grid()
     472        self.c0 = self.g.clients[0]
     473
     474        self.load_shares()
     475
     476        # tweak the client's copies of server-version data, so it believes
     477        # that they're old and can't handle reads that overrun the length of
     478        # the share. This exercises a different code path.
     479        for (peerid, rref) in self.c0.storage_broker.get_all_servers():
     480            v1 = rref.version["http://allmydata.org/tahoe/protocols/storage/v1"]
     481            v1["tolerates-immutable-read-overrun"] = False
     482
     483        n = self.c0.create_node_from_uri(immutable_uri)
     484        d = download_to_data(n)
     485        def _got_data(data):
     486            self.failUnlessEqual(data, plaintext)
     487        d.addCallback(_got_data)
     488        return d
     489
     490    def test_download_segment(self):
     491        self.basedir = self.mktemp()
     492        self.set_up_grid()
     493        self.c0 = self.g.clients[0]
     494        self.load_shares()
     495        n = self.c0.create_node_from_uri(immutable_uri)
     496        cn = n._cnode
     497        (d,c) = cn.get_segment(0)
     498        def _got_segment((offset,data,decodetime)):
     499            self.failUnlessEqual(offset, 0)
     500            self.failUnlessEqual(len(data), len(plaintext))
     501        d.addCallback(_got_segment)
     502        return d
     503
     504    def test_download_segment_cancel(self):
     505        self.basedir = self.mktemp()
     506        self.set_up_grid()
     507        self.c0 = self.g.clients[0]
     508        self.load_shares()
     509        n = self.c0.create_node_from_uri(immutable_uri)
     510        cn = n._cnode
     511        (d,c) = cn.get_segment(0)
     512        fired = []
     513        d.addCallback(fired.append)
     514        c.cancel()
     515        d = fireEventually()
     516        d.addCallback(flushEventualQueue)
     517        def _check(ign):
     518            self.failUnlessEqual(fired, [])
     519        d.addCallback(_check)
     520        return d
     521
     522    def test_download_bad_segment(self):
     523        self.basedir = self.mktemp()
     524        self.set_up_grid()
     525        self.c0 = self.g.clients[0]
     526        self.load_shares()
     527        n = self.c0.create_node_from_uri(immutable_uri)
     528        cn = n._cnode
     529        def _try_download():
     530            (d,c) = cn.get_segment(1)
     531            return d
     532        d = self.shouldFail(BadSegmentNumberError, "badseg",
     533                            "segnum=1, numsegs=1",
     534                            _try_download)
     535        return d
     536
     537    def test_download_segment_terminate(self):
     538        self.basedir = self.mktemp()
     539        self.set_up_grid()
     540        self.c0 = self.g.clients[0]
     541        self.load_shares()
     542        n = self.c0.create_node_from_uri(immutable_uri)
     543        cn = n._cnode
     544        (d,c) = cn.get_segment(0)
     545        fired = []
     546        d.addCallback(fired.append)
     547        self.c0.terminator.disownServiceParent()
     548        d = fireEventually()
     549        d.addCallback(flushEventualQueue)
     550        def _check(ign):
     551            self.failUnlessEqual(fired, [])
     552        d.addCallback(_check)
     553        return d
     554
     555    def test_pause(self):
     556        self.basedir = self.mktemp()
     557        self.set_up_grid()
     558        self.c0 = self.g.clients[0]
     559        self.load_shares()
     560        n = self.c0.create_node_from_uri(immutable_uri)
     561        c = PausingConsumer()
     562        d = n.read(c)
     563        def _downloaded(mc):
     564            newdata = "".join(mc.chunks)
     565            self.failUnlessEqual(newdata, plaintext)
     566        d.addCallback(_downloaded)
     567        return d
     568
     569    def test_pause_then_stop(self):
     570        self.basedir = self.mktemp()
     571        self.set_up_grid()
     572        self.c0 = self.g.clients[0]
     573        self.load_shares()
     574        n = self.c0.create_node_from_uri(immutable_uri)
     575        c = PausingAndStoppingConsumer()
     576        d = self.shouldFail(DownloadStopped, "test_pause_then_stop",
     577                            "our Consumer called stopProducing()",
     578                            n.read, c)
     579        return d
     580
     581    def test_stop(self):
      582        # use a download target that does an immediate stop (ticket #473)
     583        self.basedir = self.mktemp()
     584        self.set_up_grid()
     585        self.c0 = self.g.clients[0]
     586        self.load_shares()
     587        n = self.c0.create_node_from_uri(immutable_uri)
     588        c = StoppingConsumer()
     589        d = self.shouldFail(DownloadStopped, "test_stop",
     590                            "our Consumer called stopProducing()",
     591                            n.read, c)
     592        return d
     593
     594    def test_download_segment_bad_ciphertext_hash(self):
     595        # The crypttext_hash_tree asserts the integrity of the decoded
     596        # ciphertext, and exists to detect two sorts of problems. The first
     597        # is a bug in zfec decode. The second is the "two-sided t-shirt"
     598        # attack (found by Christian Grothoff), in which a malicious uploader
     599        # creates two sets of shares (one for file A, second for file B),
     600        # uploads a combination of them (shares 0-4 of A, 5-9 of B), and then
     601        # builds an otherwise normal UEB around those shares: their goal is
     602        # to give their victim a filecap which sometimes downloads the good A
     603        # contents, and sometimes the bad B contents, depending upon which
     604        # servers/shares they can get to. Having a hash of the ciphertext
     605        # forces them to commit to exactly one version. (Christian's prize
     606        # for finding this problem was a t-shirt with two sides: the shares
     607        # of file A on the front, B on the back).
     608
      609        # Creating a set of shares with this property is too hard (it
      610        # requires a lot of tampering with the uploader), although it'd be
      611        # nice to do so and confirm our fix. So instead, we just damage the
      612        # decoder. The tail decoder is rebuilt each time, so we need to use a
      613        # file with multiple segments.
     614        self.basedir = self.mktemp()
     615        self.set_up_grid()
     616        self.c0 = self.g.clients[0]
     617
     618        u = upload.Data(plaintext, None)
     619        u.max_segment_size = 60 # 6 segs
     620        d = self.c0.upload(u)
     621        def _uploaded(ur):
     622            n = self.c0.create_node_from_uri(ur.uri)
     623            n._cnode._node._build_guessed_tables(u.max_segment_size)
     624
     625            d = download_to_data(n)
     626            def _break_codec(data):
     627                # the codec isn't created until the UEB is retrieved
     628                node = n._cnode._node
     629                vcap = node._verifycap
     630                k, N = vcap.needed_shares, vcap.total_shares
     631                bad_codec = BrokenDecoder()
     632                bad_codec.set_params(node.segment_size, k, N)
     633                node._codec = bad_codec
     634            d.addCallback(_break_codec)
     635            # now try to download it again. The broken codec will provide
     636            # ciphertext that fails the hash test.
     637            d.addCallback(lambda ign:
     638                          self.shouldFail(BadCiphertextHashError, "badhash",
     639                                          "hash failure in "
     640                                          "ciphertext_hash_tree: segnum=0",
     641                                          download_to_data, n))
     642            return d
     643        d.addCallback(_uploaded)
     644        return d
     645
     646    def OFFtest_download_segment_XXX(self):
     647        self.basedir = self.mktemp()
     648        self.set_up_grid()
     649        self.c0 = self.g.clients[0]
     650
     651        # upload a file with multiple segments, and a non-default segsize, to
     652        # exercise the offset-guessing code. This time we *do* tell the
     653        # downloader about the unusual segsize, so it can guess right.
     654        u = upload.Data(plaintext, None)
     655        u.max_segment_size = 70 # 5 segs, 8-wide hashtree
     656        con1 = MemoryConsumer()
     657        con2 = MemoryConsumer()
     658        d = self.c0.upload(u)
     659        def _uploaded(ur):
     660            n = self.c0.create_node_from_uri(ur.uri)
     661            n._cnode._node._build_guessed_tables(u.max_segment_size)
     662            d1 = n.read(con1, 70, 20)
     663            #d2 = n.read(con2, 140, 20)
     664            d2 = defer.succeed(None)
     665            return defer.gatherResults([d1,d2])
     666        d.addCallback(_uploaded)
     667        def _done(res):
     668            self.failUnlessEqual("".join(con1.chunks), plaintext[70:90])
     669            self.failUnlessEqual("".join(con2.chunks), plaintext[140:160])
     670        #d.addCallback(_done)
     671        return d
     672
     673    def test_duplicate_shares(self):
     674        self.basedir = self.mktemp()
     675        self.set_up_grid()
     676        self.c0 = self.g.clients[0]
     677
     678        self.load_shares()
     679        # make sure everybody has a copy of sh0. The second server contacted
     680        # will report two shares, and the ShareFinder will handle the
     681        # duplicate by attaching both to the same CommonShare instance.
     682        si = uri.from_string(immutable_uri).get_storage_index()
     683        si_dir = storage_index_to_dir(si)
     684        sh0_file = [sharefile
     685                    for (shnum, serverid, sharefile)
     686                    in self.find_shares(immutable_uri)
     687                    if shnum == 0][0]
     688        sh0_data = open(sh0_file, "rb").read()
     689        for clientnum in immutable_shares:
     690            if 0 in immutable_shares[clientnum]:
     691                continue
     692            cdir = self.get_serverdir(clientnum)
     693            target = os.path.join(cdir, "shares", si_dir, "0")
     694            outf = open(target, "wb")
     695            outf.write(sh0_data)
     696            outf.close()
     697
     698        d = self.download_immutable()
     699        return d
     700
     701    def test_verifycap(self):
     702        self.basedir = self.mktemp()
     703        self.set_up_grid()
     704        self.c0 = self.g.clients[0]
     705        self.load_shares()
     706
     707        n = self.c0.create_node_from_uri(immutable_uri)
     708        vcap = n.get_verify_cap().to_string()
     709        vn = self.c0.create_node_from_uri(vcap)
     710        d = download_to_data(vn)
     711        def _got_ciphertext(ciphertext):
     712            self.failUnlessEqual(len(ciphertext), len(plaintext))
     713            self.failIfEqual(ciphertext, plaintext)
     714        d.addCallback(_got_ciphertext)
     715        return d
     716
     717class BrokenDecoder(CRSDecoder):
     718    def decode(self, shares, shareids):
     719        d = CRSDecoder.decode(self, shares, shareids)
     720        def _decoded(buffers):
     721            def _corruptor(s, which):
     722                return s[:which] + chr(ord(s[which])^0x01) + s[which+1:]
     723            buffers[0] = _corruptor(buffers[0], 0) # flip lsb of first byte
     724            return buffers
     725        d.addCallback(_decoded)
     726        return d
     727
     728
     729class PausingConsumer(MemoryConsumer):
     730    def __init__(self):
     731        MemoryConsumer.__init__(self)
     732        self.size = 0
     733        self.writes = 0
     734    def write(self, data):
     735        self.size += len(data)
     736        self.writes += 1
     737        if self.writes <= 2:
     738            # we happen to use 4 segments, and want to avoid pausing on the
     739            # last one (since then the _unpause timer will still be running)
     740            self.producer.pauseProducing()
     741            reactor.callLater(0.1, self._unpause)
     742        return MemoryConsumer.write(self, data)
     743    def _unpause(self):
     744        self.producer.resumeProducing()
     745
     746class PausingAndStoppingConsumer(PausingConsumer):
     747    def write(self, data):
     748        self.producer.pauseProducing()
     749        reactor.callLater(0.5, self._stop)
     750    def _stop(self):
     751        self.producer.stopProducing()
     752
     753class StoppingConsumer(PausingConsumer):
     754    def write(self, data):
     755        self.producer.stopProducing()
     756
     757class StallingConsumer(MemoryConsumer):
     758    def __init__(self, halfway_cb):
     759        MemoryConsumer.__init__(self)
     760        self.halfway_cb = halfway_cb
     761        self.writes = 0
     762    def write(self, data):
     763        self.writes += 1
     764        if self.writes == 1:
     765            self.halfway_cb()
     766        return MemoryConsumer.write(self, data)
     767
     768class Corruption(_Base, unittest.TestCase):
     769
     770    def _corrupt_flip(self, ign, imm_uri, which):
     771        log.msg("corrupt %d" % which)
     772        def _corruptor(s, debug=False):
     773            return s[:which] + chr(ord(s[which])^0x01) + s[which+1:]
     774        self.corrupt_shares_numbered(imm_uri, [0], _corruptor)
     775
     776    def _corrupt_set(self, ign, imm_uri, which, newvalue):
     777        log.msg("corrupt %d" % which)
     778        def _corruptor(s, debug=False):
     779            return s[:which] + chr(newvalue) + s[which+1:]
     780        self.corrupt_shares_numbered(imm_uri, [0], _corruptor)
     781
     782    def test_each_byte(self):
     783        # Setting catalog_detection=True performs an exhaustive test of the
     784        # Downloader's response to corruption in the lsb of each byte of the
     785        # 2070-byte share, with two goals: make sure we tolerate all forms of
     786        # corruption (i.e. don't hang or return bad data), and make a list of
     787        # which bytes can be corrupted without influencing the download
     788        # (since we don't need every byte of the share). That takes 50s to
     789        # run on my laptop and doesn't have any actual asserts, so we don't
     790        # normally do that.
     791        self.catalog_detection = False
     792
     793        self.basedir = "download/Corruption/each_byte"
     794        self.set_up_grid()
     795        self.c0 = self.g.clients[0]
     796
     797        # to exercise the block-hash-tree code properly, we need to have
     798        # multiple segments. We don't tell the downloader about the different
     799        # segsize, so it guesses wrong and must do extra roundtrips.
     800        u = upload.Data(plaintext, None)
     801        u.max_segment_size = 120 # 3 segs, 4-wide hashtree
     802
     803        if self.catalog_detection:
     804            undetected = spans.Spans()
     805
     806        def _download(ign, imm_uri, which, expected):
     807            n = self.c0.create_node_from_uri(imm_uri)
     808            # for this test to work, we need to have a new Node each time.
     809            # Make sure the NodeMaker's weakcache hasn't interfered.
     810            assert not n._cnode._node._shares
     811            d = download_to_data(n)
     812            def _got_data(data):
     813                self.failUnlessEqual(data, plaintext)
     814                shnums = sorted([s._shnum for s in n._cnode._node._shares])
     815                no_sh0 = bool(0 not in shnums)
     816                sh0 = [s for s in n._cnode._node._shares if s._shnum == 0]
     817                sh0_had_corruption = False
     818                if sh0 and sh0[0].had_corruption:
     819                    sh0_had_corruption = True
     820                num_needed = len(n._cnode._node._shares)
     821                if self.catalog_detection:
     822                    detected = no_sh0 or sh0_had_corruption or (num_needed!=3)
     823                    if not detected:
     824                        undetected.add(which, 1)
     825                if expected == "no-sh0":
     826                    self.failIfIn(0, shnums)
     827                elif expected == "0bad-need-3":
     828                    self.failIf(no_sh0)
     829                    self.failUnless(sh0[0].had_corruption)
     830                    self.failUnlessEqual(num_needed, 3)
     831                elif expected == "need-4th":
     832                    self.failIf(no_sh0)
     833                    self.failUnless(sh0[0].had_corruption)
     834                    self.failIfEqual(num_needed, 3)
     835            d.addCallback(_got_data)
     836            return d
     837
     838
     839        d = self.c0.upload(u)
     840        def _uploaded(ur):
     841            imm_uri = ur.uri
     842            self.shares = self.copy_shares(imm_uri)
     843            d = defer.succeed(None)
     844            # 'victims' is a list of corruption tests to run. Each one flips
     845            # the low-order bit of the specified offset in the share file (so
     846            # offset=0 is the MSB of the container version, offset=15 is the
     847            # LSB of the share version, offset=24 is the MSB of the
     848            # data-block-offset, and offset=48 is the first byte of the first
     849            # data-block). Each one also specifies what sort of corruption
     850            # we're expecting to see.
     851            no_sh0_victims = [0,1,2,3] # container version
     852            need3_victims =  [ ] # none currently in this category
     853            # when the offsets are corrupted, the Share will be unable to
     854            # retrieve the data it wants (because it thinks that data lives
     855            # off in the weeds somewhere), and Share treats DataUnavailable
     856            # as abandon-this-share, so in general we'll be forced to look
     857            # for a 4th share.
     858            need_4th_victims = [12,13,14,15, # share version
     859                                24,25,26,27, # offset[data]
     860                                32,33,34,35, # offset[crypttext_hash_tree]
     861                                36,37,38,39, # offset[block_hashes]
     862                                44,45,46,47, # offset[UEB]
     863                                ]
     864            need_4th_victims.append(48) # block data
     865            # when corrupting hash trees, we must corrupt a value that isn't
     866            # directly set from somewhere else. Since we download data from
     867            # seg0, corrupt something on its hash chain, like [2] (the
     868            # right-hand child of the root)
     869            need_4th_victims.append(600+2*32) # block_hashes[2]
     870            # Share.loop is pretty conservative: it abandons the share at the
     871            # first sign of corruption. It doesn't strictly need to be this
     872            # way: if the UEB were corrupt, we could still get good block
     873            # data from that share, as long as there was a good copy of the
     874            # UEB elsewhere. If this behavior is relaxed, then corruption in
     875            # the following fields (which are present in multiple shares)
     876            # should fall into the "need3_victims" case instead of the
     877            # "need_4th_victims" case.
     878            need_4th_victims.append(376+2*32) # crypttext_hash_tree[2]
     879            need_4th_victims.append(824) # share_hashes
     880            need_4th_victims.append(994) # UEB length
     881            need_4th_victims.append(998) # UEB
     882            corrupt_me = ([(i,"no-sh0") for i in no_sh0_victims] +
     883                          [(i, "0bad-need-3") for i in need3_victims] +
     884                          [(i, "need-4th") for i in need_4th_victims])
     885            if self.catalog_detection:
     886                corrupt_me = [(i, "") for i in range(len(self.sh0_orig))]
     887            for i,expected in corrupt_me:
     888                # All these tests result in a successful download. What we're
     889                # measuring is how many shares the downloader had to use.
     890                d.addCallback(self._corrupt_flip, imm_uri, i)
     891                d.addCallback(_download, imm_uri, i, expected)
     892                d.addCallback(lambda ign: self.restore_all_shares(self.shares))
     893                d.addCallback(fireEventually)
     894            corrupt_values = [(3, 2, "no-sh0"),
     895                              (15, 2, "need-4th"), # share looks v2
     896                              ]
     897            for i,newvalue,expected in corrupt_values:
     898                d.addCallback(self._corrupt_set, imm_uri, i, newvalue)
     899                d.addCallback(_download, imm_uri, i, expected)
     900                d.addCallback(lambda ign: self.restore_all_shares(self.shares))
     901                d.addCallback(fireEventually)
     902            return d
     903        d.addCallback(_uploaded)
     904        def _show_results(ign):
     905            print
     906            print ("of [0:%d], corruption ignored in %s" %
     907                   (len(self.sh0_orig), undetected.dump()))
     908        if self.catalog_detection:
     909            d.addCallback(_show_results)
     910            # of [0:2070], corruption ignored in len=1133:
     911            # [4-11],[16-23],[28-31],[152-439],[600-663],[1309-2069]
     912            #  [4-11]: container sizes
     913            #  [16-23]: share block/data sizes
     914            #  [152-375]: plaintext hash tree
     915            #  [376-408]: crypttext_hash_tree[0] (root)
     916            #  [408-439]: crypttext_hash_tree[1] (computed)
     917            #  [600-631]: block hash tree[0] (root)
     918            #  [632-663]: block hash tree[1] (computed)
     919            #  [1309-]: reserved+unused UEB space
     920        return d
     921
     922    def test_failure(self):
     923        # this test corrupts all shares in the same way, and asserts that the
     924        # download fails.
     925
     926        self.basedir = "download/Corruption/failure"
     927        self.set_up_grid()
     928        self.c0 = self.g.clients[0]
     929
     930        # to exercise the block-hash-tree code properly, we need to have
     931        # multiple segments. We don't tell the downloader about the different
     932        # segsize, so it guesses wrong and must do extra roundtrips.
     933        u = upload.Data(plaintext, None)
     934        u.max_segment_size = 120 # 3 segs, 4-wide hashtree
     935
     936        d = self.c0.upload(u)
     937        def _uploaded(ur):
     938            imm_uri = ur.uri
     939            self.shares = self.copy_shares(imm_uri)
     940
     941            corrupt_me = [(48, "block data", "Last failure: None"),
     942                          (600+2*32, "block_hashes[2]", "BadHashError"),
     943                          (376+2*32, "crypttext_hash_tree[2]", "BadHashError"),
     944                          (824, "share_hashes", "BadHashError"),
     945                          ]
     946            def _download(imm_uri):
     947                n = self.c0.create_node_from_uri(imm_uri)
     948                # for this test to work, we need to have a new Node each time.
     949                # Make sure the NodeMaker's weakcache hasn't interfered.
     950                assert not n._cnode._node._shares
     951                return download_to_data(n)
     952
     953            d = defer.succeed(None)
     954            for i,which,substring in corrupt_me:
     955                # All these tests result in a failed download.
     956                d.addCallback(self._corrupt_flip_all, imm_uri, i)
     957                d.addCallback(lambda ign:
     958                              self.shouldFail(NotEnoughSharesError, which,
     959                                              substring,
     960                                              _download, imm_uri))
     961                d.addCallback(lambda ign: self.restore_all_shares(self.shares))
     962                d.addCallback(fireEventually)
     963            return d
     964        d.addCallback(_uploaded)
     965
     966        return d
     967
     968    def _corrupt_flip_all(self, ign, imm_uri, which):
     969        def _corruptor(s, debug=False):
     970            return s[:which] + chr(ord(s[which])^0x01) + s[which+1:]
     971        self.corrupt_all_shares(imm_uri, _corruptor)
     972
     973class DownloadV2(_Base, unittest.TestCase):
     974    # tests which exercise v2-share code. They first upload a file with
     975    # FORCE_V2 set.
     976
     977    def setUp(self):
     978        d = defer.maybeDeferred(_Base.setUp, self)
     979        def _set_force_v2(ign):
     980            self.old_force_v2 = layout.FORCE_V2
     981            layout.FORCE_V2 = True
     982        d.addCallback(_set_force_v2)
     983        return d
     984    def tearDown(self):
     985        layout.FORCE_V2 = self.old_force_v2
     986        return _Base.tearDown(self)
     987
     988    def test_download(self):
     989        self.basedir = self.mktemp()
     990        self.set_up_grid()
     991        self.c0 = self.g.clients[0]
     992
     993        # upload a file
     994        u = upload.Data(plaintext, None)
     995        d = self.c0.upload(u)
     996        def _uploaded(ur):
     997            imm_uri = ur.uri
     998            n = self.c0.create_node_from_uri(imm_uri)
     999            return download_to_data(n)
     1000        d.addCallback(_uploaded)
     1001        return d
     1002
     1003    def test_download_no_overrun(self):
     1004        self.basedir = self.mktemp()
     1005        self.set_up_grid()
     1006        self.c0 = self.g.clients[0]
     1007
     1008        # tweak the client's copies of server-version data, so it believes
     1009        # that they're old and can't handle reads that overrun the length of
     1010        # the share. This exercises a different code path.
     1011        for (peerid, rref) in self.c0.storage_broker.get_all_servers():
     1012            v1 = rref.version["http://allmydata.org/tahoe/protocols/storage/v1"]
     1013            v1["tolerates-immutable-read-overrun"] = False
     1014
     1015        # upload a file
     1016        u = upload.Data(plaintext, None)
     1017        d = self.c0.upload(u)
     1018        def _uploaded(ur):
     1019            imm_uri = ur.uri
     1020            n = self.c0.create_node_from_uri(imm_uri)
     1021            return download_to_data(n)
     1022        d.addCallback(_uploaded)
     1023        return d
     1024
     1025    def OFF_test_no_overrun_corrupt_shver(self): # unnecessary
     1026        self.basedir = self.mktemp()
     1027        self.set_up_grid()
     1028        self.c0 = self.g.clients[0]
     1029
     1030        for (peerid, rref) in self.c0.storage_broker.get_all_servers():
     1031            v1 = rref.version["http://allmydata.org/tahoe/protocols/storage/v1"]
     1032            v1["tolerates-immutable-read-overrun"] = False
     1033
     1034        # upload a file
     1035        u = upload.Data(plaintext, None)
     1036        d = self.c0.upload(u)
     1037        def _uploaded(ur):
     1038            imm_uri = ur.uri
     1039            def _do_corrupt(which, newvalue):
     1040                def _corruptor(s, debug=False):
     1041                    return s[:which] + chr(newvalue) + s[which+1:]
     1042                self.corrupt_shares_numbered(imm_uri, [0], _corruptor)
     1043            _do_corrupt(12+3, 0x00)
     1044            n = self.c0.create_node_from_uri(imm_uri)
     1045            d = download_to_data(n)
     1046            def _got_data(data):
     1047                self.failUnlessEqual(data, plaintext)
     1048            d.addCallback(_got_data)
     1049            return d
     1050        d.addCallback(_uploaded)
     1051        return d
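(Illustration, not part of the patch: the Corruption and DownloadV2 tests above
all build their corruptors from the same one-byte rewrite pattern; here is a
minimal standalone restatement of the two variants used by _corrupt_flip /
_corrupt_flip_all and _corrupt_set.)

    def flip_lsb(share_data, which):
        # flip the low-order bit of byte 'which' (the _corrupt_flip pattern)
        return share_data[:which] + chr(ord(share_data[which]) ^ 0x01) + share_data[which+1:]

    def set_byte(share_data, which, newvalue):
        # overwrite byte 'which' with a literal value (the _corrupt_set pattern)
        return share_data[:which] + chr(newvalue) + share_data[which+1:]

    # For example, flipping offsets 0-3 (container version) makes the downloader
    # reject sh0 outright ("no-sh0"), while flipping offset 48 (first data block)
    # is only caught by the block hash tree and costs a fourth share ("need-4th").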
  • src/allmydata/test/test_encode.py

    diff --git a/src/allmydata/test/test_encode.py b/src/allmydata/test/test_encode.py
    index 1108e18..c06fbbd 100644
    a b  
    11from zope.interface import implements
    22from twisted.trial import unittest
    3 from twisted.internet import defer, reactor
     3from twisted.internet import defer
    44from twisted.python.failure import Failure
    55from foolscap.api import fireEventually
    6 from allmydata import hashtree, uri
    7 from allmydata.immutable import encode, upload, download
     6from allmydata import uri
     7from allmydata.immutable import encode, upload, checker
    88from allmydata.util import hashutil
    99from allmydata.util.assertutil import _assert
    10 from allmydata.util.consumer import MemoryConsumer
    11 from allmydata.interfaces import IStorageBucketWriter, IStorageBucketReader, \
    12      NotEnoughSharesError, IStorageBroker, UploadUnhappinessError
    13 from allmydata.monitor import Monitor
    14 import allmydata.test.common_util as testutil
     10from allmydata.util.consumer import download_to_data
     11from allmydata.interfaces import IStorageBucketWriter, IStorageBucketReader
     12from allmydata.test.no_network import GridTestMixin
    1513
    1614class LostPeerError(Exception):
    1715    pass
    class LostPeerError(Exception): 
    1917def flip_bit(good): # flips the last bit
    2018    return good[:-1] + chr(ord(good[-1]) ^ 0x01)
    2119
    22 class FakeStorageBroker:
    23     implements(IStorageBroker)
    24 
    2520class FakeBucketReaderWriterProxy:
    2621    implements(IStorageBucketWriter, IStorageBucketReader)
    2722    # these are used for both reading and writing
    class FakeBucketReaderWriterProxy: 
    5954            self.blocks[segmentnum] = data
    6055        return defer.maybeDeferred(_try)
    6156
    62     def put_plaintext_hashes(self, hashes):
    63         def _try():
    64             assert not self.closed
    65             assert not self.plaintext_hashes
    66             self.plaintext_hashes = hashes
    67         return defer.maybeDeferred(_try)
    68 
    6957    def put_crypttext_hashes(self, hashes):
    7058        def _try():
    7159            assert not self.closed
    class ValidatedExtendedURIProxy(unittest.TestCase): 
    223211        fb = FakeBucketReaderWriterProxy()
    224212        fb.put_uri_extension(uebstring)
    225213        verifycap = uri.CHKFileVerifierURI(storage_index='x'*16, uri_extension_hash=uebhash, needed_shares=self.K, total_shares=self.M, size=self.SIZE)
    226         vup = download.ValidatedExtendedURIProxy(fb, verifycap)
     214        vup = checker.ValidatedExtendedURIProxy(fb, verifycap)
    227215        return vup.start()
    228216
    229217    def _test_accept(self, uebdict):
    class ValidatedExtendedURIProxy(unittest.TestCase): 
    237225
    238226    def _test_reject(self, uebdict):
    239227        d = self._test(uebdict)
    240         d.addBoth(self._should_fail, (KeyError, download.BadURIExtension))
     228        d.addBoth(self._should_fail, (KeyError, checker.BadURIExtension))
    241229        return d
    242230
    243231    def test_accept_minimal(self):
    class Encode(unittest.TestCase): 
    333321
    334322        return d
    335323
    336     # a series of 3*3 tests to check out edge conditions. One axis is how the
    337     # plaintext is divided into segments: kn+(-1,0,1). Another way to express
    338     # that is that n%k == -1 or 0 or 1. For example, for 25-byte segments, we
    339     # might test 74 bytes, 75 bytes, and 76 bytes.
    340 
    341     # on the other axis is how many leaves in the block hash tree we wind up
    342     # with, relative to a power of 2, so 2^a+(-1,0,1). Each segment turns
    343     # into a single leaf. So we'd like to check out, e.g., 3 segments, 4
    344     # segments, and 5 segments.
    345 
    346     # that results in the following series of data lengths:
    347     #  3 segs: 74, 75, 51
    348     #  4 segs: 99, 100, 76
    349     #  5 segs: 124, 125, 101
    350 
    351     # all tests encode to 100 shares, which means the share hash tree will
    352     # have 128 leaves, which means that buckets will be given an 8-long share
    353     # hash chain
    354 
    355     # all 3-segment files will have a 4-leaf blockhashtree, and thus expect
    356     # to get 7 blockhashes. 4-segment files will also get 4-leaf block hash
    357     # trees and 7 blockhashes. 5-segment files will get 8-leaf block hash
    358     # trees, which get 15 blockhashes.
    359 
    360324    def test_send_74(self):
    361325        # 3 segments (25, 25, 24)
    362326        return self.do_encode(25, 74, 100, 3, 7, 8)
    class Encode(unittest.TestCase): 
    387351        # 5 segments: 25, 25, 25, 25, 1
    388352        return self.do_encode(25, 101, 100, 5, 15, 8)
    389353
    390 class PausingConsumer(MemoryConsumer):
    391     def __init__(self):
    392         MemoryConsumer.__init__(self)
    393         self.size = 0
    394         self.writes = 0
    395     def write(self, data):
    396         self.size += len(data)
    397         self.writes += 1
    398         if self.writes <= 2:
    399             # we happen to use 4 segments, and want to avoid pausing on the
    400             # last one (since then the _unpause timer will still be running)
    401             self.producer.pauseProducing()
    402             reactor.callLater(0.1, self._unpause)
    403         return MemoryConsumer.write(self, data)
    404     def _unpause(self):
    405         self.producer.resumeProducing()
    406 
    407 class PausingAndStoppingConsumer(PausingConsumer):
    408     def write(self, data):
    409         self.producer.pauseProducing()
    410         reactor.callLater(0.5, self._stop)
    411     def _stop(self):
    412         self.producer.stopProducing()
    413 
    414 class StoppingConsumer(PausingConsumer):
    415     def write(self, data):
    416         self.producer.stopProducing()
    417 
    418 class Roundtrip(unittest.TestCase, testutil.ShouldFailMixin):
    419     timeout = 2400 # It takes longer than 240 seconds on Zandr's ARM box.
    420     def send_and_recover(self, k_and_happy_and_n=(25,75,100),
    421                          AVAILABLE_SHARES=None,
    422                          datalen=76,
    423                          max_segment_size=25,
    424                          bucket_modes={},
    425                          recover_mode="recover",
    426                          consumer=None,
    427                          ):
    428         if AVAILABLE_SHARES is None:
    429             AVAILABLE_SHARES = k_and_happy_and_n[2]
    430         data = make_data(datalen)
    431         d = self.send(k_and_happy_and_n, AVAILABLE_SHARES,
    432                       max_segment_size, bucket_modes, data)
    433         # that fires with (uri_extension_hash, e, shareholders)
    434         d.addCallback(self.recover, AVAILABLE_SHARES, recover_mode,
    435                       consumer=consumer)
    436         # that fires with newdata
    437         def _downloaded((newdata, fd)):
    438             self.failUnless(newdata == data, str((len(newdata), len(data))))
    439             return fd
    440         d.addCallback(_downloaded)
    441         return d
    442354
    443     def send(self, k_and_happy_and_n, AVAILABLE_SHARES, max_segment_size,
    444              bucket_modes, data):
    445         k, happy, n = k_and_happy_and_n
    446         NUM_SHARES = k_and_happy_and_n[2]
    447         if AVAILABLE_SHARES is None:
    448             AVAILABLE_SHARES = NUM_SHARES
    449         e = encode.Encoder()
    450         u = upload.Data(data, convergence="some convergence string")
    451         # force use of multiple segments by using a low max_segment_size
    452         u.max_segment_size = max_segment_size
    453         u.encoding_param_k = k
    454         u.encoding_param_happy = happy
    455         u.encoding_param_n = n
    456         eu = upload.EncryptAnUploadable(u)
    457         d = e.set_encrypted_uploadable(eu)
    458 
    459         shareholders = {}
    460         def _ready(res):
    461             k,happy,n = e.get_param("share_counts")
    462             assert n == NUM_SHARES # else we'll be completely confused
    463             servermap = {}
    464             for shnum in range(NUM_SHARES):
    465                 mode = bucket_modes.get(shnum, "good")
    466                 peer = FakeBucketReaderWriterProxy(mode, "peer%d" % shnum)
    467                 shareholders[shnum] = peer
    468                 servermap.setdefault(shnum, set()).add(peer.get_peerid())
    469             e.set_shareholders(shareholders, servermap)
    470             return e.start()
    471         d.addCallback(_ready)
    472         def _sent(res):
    473             d1 = u.get_encryption_key()
    474             d1.addCallback(lambda key: (res, key, shareholders))
    475             return d1
    476         d.addCallback(_sent)
    477         return d
     355class Roundtrip(GridTestMixin, unittest.TestCase):
    478356
    479     def recover(self, (res, key, shareholders), AVAILABLE_SHARES,
    480                 recover_mode, consumer=None):
    481         verifycap = res
    482 
    483         if "corrupt_key" in recover_mode:
    484             # we corrupt the key, so that the decrypted data is corrupted and
    485             # will fail the plaintext hash check. Since we're manually
    486             # attaching shareholders, the fact that the storage index is also
    487             # corrupted doesn't matter.
    488             key = flip_bit(key)
    489 
    490         u = uri.CHKFileURI(key=key,
    491                            uri_extension_hash=verifycap.uri_extension_hash,
    492                            needed_shares=verifycap.needed_shares,
    493                            total_shares=verifycap.total_shares,
    494                            size=verifycap.size)
    495 
    496         sb = FakeStorageBroker()
    497         if not consumer:
    498             consumer = MemoryConsumer()
    499         innertarget = download.ConsumerAdapter(consumer)
    500         target = download.DecryptingTarget(innertarget, u.key)
    501         fd = download.CiphertextDownloader(sb, u.get_verify_cap(), target, monitor=Monitor())
    502 
    503         # we manually cycle the CiphertextDownloader through a number of steps that
    504         # would normally be sequenced by a Deferred chain in
    505         # CiphertextDownloader.start(), to give us more control over the process.
    506         # In particular, by bypassing _get_all_shareholders, we skip
    507         # permuted-peerlist selection.
    508         for shnum, bucket in shareholders.items():
    509             if shnum < AVAILABLE_SHARES and bucket.closed:
    510                 fd.add_share_bucket(shnum, bucket)
    511         fd._got_all_shareholders(None)
    512 
    513         # Make it possible to obtain uri_extension from the shareholders.
    514         # Arrange for shareholders[0] to be the first, so we can selectively
    515         # corrupt the data it returns.
    516         uri_extension_sources = shareholders.values()
    517         uri_extension_sources.remove(shareholders[0])
    518         uri_extension_sources.insert(0, shareholders[0])
    519 
    520         d = defer.succeed(None)
    521 
    522         # have the CiphertextDownloader retrieve a copy of uri_extension itself
    523         d.addCallback(fd._obtain_uri_extension)
    524 
    525         if "corrupt_crypttext_hashes" in recover_mode:
    526             # replace everybody's crypttext hash trees with a different one
    527             # (computed over a different file), then modify our uri_extension
    528             # to reflect the new crypttext hash tree root
    529             def _corrupt_crypttext_hashes(unused):
    530                 assert isinstance(fd._vup, download.ValidatedExtendedURIProxy), fd._vup
    531                 assert fd._vup.crypttext_root_hash, fd._vup
    532                 badhash = hashutil.tagged_hash("bogus", "data")
    533                 bad_crypttext_hashes = [badhash] * fd._vup.num_segments
    534                 badtree = hashtree.HashTree(bad_crypttext_hashes)
    535                 for bucket in shareholders.values():
    536                     bucket.crypttext_hashes = list(badtree)
    537                 fd._crypttext_hash_tree = hashtree.IncompleteHashTree(fd._vup.num_segments)
    538                 fd._crypttext_hash_tree.set_hashes({0: badtree[0]})
    539                 return fd._vup
    540             d.addCallback(_corrupt_crypttext_hashes)
    541 
    542         # also have the CiphertextDownloader ask for hash trees
    543         d.addCallback(fd._get_crypttext_hash_tree)
    544 
    545         d.addCallback(fd._download_all_segments)
    546         d.addCallback(fd._done)
    547         def _done(t):
    548             newdata = "".join(consumer.chunks)
    549             return (newdata, fd)
    550         d.addCallback(_done)
    551         return d
    552 
    553     def test_not_enough_shares(self):
    554         d = self.send_and_recover((4,8,10), AVAILABLE_SHARES=2)
    555         def _done(res):
    556             self.failUnless(isinstance(res, Failure))
    557             self.failUnless(res.check(NotEnoughSharesError))
    558         d.addBoth(_done)
    559         return d
    560 
    561     def test_one_share_per_peer(self):
    562         return self.send_and_recover()
    563 
    564     def test_74(self):
    565         return self.send_and_recover(datalen=74)
    566     def test_75(self):
    567         return self.send_and_recover(datalen=75)
    568     def test_51(self):
    569         return self.send_and_recover(datalen=51)
    570 
    571     def test_99(self):
    572         return self.send_and_recover(datalen=99)
    573     def test_100(self):
    574         return self.send_and_recover(datalen=100)
    575     def test_76(self):
    576         return self.send_and_recover(datalen=76)
    577 
    578     def test_124(self):
    579         return self.send_and_recover(datalen=124)
    580     def test_125(self):
    581         return self.send_and_recover(datalen=125)
    582     def test_101(self):
    583         return self.send_and_recover(datalen=101)
    584 
    585     def test_pause(self):
    586         # use a download target that does pauseProducing/resumeProducing a
    587         # few times, then finishes
    588         c = PausingConsumer()
    589         d = self.send_and_recover(consumer=c)
    590         return d
    591 
    592     def test_pause_then_stop(self):
    593         # use a download target that pauses, then stops.
    594         c = PausingAndStoppingConsumer()
    595         d = self.shouldFail(download.DownloadStopped, "test_pause_then_stop",
    596                             "our Consumer called stopProducing()",
    597                             self.send_and_recover, consumer=c)
    598         return d
    599 
    600     def test_stop(self):
    601         # use a download targetthat does an immediate stop (ticket #473)
    602         c = StoppingConsumer()
    603         d = self.shouldFail(download.DownloadStopped, "test_stop",
    604                             "our Consumer called stopProducing()",
    605                             self.send_and_recover, consumer=c)
    606         return d
    607 
    608     # the following tests all use 4-out-of-10 encoding
    609 
    610     def test_bad_blocks(self):
    611         # the first 6 servers have bad blocks, which will be caught by the
    612         # blockhashes
    613         modemap = dict([(i, "bad block")
    614                         for i in range(6)]
    615                        + [(i, "good")
    616                           for i in range(6, 10)])
    617         return self.send_and_recover((4,8,10), bucket_modes=modemap)
    618 
    619     def test_bad_blocks_failure(self):
    620         # the first 7 servers have bad blocks, which will be caught by the
    621         # blockhashes, and the download will fail
    622         modemap = dict([(i, "bad block")
    623                         for i in range(7)]
    624                        + [(i, "good")
    625                           for i in range(7, 10)])
    626         d = self.send_and_recover((4,8,10), bucket_modes=modemap)
    627         def _done(res):
    628             self.failUnless(isinstance(res, Failure), res)
    629             self.failUnless(res.check(NotEnoughSharesError), res)
    630         d.addBoth(_done)
    631         return d
    632 
    633     def test_bad_blockhashes(self):
    634         # the first 6 servers have bad block hashes, so the blockhash tree
    635         # will not validate
    636         modemap = dict([(i, "bad blockhash")
    637                         for i in range(6)]
    638                        + [(i, "good")
    639                           for i in range(6, 10)])
    640         return self.send_and_recover((4,8,10), bucket_modes=modemap)
    641 
    642     def test_bad_blockhashes_failure(self):
    643         # the first 7 servers have bad block hashes, so the blockhash tree
    644         # will not validate, and the download will fail
    645         modemap = dict([(i, "bad blockhash")
    646                         for i in range(7)]
    647                        + [(i, "good")
    648                           for i in range(7, 10)])
    649         d = self.send_and_recover((4,8,10), bucket_modes=modemap)
    650         def _done(res):
    651             self.failUnless(isinstance(res, Failure))
    652             self.failUnless(res.check(NotEnoughSharesError), res)
    653         d.addBoth(_done)
    654         return d
    655 
    656     def test_bad_sharehashes(self):
    657         # the first 6 servers have bad share hashes, so the sharehash tree
    658         # will not validate
    659         modemap = dict([(i, "bad sharehash")
    660                         for i in range(6)]
    661                        + [(i, "good")
    662                           for i in range(6, 10)])
    663         return self.send_and_recover((4,8,10), bucket_modes=modemap)
    664 
    665     def assertFetchFailureIn(self, fd, where):
    666         expected = {"uri_extension": 0,
    667                     "crypttext_hash_tree": 0,
    668                     }
    669         if where is not None:
    670             expected[where] += 1
    671         self.failUnlessEqual(fd._fetch_failures, expected)
    672 
    673     def test_good(self):
    674         # just to make sure the test harness works when we aren't
    675         # intentionally causing failures
    676         modemap = dict([(i, "good") for i in range(0, 10)])
    677         d = self.send_and_recover((4,8,10), bucket_modes=modemap)
    678         d.addCallback(self.assertFetchFailureIn, None)
    679         return d
    680 
    681     def test_bad_uri_extension(self):
    682         # the first server has a bad uri_extension block, so we will fail
    683         # over to a different server.
    684         modemap = dict([(i, "bad uri_extension") for i in range(1)] +
    685                        [(i, "good") for i in range(1, 10)])
    686         d = self.send_and_recover((4,8,10), bucket_modes=modemap)
    687         d.addCallback(self.assertFetchFailureIn, "uri_extension")
    688         return d
    689 
    690     def test_bad_crypttext_hashroot(self):
    691         # the first server has a bad crypttext hashroot, so we will fail
    692         # over to a different server.
    693         modemap = dict([(i, "bad crypttext hashroot") for i in range(1)] +
    694                        [(i, "good") for i in range(1, 10)])
    695         d = self.send_and_recover((4,8,10), bucket_modes=modemap)
    696         d.addCallback(self.assertFetchFailureIn, "crypttext_hash_tree")
    697         return d
    698 
    699     def test_bad_crypttext_hashes(self):
    700         # the first server has a bad crypttext hash block, so we will fail
    701         # over to a different server.
    702         modemap = dict([(i, "bad crypttext hash") for i in range(1)] +
    703                        [(i, "good") for i in range(1, 10)])
    704         d = self.send_and_recover((4,8,10), bucket_modes=modemap)
    705         d.addCallback(self.assertFetchFailureIn, "crypttext_hash_tree")
    706         return d
    707 
    708     def test_bad_crypttext_hashes_failure(self):
    709         # to test that the crypttext merkle tree is really being applied, we
    710         # sneak into the download process and corrupt two things: we replace
    711         # everybody's crypttext hashtree with a bad version (computed over
    712         # bogus data), and we modify the supposedly-validated uri_extension
    713         # block to match the new crypttext hashtree root. The download
    714         # process should notice that the crypttext coming out of FEC doesn't
    715         # match the tree, and fail.
    716 
    717         modemap = dict([(i, "good") for i in range(0, 10)])
    718         d = self.send_and_recover((4,8,10), bucket_modes=modemap,
    719                                   recover_mode=("corrupt_crypttext_hashes"))
    720         def _done(res):
    721             self.failUnless(isinstance(res, Failure))
    722             self.failUnless(res.check(hashtree.BadHashError), res)
    723         d.addBoth(_done)
    724         return d
     357    # a series of 3*3 tests to check out edge conditions. One axis is how the
     358    # plaintext is divided into segments: kn+(-1,0,1). Another way to express
     359    # this is n%k == -1 or 0 or 1. For example, for 25-byte segments, we
     360    # might test 74 bytes, 75 bytes, and 76 bytes.
    725361
    726     def OFF_test_bad_plaintext(self):
    727         # faking a decryption failure is easier: just corrupt the key
    728         modemap = dict([(i, "good") for i in range(0, 10)])
    729         d = self.send_and_recover((4,8,10), bucket_modes=modemap,
    730                                   recover_mode=("corrupt_key"))
    731         def _done(res):
    732             self.failUnless(isinstance(res, Failure))
    733             self.failUnless(res.check(hashtree.BadHashError), res)
    734         d.addBoth(_done)
    735         return d
     362    # on the other axis is how many leaves in the block hash tree we wind up
     363    # with, relative to a power of 2, so 2^a+(-1,0,1). Each segment turns
     364    # into a single leaf. So we'd like to check out, e.g., 3 segments, 4
     365    # segments, and 5 segments.
    736366
    737     def test_bad_sharehashes_failure(self):
    738         # all ten servers have bad share hashes, so the sharehash tree
    739         # will not validate, and the download will fail
    740         modemap = dict([(i, "bad sharehash")
    741                         for i in range(10)])
    742         d = self.send_and_recover((4,8,10), bucket_modes=modemap)
    743         def _done(res):
    744             self.failUnless(isinstance(res, Failure))
    745             self.failUnless(res.check(NotEnoughSharesError))
    746         d.addBoth(_done)
    747         return d
     367    # that results in the following series of data lengths:
     368    #  3 segs: 74, 75, 51
     369    #  4 segs: 99, 100, 76
     370    #  5 segs: 124, 125, 101
    748371
    749     def test_missing_sharehashes(self):
    750         # the first 6 servers are missing their sharehashes, so the
    751         # sharehash tree will not validate
    752         modemap = dict([(i, "missing sharehash")
    753                         for i in range(6)]
    754                        + [(i, "good")
    755                           for i in range(6, 10)])
    756         return self.send_and_recover((4,8,10), bucket_modes=modemap)
    757 
    758     def test_missing_sharehashes_failure(self):
    759         # all servers are missing their sharehashes, so the sharehash tree will not validate,
    760         # and the download will fail
    761         modemap = dict([(i, "missing sharehash")
    762                         for i in range(10)])
    763         d = self.send_and_recover((4,8,10), bucket_modes=modemap)
    764         def _done(res):
    765             self.failUnless(isinstance(res, Failure), res)
    766             self.failUnless(res.check(NotEnoughSharesError), res)
    767         d.addBoth(_done)
    768         return d
     372    # all tests encode to 100 shares, which means the share hash tree will
     373    # have 128 leaves, which means that buckets will be given an 8-long share
     374    # hash chain
    769375
    770     def test_lost_one_shareholder(self):
    771         # we have enough shareholders when we start, but one segment in we
    772         # lose one of them. The upload should still succeed, as long as we
    773         # still have 'servers_of_happiness' peers left.
    774         modemap = dict([(i, "good") for i in range(9)] +
    775                        [(i, "lost") for i in range(9, 10)])
    776         return self.send_and_recover((4,8,10), bucket_modes=modemap)
    777 
    778     def test_lost_one_shareholder_early(self):
    779         # we have enough shareholders when we choose peers, but just before
    780         # we send the 'start' message, we lose one of them. The upload should
    781         # still succeed, as long as we still have 'servers_of_happiness' peers
    782         # left.
    783         modemap = dict([(i, "good") for i in range(9)] +
    784                        [(i, "lost-early") for i in range(9, 10)])
    785         return self.send_and_recover((4,8,10), bucket_modes=modemap)
    786 
    787     def test_lost_many_shareholders(self):
    788         # we have enough shareholders when we start, but one segment in we
    789         # lose all but one of them. The upload should fail.
    790         modemap = dict([(i, "good") for i in range(1)] +
    791                        [(i, "lost") for i in range(1, 10)])
    792         d = self.send_and_recover((4,8,10), bucket_modes=modemap)
    793         def _done(res):
    794             self.failUnless(isinstance(res, Failure))
    795             self.failUnless(res.check(UploadUnhappinessError), res)
    796         d.addBoth(_done)
     376    # all 3-segment files will have a 4-leaf blockhashtree, and thus expect
     377    # to get 7 blockhashes. 4-segment files will also get 4-leaf block hash
     378    # trees and 7 blockhashes. 5-segment files will get 8-leaf block hash
     379    # trees, which gets 15 blockhashes.
     380
     381    def test_74(self): return self.do_test_size(74)
     382    def test_75(self): return self.do_test_size(75)
     383    def test_51(self): return self.do_test_size(51)
     384    def test_99(self): return self.do_test_size(99)
     385    def test_100(self): return self.do_test_size(100)
     386    def test_76(self): return self.do_test_size(76)
     387    def test_124(self): return self.do_test_size(124)
     388    def test_125(self): return self.do_test_size(125)
     389    def test_101(self): return self.do_test_size(101)
     390
     391    def upload(self, data):
     392        u = upload.Data(data, None)
     393        u.max_segment_size = 25
     394        u.encoding_param_k = 25
     395        u.encoding_param_happy = 1
     396        u.encoding_param_n = 100
     397        d = self.c0.upload(u)
     398        d.addCallback(lambda ur: self.c0.create_node_from_uri(ur.uri))
     399        # returns a FileNode
    797400        return d
    798401
    799     def test_lost_all_shareholders(self):
    800         # we have enough shareholders when we start, but one segment in we
    801         # lose all of them. The upload should fail.
    802         modemap = dict([(i, "lost") for i in range(10)])
    803         d = self.send_and_recover((4,8,10), bucket_modes=modemap)
    804         def _done(res):
    805             self.failUnless(isinstance(res, Failure))
    806             self.failUnless(res.check(UploadUnhappinessError))
    807         d.addBoth(_done)
     402    def do_test_size(self, size):
     403        self.basedir = self.mktemp()
     404        self.set_up_grid()
     405        self.c0 = self.g.clients[0]
     406        DATA = "p"*size
     407        d = self.upload(DATA)
     408        d.addCallback(lambda n: download_to_data(n))
     409        def _downloaded(newdata):
     410            self.failUnlessEqual(newdata, DATA)
     411        d.addCallback(_downloaded)
    808412        return d
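
The edge-condition comments added to test_encode.py above describe how a file's length maps to a segment count and a block-hash-tree shape. A minimal sketch of that arithmetic, assuming the 25-byte max_segment_size set in upload(); the helper below is illustrative and not part of the patch:

SEGSIZE = 25   # mirrors u.max_segment_size in upload() above

def expected_counts(datalen, segsize=SEGSIZE):
    # number of segments: ceiling division
    numsegs = (datalen + segsize - 1) // segsize
    # the block hash tree is a binary Merkle tree whose leaf count is the
    # segment count rounded up to the next power of two
    leaves = 1
    while leaves < numsegs:
        leaves *= 2
    blockhashes = 2*leaves - 1   # total nodes in a full binary tree
    return (numsegs, leaves, blockhashes)

for size in [51, 74, 75, 76, 99, 100, 101, 124, 125]:
    print size, expected_counts(size)
    # 74 -> (3, 4, 7), 76 -> (4, 4, 7), 101 -> (5, 8, 15), matching the
    # "7 blockhashes" / "15 blockhashes" figures in the comments above
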
  • src/allmydata/test/test_filenode.py

    diff --git a/src/allmydata/test/test_filenode.py b/src/allmydata/test/test_filenode.py
    index 5f3feaa..61bb0e8 100644
    a b  
    22from twisted.trial import unittest
    33from allmydata import uri, client
    44from allmydata.monitor import Monitor
    5 from allmydata.immutable.filenode import ImmutableFileNode, LiteralFileNode
     5from allmydata.immutable.literal import LiteralFileNode
     6from allmydata.immutable.filenode import ImmutableFileNode
    67from allmydata.mutable.filenode import MutableFileNode
    7 from allmydata.util import hashutil, cachedir
     8from allmydata.util import hashutil
    89from allmydata.util.consumer import download_to_data
    910
    1011class NotANode:
    class Node(unittest.TestCase): 
    3031                           needed_shares=3,
    3132                           total_shares=10,
    3233                           size=1000)
    33         cf = cachedir.CacheFile("none")
    34         fn1 = ImmutableFileNode(u, None, None, None, None, cf)
    35         fn2 = ImmutableFileNode(u, None, None, None, None, cf)
     34        fn1 = ImmutableFileNode(u, None, None, None, None)
     35        fn2 = ImmutableFileNode(u, None, None, None, None)
    3636        self.failUnlessEqual(fn1, fn2)
    3737        self.failIfEqual(fn1, "I am not a filenode")
    3838        self.failIfEqual(fn1, NotANode())
  • src/allmydata/test/test_hung_server.py

    diff --git a/src/allmydata/test/test_hung_server.py b/src/allmydata/test/test_hung_server.py
    index b1def16..8856ce2 100644
    a b from allmydata.mutable.common import UnrecoverableFileError 
    1010from allmydata.storage.common import storage_index_to_dir
    1111from allmydata.test.no_network import GridTestMixin
    1212from allmydata.test.common import ShouldFailMixin, _corrupt_share_data
     13from allmydata.util.pollmixin import PollMixin
    1314from allmydata.interfaces import NotEnoughSharesError
    1415
    1516immutable_plaintext = "data" * 10000
    1617mutable_plaintext = "muta" * 10000
    1718
    18 class HungServerDownloadTest(GridTestMixin, ShouldFailMixin, unittest.TestCase):
     19class HungServerDownloadTest(GridTestMixin, ShouldFailMixin, PollMixin,
     20                             unittest.TestCase):
    1921    # Many of these tests take around 60 seconds on François's ARM buildslave:
    2022    # http://tahoe-lafs.org/buildbot/builders/FranXois%20lenny-armv5tel
    2123    # allmydata.test.test_hung_server.HungServerDownloadTest.test_2_good_8_broken_duplicate_share_fail once ERRORed after 197 seconds on Midnight Magic's NetBSD buildslave:
    class HungServerDownloadTest(GridTestMixin, ShouldFailMixin, unittest.TestCase): 
    3638        for (id, ss) in servers:
    3739            self.g.unhang_server(id, **kwargs)
    3840
     41    def _hang_shares(self, shnums, **kwargs):
     42        # hang all servers who are holding the given shares
     43        hung_serverids = set()
     44        for (i_shnum, i_serverid, i_sharefile) in self.shares:
     45            if i_shnum in shnums:
     46                if i_serverid not in hung_serverids:
     47                    self.g.hang_server(i_serverid, **kwargs)
     48                    hung_serverids.add(i_serverid)
     49
    3950    def _delete_all_shares_from(self, servers):
    4051        serverids = [id for (id, ss) in servers]
    4152        for (i_shnum, i_serverid, i_sharefile) in self.shares:
    class HungServerDownloadTest(GridTestMixin, ShouldFailMixin, unittest.TestCase): 
    113124            stage_4_d = None # currently we aren't doing any tests which require this for mutable files
    114125        else:
    115126            d = download_to_data(n)
    116             stage_4_d = n._downloader._all_downloads.keys()[0]._stage_4_d # too ugly! FIXME
     127            #stage_4_d = n._downloader._all_downloads.keys()[0]._stage_4_d # too ugly! FIXME
     128            stage_4_d = None
    117129        return (d, stage_4_d,)
    118130
    119131    def _wait_for_data(self, n):
    class HungServerDownloadTest(GridTestMixin, ShouldFailMixin, unittest.TestCase): 
    141153                                   self._download_and_check)
    142154        else:
    143155            return self.shouldFail(NotEnoughSharesError, self.basedir,
    144                                    "Failed to get enough shareholders",
     156                                   "ran out of shares",
    145157                                   self._download_and_check)
    146158
    147159
    class HungServerDownloadTest(GridTestMixin, ShouldFailMixin, unittest.TestCase): 
    204216
    205217    # The tests below do not currently pass for mutable files.
    206218
    207     def test_3_good_7_hung(self):
     219    def test_3_good_7_hung_immutable(self):
    208220        d = defer.succeed(None)
    209         for mutable in [False]:
    210             d.addCallback(lambda ign: self._set_up(mutable, "test_3_good_7_hung"))
    211             d.addCallback(lambda ign: self._hang(self.servers[3:]))
    212             d.addCallback(lambda ign: self._download_and_check())
     221        d.addCallback(lambda ign: self._set_up(False, "test_3_good_7_hung"))
     222        d.addCallback(lambda ign: self._hang(self.servers[3:]))
     223        d.addCallback(lambda ign: self._download_and_check())
    213224        return d
    214225
    215     def test_2_good_8_hung_then_1_recovers(self):
     226    def test_5_overdue_immutable(self):
     227        # restrict the ShareFinder to only allow 5 outstanding requests, and
     228        # arrange for the first 5 servers to hang. Then trigger the OVERDUE
     229        # timers (simulating 10 seconds passed), at which point the
     230        # ShareFinder should send additional queries and finish the download
     231        # quickly. If we didn't have OVERDUE timers, this test would fail by
     232        # timing out.
     233        done = []
     234        d = self._set_up(False, "test_5_overdue_immutable")
     235        def _reduce_max_outstanding_requests_and_download(ign):
     236            self._hang_shares(range(5))
     237            n = self.c0.create_node_from_uri(self.uri)
     238            self._sf = n._cnode._node._sharefinder
     239            self._sf.max_outstanding_requests = 5
     240            self._sf.OVERDUE_TIMEOUT = 1000.0
     241            d2 = download_to_data(n)
     242            # start download, but don't wait for it to complete yet
     243            def _done(res):
     244                done.append(res) # we will poll for this later
     245            d2.addBoth(_done)
     246        d.addCallback(_reduce_max_outstanding_requests_and_download)
     247        from foolscap.eventual import fireEventually, flushEventualQueue
     248        # wait here a while
     249        d.addCallback(lambda res: fireEventually(res))
     250        d.addCallback(lambda res: flushEventualQueue())
     251        d.addCallback(lambda ign: self.failIf(done))
     252        def _check_waiting(ign):
     253            # all the share requests should now be stuck waiting
     254            self.failUnlessEqual(len(self._sf.pending_requests), 5)
     255            # but none should be marked as OVERDUE until the timers expire
     256            self.failUnlessEqual(len(self._sf.overdue_requests), 0)
     257        d.addCallback(_check_waiting)
     258        def _mark_overdue(ign):
     259            # declare four requests overdue, allowing new requests to take
     260            # their place, and leaving one stuck. The finder will keep
     261            # sending requests until there are 5 non-overdue ones
     262            # outstanding, at which point we'll have 4 OVERDUE, 1
     263            # stuck-but-not-overdue, and 4 live requests. All 4 live requests
     264            # will retire before the download is complete and the ShareFinder
     265            # is shut off. That will leave 4 OVERDUE and 1
      266            # stuck-but-not-overdue, for a total of 5 requests in
     267            # _sf.pending_requests
     268            for t in self._sf.overdue_timers.values()[:4]:
     269                t.reset(-1.0)
     270            # the timers ought to fire before the eventual-send does
     271            return fireEventually()
     272        d.addCallback(_mark_overdue)
     273        def _we_are_done():
     274            return bool(done)
     275        d.addCallback(lambda ign: self.poll(_we_are_done))
     276        def _check_done(ign):
     277            self.failUnlessEqual(done, [immutable_plaintext])
     278            self.failUnlessEqual(len(self._sf.pending_requests), 5)
     279            self.failUnlessEqual(len(self._sf.overdue_requests), 4)
     280        d.addCallback(_check_done)
     281        return d
     282
     283    def test_3_good_7_hung_mutable(self):
     284        raise unittest.SkipTest("still broken")
    216285        d = defer.succeed(None)
    217         for mutable in [False]:
    218             d.addCallback(lambda ign: self._set_up(mutable, "test_2_good_8_hung_then_1_recovers"))
    219             d.addCallback(lambda ign: self._hang(self.servers[2:3]))
    220             d.addCallback(lambda ign: self._hang(self.servers[3:]))
    221             d.addCallback(lambda ign: self._unhang(self.servers[2:3]))
    222             d.addCallback(lambda ign: self._download_and_check())
     286        d.addCallback(lambda ign: self._set_up(True, "test_3_good_7_hung"))
     287        d.addCallback(lambda ign: self._hang(self.servers[3:]))
     288        d.addCallback(lambda ign: self._download_and_check())
    223289        return d
    224290
    225     def test_2_good_8_hung_then_1_recovers_with_2_shares(self):
     291    def test_2_good_8_hung_then_1_recovers_immutable(self):
    226292        d = defer.succeed(None)
    227         for mutable in [False]:
    228             d.addCallback(lambda ign: self._set_up(mutable, "test_2_good_8_hung_then_1_recovers_with_2_shares"))
    229             d.addCallback(lambda ign: self._copy_all_shares_from(self.servers[0:1], self.servers[2]))
    230             d.addCallback(lambda ign: self._hang(self.servers[2:3]))
    231             d.addCallback(lambda ign: self._hang(self.servers[3:]))
    232             d.addCallback(lambda ign: self._unhang(self.servers[2:3]))
    233             d.addCallback(lambda ign: self._download_and_check())
     293        d.addCallback(lambda ign: self._set_up(False, "test_2_good_8_hung_then_1_recovers"))
     294        d.addCallback(lambda ign: self._hang(self.servers[2:3]))
     295        d.addCallback(lambda ign: self._hang(self.servers[3:]))
     296        d.addCallback(lambda ign: self._unhang(self.servers[2:3]))
     297        d.addCallback(lambda ign: self._download_and_check())
     298        return d
     299
     300    def test_2_good_8_hung_then_1_recovers_mutable(self):
     301        raise unittest.SkipTest("still broken")
     302        d = defer.succeed(None)
     303        d.addCallback(lambda ign: self._set_up(True, "test_2_good_8_hung_then_1_recovers"))
     304        d.addCallback(lambda ign: self._hang(self.servers[2:3]))
     305        d.addCallback(lambda ign: self._hang(self.servers[3:]))
     306        d.addCallback(lambda ign: self._unhang(self.servers[2:3]))
     307        d.addCallback(lambda ign: self._download_and_check())
    234308        return d
    235309
    236     def test_failover_during_stage_4(self):
    237         # See #287
     310    def test_2_good_8_hung_then_1_recovers_with_2_shares_immutable(self):
    238311        d = defer.succeed(None)
    239         for mutable in [False]:
    240             d.addCallback(lambda ign: self._set_up(mutable, "test_failover_during_stage_4"))
    241             d.addCallback(lambda ign: self._corrupt_all_shares_in(self.servers[2:3], _corrupt_share_data))
    242             d.addCallback(lambda ign: self._set_up(mutable, "test_failover_during_stage_4"))
    243             d.addCallback(lambda ign: self._hang(self.servers[3:]))
    244             d.addCallback(lambda ign: self._start_download())
    245             def _after_starting_download((doned, started4d)):
    246                 started4d.addCallback(lambda ign: self._unhang(self.servers[3:4]))
    247                 doned.addCallback(self._check)
    248                 return doned
    249             d.addCallback(_after_starting_download)
     312        d.addCallback(lambda ign: self._set_up(False, "test_2_good_8_hung_then_1_recovers_with_2_shares"))
     313        d.addCallback(lambda ign: self._copy_all_shares_from(self.servers[0:1], self.servers[2]))
     314        d.addCallback(lambda ign: self._hang(self.servers[2:3]))
     315        d.addCallback(lambda ign: self._hang(self.servers[3:]))
     316        d.addCallback(lambda ign: self._unhang(self.servers[2:3]))
     317        d.addCallback(lambda ign: self._download_and_check())
     318        return d
    250319
     320    def test_2_good_8_hung_then_1_recovers_with_2_shares_mutable(self):
     321        raise unittest.SkipTest("still broken")
     322        d = defer.succeed(None)
     323        d.addCallback(lambda ign: self._set_up(True, "test_2_good_8_hung_then_1_recovers_with_2_shares"))
     324        d.addCallback(lambda ign: self._copy_all_shares_from(self.servers[0:1], self.servers[2]))
     325        d.addCallback(lambda ign: self._hang(self.servers[2:3]))
     326        d.addCallback(lambda ign: self._hang(self.servers[3:]))
     327        d.addCallback(lambda ign: self._unhang(self.servers[2:3]))
     328        d.addCallback(lambda ign: self._download_and_check())
    251329        return d
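
The test_5_overdue_immutable comments above describe the new OVERDUE behaviour: a request that hangs past OVERDUE_TIMEOUT stays in pending_requests but stops counting against max_outstanding_requests, so the ShareFinder can send a replacement query. A simplified, Twisted-free model of that bookkeeping (pending_requests, overdue_requests, and max_outstanding_requests are names taken from the test; the ShareFinderModel class and its waiting_servers list are invented for illustration and leave out the real timers):

class ShareFinderModel:
    def __init__(self, max_outstanding_requests=5):
        self.max_outstanding_requests = max_outstanding_requests
        self.pending_requests = set()   # sent but not yet retired
        self.overdue_requests = set()   # subset of pending_requests
        self.waiting_servers = []       # servers not yet queried

    def _num_live(self):
        # overdue requests no longer occupy a slot
        return len(self.pending_requests) - len(self.overdue_requests)

    def loop(self):
        # send new queries until the live (non-overdue) count hits the limit
        while (self.waiting_servers and
               self._num_live() < self.max_outstanding_requests):
            server = self.waiting_servers.pop(0)
            self.pending_requests.add(server)
            # the real ShareFinder also starts an OVERDUE_TIMEOUT timer here

    def overdue(self, server):
        # timer fired: the request stays pending but frees its slot
        self.overdue_requests.add(server)
        self.loop()

    def retire(self, server):
        # a response (or error) finally arrived
        self.pending_requests.discard(server)
        self.overdue_requests.discard(server)
        self.loop()

m = ShareFinderModel()
m.waiting_servers = ["s%d" % i for i in range(10)]
m.loop()                      # 5 requests outstanding, none overdue
for s in ["s0", "s1", "s2", "s3"]:
    m.overdue(s)              # each OVERDUE frees a slot for a new query
assert len(m.pending_requests) == 9
assert len(m.overdue_requests) == 4
assert m._num_live() == 5
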
  • src/allmydata/test/test_immutable.py

    diff --git a/src/allmydata/test/test_immutable.py b/src/allmydata/test/test_immutable.py
    index a7eaa1d..813c5be 100644
    a b from twisted.internet import defer 
    55from twisted.trial import unittest
    66import random
    77
    8 class Test(common.ShareManglingMixin, unittest.TestCase):
     8class Test(common.ShareManglingMixin, common.ShouldFailMixin, unittest.TestCase):
    99    def test_test_code(self):
    1010        # The following process of stashing the shares, running
    1111        # replace_shares, and asserting that the new set of shares equals the
    class Test(common.ShareManglingMixin, unittest.TestCase): 
    1818            return res
    1919        d.addCallback(_stash_it)
    2020
    21         # The following process of deleting 8 of the shares and asserting that you can't
    22         # download it is more to test this test code than to test the Tahoe code...
     21        # The following process of deleting 8 of the shares and asserting
     22        # that you can't download it is more to test this test code than to
     23        # test the Tahoe code...
    2324        def _then_delete_8(unused=None):
    2425            self.replace_shares(stash[0], storage_index=self.uri.get_storage_index())
    2526            for i in range(8):
    class Test(common.ShareManglingMixin, unittest.TestCase): 
    4243        return d
    4344
    4445    def test_download(self):
    45         """ Basic download.  (This functionality is more or less already tested by test code in
    46         other modules, but this module is also going to test some more specific things about
    47         immutable download.)
     46        """ Basic download. (This functionality is more or less already
     47        tested by test code in other modules, but this module is also going
     48        to test some more specific things about immutable download.)
    4849        """
    4950        d = defer.succeed(None)
    5051        before_download_reads = self._count_reads()
    5152        def _after_download(unused=None):
    5253            after_download_reads = self._count_reads()
    53             self.failIf(after_download_reads-before_download_reads > 27, (after_download_reads, before_download_reads))
     54            #print before_download_reads, after_download_reads
     55            self.failIf(after_download_reads-before_download_reads > 27,
     56                        (after_download_reads, before_download_reads))
    5457        d.addCallback(self._download_and_check_plaintext)
    5558        d.addCallback(_after_download)
    5659        return d
    5760
    5861    def test_download_from_only_3_remaining_shares(self):
    59         """ Test download after 7 random shares (of the 10) have been removed. """
     62        """ Test download after 7 random shares (of the 10) have been
     63        removed."""
    6064        d = defer.succeed(None)
    6165        def _then_delete_7(unused=None):
    6266            for i in range(7):
    class Test(common.ShareManglingMixin, unittest.TestCase): 
    6569        d.addCallback(_then_delete_7)
    6670        def _after_download(unused=None):
    6771            after_download_reads = self._count_reads()
     72            #print before_download_reads, after_download_reads
    6873            self.failIf(after_download_reads-before_download_reads > 27, (after_download_reads, before_download_reads))
    6974        d.addCallback(self._download_and_check_plaintext)
    7075        d.addCallback(_after_download)
    7176        return d
    7277
    7378    def test_download_from_only_3_shares_with_good_crypttext_hash(self):
    74         """ Test download after 7 random shares (of the 10) have had their crypttext hash tree corrupted. """
     79        """ Test download after 7 random shares (of the 10) have had their
     80        crypttext hash tree corrupted."""
    7581        d = defer.succeed(None)
    7682        def _then_corrupt_7(unused=None):
    7783            shnums = range(10)
    class Test(common.ShareManglingMixin, unittest.TestCase): 
    8490        return d
    8591
    8692    def test_download_abort_if_too_many_missing_shares(self):
    87         """ Test that download gives up quickly when it realizes there aren't enough shares out
    88         there."""
    89         d = defer.succeed(None)
    90         def _then_delete_8(unused=None):
    91             for i in range(8):
    92                 self._delete_a_share()
    93         d.addCallback(_then_delete_8)
    94 
    95         before_download_reads = self._count_reads()
    96         def _attempt_to_download(unused=None):
    97             d2 = download_to_data(self.n)
    98 
    99             def _callb(res):
    100                 self.fail("Should have gotten an error from attempt to download, not %r" % (res,))
    101             def _errb(f):
    102                 self.failUnless(f.check(NotEnoughSharesError))
    103             d2.addCallbacks(_callb, _errb)
    104             return d2
    105 
    106         d.addCallback(_attempt_to_download)
    107 
    108         def _after_attempt(unused=None):
    109             after_download_reads = self._count_reads()
    110             # To pass this test, you are required to give up before actually trying to read any
    111             # share data.
    112             self.failIf(after_download_reads-before_download_reads > 0, (after_download_reads, before_download_reads))
    113         d.addCallback(_after_attempt)
     93        """ Test that download gives up quickly when it realizes there aren't
     94        enough shares out there."""
     95        for i in range(8):
     96            self._delete_a_share()
     97        d = self.shouldFail(NotEnoughSharesError, "delete 8", None,
     98                            download_to_data, self.n)
     99        # the new downloader pipelines a bunch of read requests in parallel,
     100        # so don't bother asserting anything about the number of reads
    114101        return d
    115102
    116103    def test_download_abort_if_too_many_corrupted_shares(self):
    117         """ Test that download gives up quickly when it realizes there aren't enough uncorrupted
    118         shares out there. It should be able to tell because the corruption occurs in the
    119         sharedata version number, which it checks first."""
     104        """Test that download gives up quickly when it realizes there aren't
     105        enough uncorrupted shares out there. It should be able to tell
     106        because the corruption occurs in the sharedata version number, which
     107        it checks first."""
    120108        d = defer.succeed(None)
    121109        def _then_corrupt_8(unused=None):
    122110            shnums = range(10)
    class Test(common.ShareManglingMixin, unittest.TestCase): 
    140128
    141129        def _after_attempt(unused=None):
    142130            after_download_reads = self._count_reads()
    143             # To pass this test, you are required to give up before reading all of the share
    144             # data.  Actually, we could give up sooner than 45 reads, but currently our download
    145             # code does 45 reads.  This test then serves as a "performance regression detector"
    146             # -- if you change download code so that it takes *more* reads, then this test will
    147             # fail.
    148             self.failIf(after_download_reads-before_download_reads > 45, (after_download_reads, before_download_reads))
     131            #print before_download_reads, after_download_reads
     132            # To pass this test, you are required to give up before reading
     133            # all of the share data. Actually, we could give up sooner than
     134            # 45 reads, but currently our download code does 45 reads. This
     135            # test then serves as a "performance regression detector" -- if
     136            # you change download code so that it takes *more* reads, then
     137            # this test will fail.
     138            self.failIf(after_download_reads-before_download_reads > 45,
     139                        (after_download_reads, before_download_reads))
    149140        d.addCallback(_after_attempt)
    150141        return d
    151142
    152143
    153 # XXX extend these tests to show bad behavior of various kinds from servers: raising exception from each remove_foo() method, for example
     144# XXX extend these tests to show bad behavior of various kinds from servers:
     145# raising exception from each remove_foo() method, for example
    154146
    155147# XXX test disconnect DeadReferenceError from get_buckets and get_block_whatsit
    156148
     149# TODO: delete this whole file
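
test_download_abort_if_too_many_corrupted_shares above keeps its read-count assertion as a crude performance-regression detector: snapshot a read counter before the operation, run it, and require the delta to stay within an explicit budget. A hedged sketch of that pattern (CountingReader and check_read_budget are invented names; the real tests rely on the read counting provided by ShareManglingMixin's _count_reads()):

from StringIO import StringIO   # Python 2, like the rest of this patch

class CountingReader:
    # wraps a file-like object and counts how many read() calls it sees
    def __init__(self, f):
        self.f = f
        self.count = 0
    def read(self, *args):
        self.count += 1
        return self.f.read(*args)

def check_read_budget(reader, operation, budget):
    before = reader.count
    result = operation()
    delta = reader.count - before
    assert delta <= budget, "performance regression: %d reads > %d" % (delta, budget)
    return result

r = CountingReader(StringIO("sharedata" * 100))
check_read_budget(r, lambda: [r.read(25) for _ in range(10)], budget=45)
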
  • src/allmydata/test/test_mutable.py

    diff --git a/src/allmydata/test/test_mutable.py b/src/allmydata/test/test_mutable.py
    index 30d1083..021e196 100644
    a b def make_nodemaker(s=None, num_peers=10): 
    197197    keygen = client.KeyGenerator()
    198198    keygen.set_default_keysize(522)
    199199    nodemaker = NodeMaker(storage_broker, sh, None,
    200                           None, None, None,
     200                          None, None,
    201201                          {"k": 3, "n": 10}, keygen)
    202202    return nodemaker
    203203
  • src/allmydata/test/test_repairer.py

    diff --git a/src/allmydata/test/test_repairer.py b/src/allmydata/test/test_repairer.py
    index 02264e4..bb30cc4 100644
    a b from allmydata.test import common 
    33from allmydata.monitor import Monitor
    44from allmydata import check_results
    55from allmydata.interfaces import NotEnoughSharesError
    6 from allmydata.immutable import repairer, upload
     6from allmydata.immutable import upload
    77from allmydata.util.consumer import download_to_data
    88from twisted.internet import defer
    99from twisted.trial import unittest
    WRITE_LEEWAY = 35 
    363363# Optimally, you could repair one of these (small) files in a single write.
    364364DELTA_WRITES_PER_SHARE = 1 * WRITE_LEEWAY
    365365
    366 class DownUpConnector(unittest.TestCase):
    367     def test_deferred_satisfaction(self):
    368         duc = repairer.DownUpConnector()
    369         duc.registerProducer(None, True) # just because you have to call registerProducer first
    370         # case 1: total data in buf is < requested data at time of request
    371         duc.write('\x01')
    372         d = duc.read_encrypted(2, False)
    373         def _then(data):
    374             self.failUnlessEqual(len(data), 2)
    375             self.failUnlessEqual(data[0], '\x01')
    376             self.failUnlessEqual(data[1], '\x02')
    377         d.addCallback(_then)
    378         duc.write('\x02')
    379         return d
    380 
    381     def test_extra(self):
    382         duc = repairer.DownUpConnector()
    383         duc.registerProducer(None, True) # just because you have to call registerProducer first
    384         # case 1: total data in buf is < requested data at time of request
    385         duc.write('\x01')
    386         d = duc.read_encrypted(2, False)
    387         def _then(data):
    388             self.failUnlessEqual(len(data), 2)
    389             self.failUnlessEqual(data[0], '\x01')
    390             self.failUnlessEqual(data[1], '\x02')
    391         d.addCallback(_then)
    392         duc.write('\x02\x03')
    393         return d
    394 
    395     def test_short_reads_1(self):
    396         # You don't get fewer bytes than you requested -- instead you get no callback at all.
    397         duc = repairer.DownUpConnector()
    398         duc.registerProducer(None, True) # just because you have to call registerProducer first
    399 
    400         d = duc.read_encrypted(2, False)
    401         duc.write('\x04')
    402 
    403         def _callb(res):
    404             self.fail("Shouldn't have gotten this callback res: %s" % (res,))
    405         d.addCallback(_callb)
    406 
    407         # Also in the other order of read-vs-write:
    408         duc2 = repairer.DownUpConnector()
    409         duc2.registerProducer(None, True) # just because you have to call registerProducer first
    410         duc2.write('\x04')
    411         d = duc2.read_encrypted(2, False)
    412 
    413         def _callb2(res):
    414             self.fail("Shouldn't have gotten this callback res: %s" % (res,))
    415         d.addCallback(_callb2)
    416 
    417         # But once the DUC is closed then you *do* get short reads.
    418         duc3 = repairer.DownUpConnector()
    419         duc3.registerProducer(None, True) # just because you have to call registerProducer first
    420 
    421         d = duc3.read_encrypted(2, False)
    422         duc3.write('\x04')
    423         duc3.close()
    424         def _callb3(res):
    425             self.failUnlessEqual(len(res), 1)
    426             self.failUnlessEqual(res[0], '\x04')
    427         d.addCallback(_callb3)
    428         return d
    429 
    430     def test_short_reads_2(self):
    431         # Also in the other order of read-vs-write.
    432         duc = repairer.DownUpConnector()
    433         duc.registerProducer(None, True) # just because you have to call registerProducer first
    434 
    435         duc.write('\x04')
    436         d = duc.read_encrypted(2, False)
    437         duc.close()
    438 
    439         def _callb(res):
    440             self.failUnlessEqual(len(res), 1)
    441             self.failUnlessEqual(res[0], '\x04')
    442         d.addCallback(_callb)
    443         return d
    444 
    445     def test_short_reads_3(self):
    446         # Also if it is closed before the read.
    447         duc = repairer.DownUpConnector()
    448         duc.registerProducer(None, True) # just because you have to call registerProducer first
    449 
    450         duc.write('\x04')
    451         duc.close()
    452         d = duc.read_encrypted(2, False)
    453         def _callb(res):
    454             self.failUnlessEqual(len(res), 1)
    455             self.failUnlessEqual(res[0], '\x04')
    456         d.addCallback(_callb)
    457         return d
    458 
    459366class Repairer(GridTestMixin, unittest.TestCase, RepairTestMixin,
    460367               common.ShouldFailMixin):
    461368
  • src/allmydata/test/test_system.py

    diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py
    index 3351102..61662f2 100644
    a b from allmydata import uri 
    99from allmydata.storage.mutable import MutableShareFile
    1010from allmydata.storage.server import si_a2b
    1111from allmydata.immutable import offloaded, upload
    12 from allmydata.immutable.filenode import ImmutableFileNode, LiteralFileNode
     12from allmydata.immutable.literal import LiteralFileNode
     13from allmydata.immutable.filenode import ImmutableFileNode
    1314from allmydata.util import idlib, mathutil
    1415from allmydata.util import log, base32
    1516from allmydata.util.consumer import MemoryConsumer, download_to_data
  • src/allmydata/test/test_upload.py

    diff --git a/src/allmydata/test/test_upload.py b/src/allmydata/test/test_upload.py
    index 917472a..25d2d08 100644
    a b class EncodingParameters(GridTestMixin, unittest.TestCase, SetDEPMixin, 
    20862086#  upload with exactly 75 peers (shares_of_happiness)
    20872087#  have a download fail
    20882088#  cancel a download (need to implement more cancel stuff)
     2089
     2090# from test_encode:
     2091# NoNetworkGrid, upload part of ciphertext, kill server, continue upload
     2092# check with Kevan, they want to live in test_upload, existing tests might cover
     2093#     def test_lost_one_shareholder(self): # these are upload-side tests
     2094#     def test_lost_one_shareholder_early(self):
     2095#     def test_lost_many_shareholders(self):
     2096#     def test_lost_all_shareholders(self):
  • src/allmydata/test/test_util.py

    diff --git a/src/allmydata/test/test_util.py b/src/allmydata/test/test_util.py
    index 0a326b3..2fceee5 100644
    a b from twisted.trial import unittest 
    77from twisted.internet import defer, reactor
    88from twisted.python.failure import Failure
    99from twisted.python import log
     10from hashlib import md5
    1011
    1112from allmydata.util import base32, idlib, humanreadable, mathutil, hashutil
    1213from allmydata.util import assertutil, fileutil, deferredutil, abbreviate
    1314from allmydata.util import limiter, time_format, pollmixin, cachedir
    1415from allmydata.util import statistics, dictutil, pipeline
    1516from allmydata.util import log as tahoe_log
     17from allmydata.util.spans import Spans, overlap, DataSpans
    1618
    1719class Base32(unittest.TestCase):
    1820    def test_b2a_matches_Pythons(self):
    class Log(unittest.TestCase): 
    15371539        tahoe_log.err(format="intentional sample error",
    15381540                      failure=f, level=tahoe_log.OPERATIONAL, umid="wO9UoQ")
    15391541        self.flushLoggedErrors(SampleError)
     1542
     1543
     1544class SimpleSpans:
     1545    # this is a simple+inefficient form of util.spans.Spans . We compare the
     1546    # behavior of this reference model against the real (efficient) form.
     1547
     1548    def __init__(self, _span_or_start=None, length=None):
     1549        self._have = set()
     1550        if length is not None:
     1551            for i in range(_span_or_start, _span_or_start+length):
     1552                self._have.add(i)
     1553        elif _span_or_start:
     1554            for (start,length) in _span_or_start:
     1555                self.add(start, length)
     1556
     1557    def add(self, start, length):
     1558        for i in range(start, start+length):
     1559            self._have.add(i)
     1560        return self
     1561
     1562    def remove(self, start, length):
     1563        for i in range(start, start+length):
     1564            self._have.discard(i)
     1565        return self
     1566
     1567    def each(self):
     1568        return sorted(self._have)
     1569
     1570    def __iter__(self):
     1571        items = sorted(self._have)
     1572        prevstart = None
     1573        prevend = None
     1574        for i in items:
     1575            if prevstart is None:
     1576                prevstart = prevend = i
     1577                continue
     1578            if i == prevend+1:
     1579                prevend = i
     1580                continue
     1581            yield (prevstart, prevend-prevstart+1)
     1582            prevstart = prevend = i
     1583        if prevstart is not None:
     1584            yield (prevstart, prevend-prevstart+1)
     1585
     1586    def __len__(self):
     1587        # this also gets us bool(s)
     1588        return len(self._have)
     1589
     1590    def __add__(self, other):
     1591        s = self.__class__(self)
     1592        for (start, length) in other:
     1593            s.add(start, length)
     1594        return s
     1595
     1596    def __sub__(self, other):
     1597        s = self.__class__(self)
     1598        for (start, length) in other:
     1599            s.remove(start, length)
     1600        return s
     1601
     1602    def __iadd__(self, other):
     1603        for (start, length) in other:
     1604            self.add(start, length)
     1605        return self
     1606
     1607    def __isub__(self, other):
     1608        for (start, length) in other:
     1609            self.remove(start, length)
     1610        return self
     1611
     1612    def __and__(self, other):
     1613        s = self.__class__()
     1614        for i in other.each():
     1615            if i in self._have:
     1616                s.add(i, 1)
     1617        return s
     1618
     1619    def __contains__(self, (start,length)):
     1620        for i in range(start, start+length):
     1621            if i not in self._have:
     1622                return False
     1623        return True
     1624
     1625class ByteSpans(unittest.TestCase):
     1626    def test_basic(self):
     1627        s = Spans()
     1628        self.failUnlessEqual(list(s), [])
     1629        self.failIf(s)
     1630        self.failIf((0,1) in s)
     1631        self.failUnlessEqual(len(s), 0)
     1632
     1633        s1 = Spans(3, 4) # 3,4,5,6
     1634        self._check1(s1)
     1635
     1636        s2 = Spans(s1)
     1637        self._check1(s2)
     1638
     1639        s2.add(10,2) # 10,11
     1640        self._check1(s1)
     1641        self.failUnless((10,1) in s2)
     1642        self.failIf((10,1) in s1)
     1643        self.failUnlessEqual(list(s2.each()), [3,4,5,6,10,11])
     1644        self.failUnlessEqual(len(s2), 6)
     1645
     1646        s2.add(15,2).add(20,2)
     1647        self.failUnlessEqual(list(s2.each()), [3,4,5,6,10,11,15,16,20,21])
     1648        self.failUnlessEqual(len(s2), 10)
     1649
     1650        s2.remove(4,3).remove(15,1)
     1651        self.failUnlessEqual(list(s2.each()), [3,10,11,16,20,21])
     1652        self.failUnlessEqual(len(s2), 6)
     1653
     1654        s1 = SimpleSpans(3, 4) # 3 4 5 6
     1655        s2 = SimpleSpans(5, 4) # 5 6 7 8
     1656        i = s1 & s2
     1657        self.failUnlessEqual(list(i.each()), [5, 6])
     1658
     1659    def _check1(self, s):
     1660        self.failUnlessEqual(list(s), [(3,4)])
     1661        self.failUnless(s)
     1662        self.failUnlessEqual(len(s), 4)
     1663        self.failIf((0,1) in s)
     1664        self.failUnless((3,4) in s)
     1665        self.failUnless((3,1) in s)
     1666        self.failUnless((5,2) in s)
     1667        self.failUnless((6,1) in s)
     1668        self.failIf((6,2) in s)
     1669        self.failIf((7,1) in s)
     1670        self.failUnlessEqual(list(s.each()), [3,4,5,6])
     1671
     1672    def test_math(self):
     1673        s1 = Spans(0, 10) # 0,1,2,3,4,5,6,7,8,9
     1674        s2 = Spans(5, 3) # 5,6,7
     1675        s3 = Spans(8, 4) # 8,9,10,11
     1676
     1677        s = s1 - s2
     1678        self.failUnlessEqual(list(s.each()), [0,1,2,3,4,8,9])
     1679        s = s1 - s3
     1680        self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7])
     1681        s = s2 - s3
     1682        self.failUnlessEqual(list(s.each()), [5,6,7])
     1683        s = s1 & s2
     1684        self.failUnlessEqual(list(s.each()), [5,6,7])
     1685        s = s2 & s1
     1686        self.failUnlessEqual(list(s.each()), [5,6,7])
     1687        s = s1 & s3
     1688        self.failUnlessEqual(list(s.each()), [8,9])
     1689        s = s3 & s1
     1690        self.failUnlessEqual(list(s.each()), [8,9])
     1691        s = s2 & s3
     1692        self.failUnlessEqual(list(s.each()), [])
     1693        s = s3 & s2
     1694        self.failUnlessEqual(list(s.each()), [])
     1695        s = Spans() & s3
     1696        self.failUnlessEqual(list(s.each()), [])
     1697        s = s3 & Spans()
     1698        self.failUnlessEqual(list(s.each()), [])
     1699
     1700        s = s1 + s2
     1701        self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7,8,9])
     1702        s = s1 + s3
     1703        self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7,8,9,10,11])
     1704        s = s2 + s3
     1705        self.failUnlessEqual(list(s.each()), [5,6,7,8,9,10,11])
     1706
     1707        s = Spans(s1)
     1708        s -= s2
     1709        self.failUnlessEqual(list(s.each()), [0,1,2,3,4,8,9])
     1710        s = Spans(s1)
     1711        s -= s3
     1712        self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7])
     1713        s = Spans(s2)
     1714        s -= s3
     1715        self.failUnlessEqual(list(s.each()), [5,6,7])
     1716
     1717        s = Spans(s1)
     1718        s += s2
     1719        self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7,8,9])
     1720        s = Spans(s1)
     1721        s += s3
     1722        self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7,8,9,10,11])
     1723        s = Spans(s2)
     1724        s += s3
     1725        self.failUnlessEqual(list(s.each()), [5,6,7,8,9,10,11])
     1726
     1727    def test_random(self):
     1728        # attempt to increase coverage of corner cases by comparing behavior
     1729        # of a simple-but-slow model implementation against the
     1730        # complex-but-fast actual implementation, in a large number of random
     1731        # operations
     1732        S1 = SimpleSpans
     1733        S2 = Spans
     1734        s1 = S1(); s2 = S2()
     1735        seed = ""
     1736        def _create(subseed):
     1737            ns1 = S1(); ns2 = S2()
     1738            for i in range(10):
     1739                what = md5(subseed+str(i)).hexdigest()
     1740                start = int(what[2:4], 16)
     1741                length = max(1,int(what[5:6], 16))
     1742                ns1.add(start, length); ns2.add(start, length)
     1743            return ns1, ns2
     1744
     1745        #print
     1746        for i in range(1000):
     1747            what = md5(seed+str(i)).hexdigest()
     1748            op = what[0]
     1749            subop = what[1]
     1750            start = int(what[2:4], 16)
     1751            length = max(1,int(what[5:6], 16))
     1752            #print what
     1753            if op in "0":
     1754                if subop in "01234":
     1755                    s1 = S1(); s2 = S2()
     1756                elif subop in "5678":
     1757                    s1 = S1(start, length); s2 = S2(start, length)
     1758                else:
     1759                    s1 = S1(s1); s2 = S2(s2)
     1760                #print "s2 = %s" % s2.dump()
     1761            elif op in "123":
     1762                #print "s2.add(%d,%d)" % (start, length)
     1763                s1.add(start, length); s2.add(start, length)
     1764            elif op in "456":
     1765                #print "s2.remove(%d,%d)" % (start, length)
     1766                s1.remove(start, length); s2.remove(start, length)
     1767            elif op in "78":
     1768                ns1, ns2 = _create(what[7:11])
     1769                #print "s2 + %s" % ns2.dump()
     1770                s1 = s1 + ns1; s2 = s2 + ns2
     1771            elif op in "9a":
     1772                ns1, ns2 = _create(what[7:11])
     1773                #print "%s - %s" % (s2.dump(), ns2.dump())
     1774                s1 = s1 - ns1; s2 = s2 - ns2
     1775            elif op in "bc":
     1776                ns1, ns2 = _create(what[7:11])
     1777                #print "s2 += %s" % ns2.dump()
     1778                s1 += ns1; s2 += ns2
     1779            elif op in "de":
     1780                ns1, ns2 = _create(what[7:11])
     1781                #print "%s -= %s" % (s2.dump(), ns2.dump())
     1782                s1 -= ns1; s2 -= ns2
     1783            else:
     1784                ns1, ns2 = _create(what[7:11])
     1785                #print "%s &= %s" % (s2.dump(), ns2.dump())
     1786                s1 = s1 & ns1; s2 = s2 & ns2
     1787            #print "s2 now %s" % s2.dump()
     1788            self.failUnlessEqual(list(s1.each()), list(s2.each()))
     1789            self.failUnlessEqual(len(s1), len(s2))
     1790            self.failUnlessEqual(bool(s1), bool(s2))
     1791            self.failUnlessEqual(list(s1), list(s2))
     1792            for j in range(10):
     1793                what = md5(what[12:14]+str(j)).hexdigest()
     1794                start = int(what[2:4], 16)
     1795                length = max(1, int(what[5:6], 16))
     1796                span = (start, length)
     1797                self.failUnlessEqual(bool(span in s1), bool(span in s2))
     1798
     1799
     1800    # s()
     1801    # s(start,length)
     1802    # s(s0)
     1803    # s.add(start,length) : returns s
     1804    # s.remove(start,length)
     1805    # s.each() -> list of byte offsets, mostly for testing
     1806    # list(s) -> list of (start,length) tuples, one per span
     1807    # (start,length) in s -> True if (start..start+length-1) are all members
     1808    #  NOT equivalent to x in list(s)
     1809    # len(s) -> number of bytes, for testing, bool(), and accounting/limiting
     1810    # bool(s)  (__len__)
     1811    # s = s1+s2, s1-s2, +=s1, -=s1
     1812
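
The comment block above summarizes the Spans interface, including the note that (start,length) in s is a byte-coverage test rather than a lookup in list(s). A small hedged illustration of that distinction, using only calls exercised by the tests above:

from allmydata.util.spans import Spans

s = Spans(3, 4)               # covers bytes 3,4,5,6
assert list(s) == [(3, 4)]    # one contiguous span
assert (3, 4) in s            # the whole range is present
assert (4, 2) in s            # any fully-covered subrange is also "in" s
assert (4, 2) not in list(s)  # ...even though list(s) has no (4,2) entry
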
     1813    def test_overlap(self):
     1814        for a in range(20):
     1815            for b in range(10):
     1816                for c in range(20):
     1817                    for d in range(10):
     1818                        self._test_overlap(a,b,c,d)
     1819
     1820    def _test_overlap(self, a, b, c, d):
     1821        s1 = set(range(a,a+b))
     1822        s2 = set(range(c,c+d))
     1823        #print "---"
     1824        #self._show_overlap(s1, "1")
     1825        #self._show_overlap(s2, "2")
     1826        o = overlap(a,b,c,d)
     1827        expected = s1.intersection(s2)
     1828        if not expected:
     1829            self.failUnlessEqual(o, None)
     1830        else:
     1831            start,length = o
     1832            so = set(range(start,start+length))
     1833            #self._show(so, "o")
     1834            self.failUnlessEqual(so, expected)
     1835
     1836    def _show_overlap(self, s, c):
     1837        import sys
     1838        out = sys.stdout
     1839        if s:
     1840            for i in range(max(s)):
     1841                if i in s:
     1842                    out.write(c)
     1843                else:
     1844                    out.write(" ")
     1845        out.write("\n")
     1846
     1847def extend(s, start, length, fill):
     1848    if len(s) >= start+length:
     1849        return s
     1850    assert len(fill) == 1
     1851    return s + fill*(start+length-len(s))
     1852
     1853def replace(s, start, data):
     1854    assert len(s) >= start+len(data)
     1855    return s[:start] + data + s[start+len(data):]
     1856
     1857class SimpleDataSpans:
     1858    def __init__(self, other=None):
     1859        self.missing = "" # "1" where missing, "0" where found
     1860        self.data = ""
     1861        if other:
     1862            for (start, data) in other.get_chunks():
     1863                self.add(start, data)
     1864
     1865    def __len__(self):
     1866        return len(self.missing.translate(None, "1"))
     1867    def _dump(self):
     1868        return [i for (i,c) in enumerate(self.missing) if c == "0"]
     1869    def _have(self, start, length):
     1870        m = self.missing[start:start+length]
     1871        if not m or len(m)<length or int(m):
     1872            return False
     1873        return True
     1874    def get_chunks(self):
     1875        for i in self._dump():
     1876            yield (i, self.data[i])
     1877    def get_spans(self):
     1878        return SimpleSpans([(start,len(data))
     1879                            for (start,data) in self.get_chunks()])
     1880    def get(self, start, length):
     1881        if self._have(start, length):
     1882            return self.data[start:start+length]
     1883        return None
     1884    def pop(self, start, length):
     1885        data = self.get(start, length)
     1886        if data:
     1887            self.remove(start, length)
     1888        return data
     1889    def remove(self, start, length):
     1890        self.missing = replace(extend(self.missing, start, length, "1"),
     1891                               start, "1"*length)
     1892    def add(self, start, data):
     1893        self.missing = replace(extend(self.missing, start, len(data), "1"),
     1894                               start, "0"*len(data))
     1895        self.data = replace(extend(self.data, start, len(data), " "),
     1896                            start, data)
     1897
     1898
     1899class StringSpans(unittest.TestCase):
     1900    def do_basic(self, klass):
     1901        ds = klass()
     1902        self.failUnlessEqual(len(ds), 0)
     1903        self.failUnlessEqual(list(ds._dump()), [])
     1904        self.failUnlessEqual(sum([len(d) for (s,d) in ds.get_chunks()]), 0)
     1905        s = ds.get_spans()
     1906        self.failUnlessEqual(ds.get(0, 4), None)
     1907        self.failUnlessEqual(ds.pop(0, 4), None)
     1908        ds.remove(0, 4)
     1909
     1910        ds.add(2, "four")
     1911        self.failUnlessEqual(len(ds), 4)
     1912        self.failUnlessEqual(list(ds._dump()), [2,3,4,5])
     1913        self.failUnlessEqual(sum([len(d) for (s,d) in ds.get_chunks()]), 4)
     1914        s = ds.get_spans()
     1915        self.failUnless((2,2) in s)
     1916        self.failUnlessEqual(ds.get(0, 4), None)
     1917        self.failUnlessEqual(ds.pop(0, 4), None)
     1918        self.failUnlessEqual(ds.get(4, 4), None)
     1919
     1920        ds2 = klass(ds)
     1921        self.failUnlessEqual(len(ds2), 4)
     1922        self.failUnlessEqual(list(ds2._dump()), [2,3,4,5])
     1923        self.failUnlessEqual(sum([len(d) for (s,d) in ds2.get_chunks()]), 4)
     1924        self.failUnlessEqual(ds2.get(0, 4), None)
     1925        self.failUnlessEqual(ds2.pop(0, 4), None)
     1926        self.failUnlessEqual(ds2.pop(2, 3), "fou")
     1927        self.failUnlessEqual(sum([len(d) for (s,d) in ds2.get_chunks()]), 1)
     1928        self.failUnlessEqual(ds2.get(2, 3), None)
     1929        self.failUnlessEqual(ds2.get(5, 1), "r")
     1930        self.failUnlessEqual(ds.get(2, 3), "fou")
     1931        self.failUnlessEqual(sum([len(d) for (s,d) in ds.get_chunks()]), 4)
     1932
     1933        ds.add(0, "23")
     1934        self.failUnlessEqual(len(ds), 6)
     1935        self.failUnlessEqual(list(ds._dump()), [0,1,2,3,4,5])
     1936        self.failUnlessEqual(sum([len(d) for (s,d) in ds.get_chunks()]), 6)
     1937        self.failUnlessEqual(ds.get(0, 4), "23fo")
     1938        self.failUnlessEqual(ds.pop(0, 4), "23fo")
     1939        self.failUnlessEqual(sum([len(d) for (s,d) in ds.get_chunks()]), 2)
     1940        self.failUnlessEqual(ds.get(0, 4), None)
     1941        self.failUnlessEqual(ds.pop(0, 4), None)
     1942
     1943        ds = klass()
     1944        ds.add(2, "four")
     1945        ds.add(3, "ea")
     1946        self.failUnlessEqual(ds.get(2, 4), "fear")
     1947
     1948    def do_scan(self, klass):
     1949        # do a test with gaps and spans of size 1 and 2
     1950        #  left=(1,11) * right=(1,11) * gapsize=(1,2)
     1951        # 111, 112, 121, 122, 211, 212, 221, 222
     1952        #    211
     1953        #      121
     1954        #         112
     1955        #            212
     1956        #               222
     1957        #                   221
     1958        #                      111
     1959        #                        122
     1960        #  11 1  1 11 11  11  1 1  111
     1961        # 0123456789012345678901234567
     1962        # abcdefghijklmnopqrstuvwxyz-=
     1963        pieces = [(1, "bc"),
     1964                  (4, "e"),
     1965                  (7, "h"),
     1966                  (9, "jk"),
     1967                  (12, "mn"),
     1968                  (16, "qr"),
     1969                  (20, "u"),
     1970                  (22, "w"),
     1971                  (25, "z-="),
     1972                  ]
     1973        p_elements = set([1,2,4,7,9,10,12,13,16,17,20,22,25,26,27])
     1974        S = "abcdefghijklmnopqrstuvwxyz-="
     1975        # TODO: when adding data, add capital letters, to make sure we aren't
     1976        # just leaving the old data in place
     1977        l = len(S)
     1978        def base():
     1979            ds = klass()
     1980            for start, data in pieces:
     1981                ds.add(start, data)
     1982            return ds
     1983        def dump(s):
     1984            p = set(s._dump())
     1985            # wow, this is the first time I've ever wanted ?: in python
     1986            # note: this requires python2.5
     1987            d = "".join([(S[i] if i in p else " ") for i in range(l)])
     1988            assert len(d) == l
     1989            return d
     1990        DEBUG = False
     1991        for start in range(0, l):
     1992            for end in range(start+1, l):
     1993                # add [start-end) to the baseline
     1994                which = "%d-%d" % (start, end-1)
     1995                p_added = set(range(start, end))
     1996                b = base()
     1997                if DEBUG:
     1998                    print
     1999                    print dump(b), which
     2000                    add = klass(); add.add(start, S[start:end])
     2001                    print dump(add)
     2002                b.add(start, S[start:end])
     2003                if DEBUG:
     2004                    print dump(b)
     2005                # check that the new span is there
     2006                d = b.get(start, end-start)
     2007                self.failUnlessEqual(d, S[start:end], which)
     2008                # check that all the original pieces are still there
     2009                for t_start, t_data in pieces:
     2010                    t_len = len(t_data)
     2011                    self.failUnlessEqual(b.get(t_start, t_len),
     2012                                         S[t_start:t_start+t_len],
     2013                                         "%s %d+%d" % (which, t_start, t_len))
     2014                # check that a lot of subspans are mostly correct
     2015                for t_start in range(l):
     2016                    for t_len in range(1,4):
     2017                        d = b.get(t_start, t_len)
     2018                        if d is not None:
     2019                            which2 = "%s+(%d-%d)" % (which, t_start,
     2020                                                     t_start+t_len-1)
     2021                            self.failUnlessEqual(d, S[t_start:t_start+t_len],
     2022                                                 which2)
     2023                        # check that removing a subspan gives the right value
     2024                        b2 = klass(b)
     2025                        b2.remove(t_start, t_len)
     2026                        removed = set(range(t_start, t_start+t_len))
     2027                        for i in range(l):
     2028                            exp = (((i in p_elements) or (i in p_added))
     2029                                   and (i not in removed))
     2030                            which2 = "%s-(%d-%d)" % (which, t_start,
     2031                                                     t_start+t_len-1)
     2032                            self.failUnlessEqual(bool(b2.get(i, 1)), exp,
     2033                                                 which2+" %d" % i)
     2034
     2035    def test_test(self):
     2036        self.do_basic(SimpleDataSpans)
     2037        self.do_scan(SimpleDataSpans)
     2038
     2039    def test_basic(self):
     2040        self.do_basic(DataSpans)
     2041        self.do_scan(DataSpans)
     2042
     2043    def test_random(self):
     2044        # attempt to increase coverage of corner cases by comparing behavior
     2045        # of a simple-but-slow model implementation against the
     2046        # complex-but-fast actual implementation, in a large number of random
     2047        # operations
     2048        S1 = SimpleDataSpans
     2049        S2 = DataSpans
     2050        s1 = S1(); s2 = S2()
     2051        seed = ""
     2052        def _randstr(length, seed):
     2053            created = 0
     2054            pieces = []
     2055            while created < length:
     2056                piece = md5(seed + str(created)).hexdigest()
     2057                pieces.append(piece)
     2058                created += len(piece)
     2059            return "".join(pieces)[:length]
     2060        def _create(subseed):
     2061            ns1 = S1(); ns2 = S2()
     2062            for i in range(10):
     2063                what = md5(subseed+str(i)).hexdigest()
     2064                start = int(what[2:4], 16)
     2065                length = max(1,int(what[5:6], 16))
     2066                ns1.add(start, _randstr(length, what[7:9]));
     2067                ns2.add(start, _randstr(length, what[7:9]))
     2068            return ns1, ns2
     2069
     2070        #print
     2071        for i in range(1000):
     2072            what = md5(seed+str(i)).hexdigest()
     2073            op = what[0]
     2074            subop = what[1]
     2075            start = int(what[2:4], 16)
     2076            length = max(1,int(what[5:6], 16))
     2077            #print what
     2078            if op in "0":
     2079                if subop in "0123456":
     2080                    s1 = S1(); s2 = S2()
     2081                else:
     2082                    s1, s2 = _create(what[7:11])
     2083                #print "s2 = %s" % list(s2._dump())
     2084            elif op in "123456":
     2085                #print "s2.add(%d,%d)" % (start, length)
     2086                s1.add(start, _randstr(length, what[7:9]));
     2087                s2.add(start, _randstr(length, what[7:9]))
     2088            elif op in "789abc":
     2089                #print "s2.remove(%d,%d)" % (start, length)
     2090                s1.remove(start, length); s2.remove(start, length)
     2091            else:
     2092                #print "s2.pop(%d,%d)" % (start, length)
     2093                d1 = s1.pop(start, length); d2 = s2.pop(start, length)
     2094                self.failUnlessEqual(d1, d2)
     2095            #print "s1 now %s" % list(s1._dump())
     2096            #print "s2 now %s" % list(s2._dump())
     2097            self.failUnlessEqual(len(s1), len(s2))
     2098            self.failUnlessEqual(list(s1._dump()), list(s2._dump()))
     2099            for j in range(100):
     2100                what = md5(what[12:14]+str(j)).hexdigest()
     2101                start = int(what[2:4], 16)
     2102                length = max(1, int(what[5:6], 16))
     2103                d1 = s1.get(start, length); d2 = s2.get(start, length)
     2104                self.failUnlessEqual(d1, d2, "%d+%d" % (start, length))
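
The tests above exercise the byte-offset API summarized in the comment block at the top of this section. As a rough usage sketch (illustrative only, not part of the patch), the Spans class added by this diff in src/allmydata/util/spans.py behaves like a set of integers optimized for contiguous ranges:

    # hedged sketch: assumes the Spans class added later in this patch
    from allmydata.util.spans import Spans

    s = Spans()                  # empty set of byte offsets
    s.add(0, 5)                  # bytes [0-4]
    s.add(10, 3)                 # bytes [10-12]
    assert (0, 5) in s           # the whole range is present
    assert (4, 3) not in s       # [4-6] straddles a gap
    assert len(s) == 8           # total number of bytes held
    assert list(s.each()) == [0, 1, 2, 3, 4, 10, 11, 12]
    assert list(s) == [(0, 5), (10, 3)]   # one (start,length) per span

    # set arithmetic: "I want these bytes, which ones do I still need?"
    wanted = Spans(0, 20)
    outstanding = wanted - s
    assert list(outstanding) == [(5, 5), (13, 7)]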
  • src/allmydata/test/test_web.py

    diff --git a/src/allmydata/test/test_web.py b/src/allmydata/test/test_web.py
    index 3770864..a707722 100644
    a b from nevow import rend 
    1212from allmydata import interfaces, uri, webish, dirnode
    1313from allmydata.storage.shares import get_share_file
    1414from allmydata.storage_client import StorageFarmBroker
    15 from allmydata.immutable import upload, download
     15from allmydata.immutable import upload
     16from allmydata.immutable.downloader.status import DownloadStatus
    1617from allmydata.dirnode import DirectoryNode
    1718from allmydata.nodemaker import NodeMaker
    1819from allmydata.unknown import UnknownNode
    class FakeUploader(service.Service): 
    7576
    7677class FakeHistory:
    7778    _all_upload_status = [upload.UploadStatus()]
    78     _all_download_status = [download.DownloadStatus()]
     79    _all_download_status = [DownloadStatus("storage_index", 1234)]
    7980    _all_mapupdate_statuses = [servermap.UpdateStatus()]
    8081    _all_publish_statuses = [publish.PublishStatus()]
    8182    _all_retrieve_statuses = [retrieve.RetrieveStatus()]
    class FakeClient(Client): 
    111112        self.uploader = FakeUploader()
    112113        self.uploader.setServiceParent(self)
    113114        self.nodemaker = FakeNodeMaker(None, self._secret_holder, None,
    114                                        self.uploader, None, None,
     115                                       self.uploader, None,
    115116                                       None, None)
    116117
    117118    def startService(self):
    class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi 
    41864187                   "no servers were connected, but it might also indicate "
    41874188                   "severe corruption. You should perform a filecheck on "
    41884189                   "this object to learn more. The full error message is: "
    4189                    "Failed to get enough shareholders: have 0, need 3")
     4190                   "no shares (need 3). Last failure: None")
    41904191            self.failUnlessReallyEqual(exp, body)
    41914192        d.addCallback(_check_zero_shares)
    41924193
    class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi 
    41984199        def _check_one_share(body):
    41994200            self.failIf("<html>" in body, body)
    42004201            body = " ".join(body.strip().split())
    4201             exp = ("NotEnoughSharesError: This indicates that some "
     4202            msg = ("NotEnoughSharesError: This indicates that some "
    42024203                   "servers were unavailable, or that shares have been "
    42034204                   "lost to server departure, hard drive failure, or disk "
    42044205                   "corruption. You should perform a filecheck on "
    42054206                   "this object to learn more. The full error message is:"
    4206                    " Failed to get enough shareholders: have 1, need 3")
    4207             self.failUnlessReallyEqual(exp, body)
     4207                   " ran out of shares: %d complete, %d pending, 0 overdue,"
     4208                   " 0 unused, need 3. Last failure: None")
     4209            msg1 = msg % (1, 0)
     4210            msg2 = msg % (0, 1)
     4211            self.failUnless(body == msg1 or body == msg2, body)
    42084212        d.addCallback(_check_one_share)
    42094213
    42104214        d.addCallback(lambda ignored:
  • src/allmydata/util/observer.py

    diff --git a/src/allmydata/util/observer.py b/src/allmydata/util/observer.py
    index 13e4b51..3dc1d27 100644
    a b  
    11# -*- test-case-name: allmydata.test.test_observer -*-
    22
     3import weakref
    34from twisted.internet import defer
    45from foolscap.api import eventually
    56
    class ObserverList: 
    9192    def notify(self, *args, **kwargs):
    9293        for o in self._watchers:
    9394            eventually(o, *args, **kwargs)
     95
     96class EventStreamObserver:
     97    """A simple class to distribute multiple events to a single subscriber.
      98    It accepts arbitrary kwargs, but no positional arguments."""
     99    def __init__(self):
     100        self._watcher = None
     101        self._undelivered_results = []
     102        self._canceler = None
     103
     104    def set_canceler(self, c, methname):
     105        """I will call c.METHNAME(self) when somebody cancels me."""
     106        # we use a weakref to avoid creating a cycle between us and the thing
     107        # we're observing: they'll be holding a reference to us to compare
     108        # against the value we pass to their canceler function. However,
     109        # since bound methods are first-class objects (and not kept alive by
     110        # the object they're bound to), we can't just stash a weakref to the
     111        # bound cancel method. Instead, we must hold a weakref to the actual
     112        # object, and obtain its cancel method later.
     113        # http://code.activestate.com/recipes/81253-weakmethod/ has an
     114        # alternative.
     115        self._canceler = (weakref.ref(c), methname)
     116
     117    def subscribe(self, observer, **watcher_kwargs):
     118        self._watcher = (observer, watcher_kwargs)
     119        while self._undelivered_results:
     120            self._notify(self._undelivered_results.pop(0))
     121
     122    def notify(self, **result_kwargs):
     123        if self._watcher:
     124            self._notify(result_kwargs)
     125        else:
     126            self._undelivered_results.append(result_kwargs)
     127
     128    def _notify(self, result_kwargs):
     129        o, watcher_kwargs = self._watcher
     130        kwargs = dict(result_kwargs)
     131        kwargs.update(watcher_kwargs)
     132        eventually(o, **kwargs)
     133
     134    def cancel(self):
     135        wr,methname = self._canceler
     136        o = wr()
     137        if o:
     138            getattr(o,methname)(self)
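
EventStreamObserver buffers notify() calls until subscribe() is invoked, then forwards each event (plus any per-subscription kwargs) through foolscap's eventually(), and cancel() calls back into the producer via the weakref stored by set_canceler(). A rough sketch of that wiring (illustrative only; the fetcher class and method names here are invented for the example, and delivery needs a running reactor because eventually() defers to a later turn):

    from allmydata.util.observer import EventStreamObserver

    class HypotheticalFetcher:            # made-up producer for this sketch
        def __init__(self):
            self._obs = EventStreamObserver()
            self._obs.set_canceler(self, "_cancel_fetch")
        def _deliver(self, data):
            # events queue up until somebody subscribes, then arrive in order
            self._obs.notify(state="ranges", data=data)
        def _cancel_fetch(self, obs):
            print "subscriber went away, stop fetching"

    f = HypotheticalFetcher()
    f._deliver("block1")                  # buffered: no subscriber yet
    def got_event(state=None, data=None, which=None):
        print "event for", which, ":", state, data
    f._obs.subscribe(got_event, which="sh3")  # extra kwargs ride along
    f._deliver("block2")                  # delivered on a later reactor turn
    f._obs.cancel()                       # calls HypotheticalFetcher._cancel_fetch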
  • new file src/allmydata/util/spans.py

    diff --git a/src/allmydata/util/spans.py b/src/allmydata/util/spans.py
    new file mode 100755
    index 0000000..2a199f0
    - +  
     1
     2class Spans:
     3    """I represent a compressed list of booleans, one per index (an integer).
     4    Typically, each index represents an offset into a large string, pointing
     5    to a specific byte of a share. In this context, True means that byte has
     6    been received, or has been requested.
     7
     8    Another way to look at this is maintaining a set of integers, optimized
     9    for operations on spans like 'add range to set' and 'is range in set?'.
     10
      11    This is a Python equivalent of Perl's Set::IntSpan module, frequently
     12    used to represent .newsrc contents.
     13
     14    Rather than storing an actual (large) list or dictionary, I represent my
     15    internal state as a sorted list of spans, each with a start and a length.
     16    My API is presented in terms of start+length pairs. I provide set
     17    arithmetic operators, to efficiently answer questions like 'I want bytes
     18    XYZ, I already requested bytes ABC, and I've already received bytes DEF:
     19    what bytes should I request now?'.
     20
     21    The new downloader will use it to keep track of which bytes we've requested
     22    or received already.
     23    """
     24
     25    def __init__(self, _span_or_start=None, length=None):
     26        self._spans = list()
     27        if length is not None:
     28            self._spans.append( (_span_or_start, length) )
     29        elif _span_or_start:
     30            for (start,length) in _span_or_start:
     31                self.add(start, length)
     32        self._check()
     33
     34    def _check(self):
     35        assert sorted(self._spans) == self._spans
     36        prev_end = None
     37        try:
     38            for (start,length) in self._spans:
     39                if prev_end is not None:
     40                    assert start > prev_end
     41                prev_end = start+length
     42        except AssertionError:
     43            print "BAD:", self.dump()
     44            raise
     45
     46    def add(self, start, length):
     47        assert start >= 0
     48        assert length > 0
     49        #print " ADD [%d+%d -%d) to %s" % (start, length, start+length, self.dump())
     50        first_overlap = last_overlap = None
     51        for i,(s_start,s_length) in enumerate(self._spans):
     52            #print "  (%d+%d)-> overlap=%s adjacent=%s" % (s_start,s_length, overlap(s_start, s_length, start, length), adjacent(s_start, s_length, start, length))
     53            if (overlap(s_start, s_length, start, length)
     54                or adjacent(s_start, s_length, start, length)):
     55                last_overlap = i
     56                if first_overlap is None:
     57                    first_overlap = i
     58                continue
     59            # no overlap
     60            if first_overlap is not None:
     61                break
     62        #print "  first_overlap", first_overlap, last_overlap
     63        if first_overlap is None:
     64            # no overlap, so just insert the span and sort by starting
     65            # position.
     66            self._spans.insert(0, (start,length))
     67            self._spans.sort()
     68        else:
     69            # everything from [first_overlap] to [last_overlap] overlapped
     70            first_start,first_length = self._spans[first_overlap]
     71            last_start,last_length = self._spans[last_overlap]
     72            newspan_start = min(start, first_start)
     73            newspan_end = max(start+length, last_start+last_length)
     74            newspan_length = newspan_end - newspan_start
     75            newspan = (newspan_start, newspan_length)
     76            self._spans[first_overlap:last_overlap+1] = [newspan]
     77        #print "  ADD done: %s" % self.dump()
     78        self._check()
     79
     80        return self
     81
     82    def remove(self, start, length):
     83        assert start >= 0
     84        assert length > 0
     85        #print " REMOVE [%d+%d -%d) from %s" % (start, length, start+length, self.dump())
     86        first_complete_overlap = last_complete_overlap = None
     87        for i,(s_start,s_length) in enumerate(self._spans):
     88            s_end = s_start + s_length
     89            o = overlap(s_start, s_length, start, length)
     90            if o:
     91                o_start, o_length = o
     92                o_end = o_start+o_length
     93                if o_start == s_start and o_end == s_end:
     94                    # delete this span altogether
     95                    if first_complete_overlap is None:
     96                        first_complete_overlap = i
     97                    last_complete_overlap = i
     98                elif o_start == s_start:
     99                    # we only overlap the left side, so trim the start
     100                    #    1111
     101                    #  rrrr
     102                    #    oo
     103                    # ->   11
     104                    new_start = o_end
     105                    new_end = s_end
     106                    assert new_start > s_start
     107                    new_length = new_end - new_start
     108                    self._spans[i] = (new_start, new_length)
     109                elif o_end == s_end:
     110                    # we only overlap the right side
     111                    #    1111
     112                    #      rrrr
     113                    #      oo
     114                    # -> 11
     115                    new_start = s_start
     116                    new_end = o_start
     117                    assert new_end < s_end
     118                    new_length = new_end - new_start
     119                    self._spans[i] = (new_start, new_length)
     120                else:
     121                    # we overlap the middle, so create a new span. No need to
     122                    # examine any other spans.
     123                    #    111111
     124                    #      rr
     125                    #    LL  RR
     126                    left_start = s_start
     127                    left_end = o_start
     128                    left_length = left_end - left_start
     129                    right_start = o_end
     130                    right_end = s_end
     131                    right_length = right_end - right_start
     132                    self._spans[i] = (left_start, left_length)
     133                    self._spans.append( (right_start, right_length) )
     134                    self._spans.sort()
     135                    break
     136        if first_complete_overlap is not None:
     137            del self._spans[first_complete_overlap:last_complete_overlap+1]
     138        #print "  REMOVE done: %s" % self.dump()
     139        self._check()
     140        return self
     141
     142    def dump(self):
     143        return "len=%d: %s" % (len(self),
     144                               ",".join(["[%d-%d]" % (start,start+l-1)
     145                                         for (start,l) in self._spans]) )
     146
     147    def each(self):
     148        for start, length in self._spans:
     149            for i in range(start, start+length):
     150                yield i
     151
     152    def __iter__(self):
     153        for s in self._spans:
     154            yield s
     155
     156    def __len__(self):
     157        # this also gets us bool(s)
     158        return sum([length for start,length in self._spans])
     159
     160    def __add__(self, other):
     161        s = self.__class__(self)
     162        for (start, length) in other:
     163            s.add(start, length)
     164        return s
     165
     166    def __sub__(self, other):
     167        s = self.__class__(self)
     168        for (start, length) in other:
     169            s.remove(start, length)
     170        return s
     171
     172    def __iadd__(self, other):
     173        for (start, length) in other:
     174            self.add(start, length)
     175        return self
     176
     177    def __isub__(self, other):
     178        for (start, length) in other:
     179            self.remove(start, length)
     180        return self
     181
     182    def __and__(self, other):
     183        if not self._spans:
     184            return self.__class__()
     185        bounds = self.__class__(self._spans[0][0],
     186                                self._spans[-1][0]+self._spans[-1][1])
     187        not_other = bounds - other
     188        return self - not_other
     189
     190    def __contains__(self, (start,length)):
     191        for span_start,span_length in self._spans:
     192            o = overlap(start, length, span_start, span_length)
     193            if o:
     194                o_start,o_length = o
     195                if o_start == start and o_length == length:
     196                    return True
     197        return False
     198
     199def overlap(start0, length0, start1, length1):
     200    # return start2,length2 of the overlapping region, or None
     201    #  00      00   000   0000  00  00 000  00   00  00      00
     202    #     11    11   11    11   111 11 11  1111 111 11    11
     203    left = max(start0, start1)
     204    right = min(start0+length0, start1+length1)
     205    # if there is overlap, 'left' will be its start, and right-1 will
      206    # be the end
     207    if left < right:
     208        return (left, right-left)
     209    return None
     210
     211def adjacent(start0, length0, start1, length1):
     212    if (start0 < start1) and start0+length0 == start1:
     213        return True
     214    elif (start1 < start0) and start1+length1 == start0:
     215        return True
     216    return False
     217
     218class DataSpans:
     219    """I represent portions of a large string. Equivalently, I can be said to
     220    maintain a large array of characters (with gaps of empty elements). I can
     221    be used to manage access to a remote share, where some pieces have been
     222    retrieved, some have been requested, and others have not been read.
     223    """
     224
     225    def __init__(self, other=None):
     226        self.spans = [] # (start, data) tuples, non-overlapping, merged
     227        if other:
     228            for (start, data) in other.get_chunks():
     229                self.add(start, data)
     230
     231    def __len__(self):
     232        # return number of bytes we're holding
     233        return sum([len(data) for (start,data) in self.spans])
     234
     235    def _dump(self):
     236        # return iterator of sorted list of offsets, one per byte
     237        for (start,data) in self.spans:
     238            for i in range(start, start+len(data)):
     239                yield i
     240
     241    def dump(self):
     242        return "len=%d: %s" % (len(self),
     243                               ",".join(["[%d-%d]" % (start,start+len(data)-1)
     244                                         for (start,data) in self.spans]) )
     245
     246    def get_chunks(self):
     247        return list(self.spans)
     248
     249    def get_spans(self):
     250        """Return a Spans object with a bit set for each byte I hold"""
     251        return Spans([(start, len(data)) for (start,data) in self.spans])
     252
     253    def assert_invariants(self):
     254        if not self.spans:
     255            return
      256        prev_end = self.spans[0][0] + len(self.spans[0][1])
      257        for start, data in self.spans[1:]:
      258            if not start > prev_end:
      259                # adjacent or overlapping: bad
      260                print "ASSERTION FAILED", self.spans
      261                raise AssertionError
      262            prev_end = start + len(data)
     263
     264    def get(self, start, length):
     265        # returns a string of LENGTH, or None
     266        #print "get", start, length, self.spans
     267        end = start+length
     268        for (s_start,s_data) in self.spans:
     269            s_end = s_start+len(s_data)
     270            #print " ",s_start,s_end
     271            if s_start <= start < s_end:
     272                # we want some data from this span. Because we maintain
     273                # strictly merged and non-overlapping spans, everything we
     274                # want must be in this span.
     275                offset = start - s_start
     276                if offset + length > len(s_data):
     277                    #print " None, span falls short"
     278                    return None # span falls short
     279                #print " some", s_data[offset:offset+length]
     280                return s_data[offset:offset+length]
     281            if s_start >= end:
     282                # we've gone too far: no further spans will overlap
     283                #print " None, gone too far"
     284                return None
     285        #print " None, ran out of spans"
     286        return None
     287
     288    def add(self, start, data):
     289        # first: walk through existing spans, find overlap, modify-in-place
     290        #  create list of new spans
     291        #  add new spans
     292        #  sort
     293        #  merge adjacent spans
     294        #print "add", start, data, self.spans
     295        end = start + len(data)
     296        i = 0
     297        while len(data):
     298            #print " loop", start, data, i, len(self.spans), self.spans
     299            if i >= len(self.spans):
     300                #print " append and done"
     301                # append a last span
     302                self.spans.append( (start, data) )
     303                break
     304            (s_start,s_data) = self.spans[i]
     305            # five basic cases:
     306            #  a: OLD  b:OLDD  c1:OLD  c2:OLD   d1:OLDD  d2:OLD  e: OLLDD
     307            #    NEW     NEW      NEW     NEWW      NEW      NEW     NEW
     308            #
     309            # we handle A by inserting a new segment (with "N") and looping,
     310            # turning it into B or C. We handle B by replacing a prefix and
     311            # terminating. We handle C (both c1 and c2) by replacing the
     312            # segment (and, for c2, looping, turning it into A). We handle D
     313            # by replacing a suffix (and, for d2, looping, turning it into
     314            # A). We handle E by replacing the middle and terminating.
     315            if start < s_start:
     316                # case A: insert a new span, then loop with the remainder
     317                #print " insert new psan"
     318                s_len = s_start-start
     319                self.spans.insert(i, (start, data[:s_len]))
     320                i += 1
     321                start = s_start
     322                data = data[s_len:]
     323                continue
     324            s_len = len(s_data)
     325            s_end = s_start+s_len
     326            if s_start <= start < s_end:
     327                #print " modify this span", s_start, start, s_end
     328                # we want to modify some data in this span: a prefix, a
     329                # suffix, or the whole thing
     330                if s_start == start:
     331                    if s_end <= end:
     332                        #print " replace whole segment"
     333                        # case C: replace this segment
     334                        self.spans[i] = (s_start, data[:s_len])
     335                        i += 1
     336                        start += s_len
     337                        data = data[s_len:]
     338                        # C2 is where len(data)>0
     339                        continue
     340                    # case B: modify the prefix, retain the suffix
     341                    #print " modify prefix"
     342                    self.spans[i] = (s_start, data + s_data[len(data):])
     343                    break
     344                if start > s_start and end < s_end:
     345                    # case E: modify the middle
     346                    #print " modify middle"
     347                    prefix_len = start - s_start # we retain this much
     348                    suffix_len = s_end - end # and retain this much
     349                    newdata = s_data[:prefix_len] + data + s_data[-suffix_len:]
     350                    self.spans[i] = (s_start, newdata)
     351                    break
     352                # case D: retain the prefix, modify the suffix
     353                #print " modify suffix"
     354                prefix_len = start - s_start # we retain this much
     355                suffix_len = s_len - prefix_len # we replace this much
     356                #print "  ", s_data, prefix_len, suffix_len, s_len, data
     357                self.spans[i] = (s_start,
     358                                 s_data[:prefix_len] + data[:suffix_len])
     359                i += 1
     360                start += suffix_len
     361                data = data[suffix_len:]
     362                #print "  now", start, data
     363                # D2 is where len(data)>0
     364                continue
     365            # else we're not there yet
     366            #print " still looking"
     367            i += 1
     368            continue
     369        # now merge adjacent spans
     370        #print " merging", self.spans
     371        newspans = []
     372        for (s_start,s_data) in self.spans:
     373            if newspans and adjacent(newspans[-1][0], len(newspans[-1][1]),
     374                                     s_start, len(s_data)):
     375                newspans[-1] = (newspans[-1][0], newspans[-1][1] + s_data)
     376            else:
     377                newspans.append( (s_start, s_data) )
     378        self.spans = newspans
     379        self.assert_invariants()
     380        #print " done", self.spans
     381
     382    def remove(self, start, length):
     383        i = 0
     384        end = start + length
     385        #print "remove", start, length, self.spans
     386        while i < len(self.spans):
     387            (s_start,s_data) = self.spans[i]
     388            if s_start >= end:
     389                # this segment is entirely right of the removed region, and
     390                # all further segments are even further right. We're done.
     391                break
     392            s_len = len(s_data)
     393            s_end = s_start + s_len
     394            o = overlap(start, length, s_start, s_len)
     395            if not o:
     396                i += 1
     397                continue
     398            o_start, o_len = o
     399            o_end = o_start + o_len
     400            if o_len == s_len:
     401                # remove the whole segment
     402                del self.spans[i]
     403                continue
     404            if o_start == s_start:
     405                # remove a prefix, leaving the suffix from o_end to s_end
     406                prefix_len = o_end - o_start
     407                self.spans[i] = (o_end, s_data[prefix_len:])
     408                i += 1
     409                continue
     410            elif o_end == s_end:
     411                # remove a suffix, leaving the prefix from s_start to o_start
     412                prefix_len = o_start - s_start
     413                self.spans[i] = (s_start, s_data[:prefix_len])
     414                i += 1
     415                continue
     416            # remove the middle, creating a new segment
     417            # left is s_start:o_start, right is o_end:s_end
     418            left_len = o_start - s_start
     419            left = s_data[:left_len]
     420            right_len = s_end - o_end
     421            right = s_data[-right_len:]
     422            self.spans[i] = (s_start, left)
     423            self.spans.insert(i+1, (o_end, right))
     424            break
     425        #print " done", self.spans
     426
     427    def pop(self, start, length):
     428        data = self.get(start, length)
     429        if data:
     430            self.remove(start, length)
     431        return data
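
DataSpans is the byte-holding counterpart of Spans: it stores the actual received data for each covered range, merges touching chunks, and answers get() only when the whole requested range is present. A rough usage sketch (illustrative only, not part of the patch):

    from allmydata.util.spans import DataSpans

    d = DataSpans()
    d.add(10, "middle")                 # bytes 10..15
    d.add(0, "start")                   # bytes 0..4, a separate span
    assert len(d) == 11
    assert d.get(10, 6) == "middle"
    assert d.get(8, 4) is None          # bytes 8-9 were never added
    d.add(5, "-gap-")                   # fills 5..9: the spans merge
    assert d.get(0, 16) == "start-gap-middle"
    assert list(d.get_spans()) == [(0, 16)]
    assert d.pop(0, 5) == "start"       # get() plus remove() in one step
    assert d.get(0, 5) is None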
  • src/allmydata/web/download-status.xhtml

    diff --git a/src/allmydata/web/download-status.xhtml b/src/allmydata/web/download-status.xhtml
    index 77342ba..30abfca 100644
    a b  
    1818  <li>Status: <span n:render="status"/></li>
    1919</ul>
    2020
     21<div n:render="events"></div>
    2122
    2223<div n:render="results">
    2324  <h2>Download Results</h2>
  • src/allmydata/web/status.py

    diff --git a/src/allmydata/web/status.py b/src/allmydata/web/status.py
    index e4241a3..c3a55d7 100644
    a b class DownloadStatusPage(DownloadResultsRendererMixin, rend.Page): 
    358358    def download_results(self):
    359359        return defer.maybeDeferred(self.download_status.get_results)
    360360
     361    def relative_time(self, t):
     362        if t is None:
     363            return t
     364        if self.download_status.started is not None:
     365            return t - self.download_status.started
     366        return t
     367    def short_relative_time(self, t):
     368        t = self.relative_time(t)
     369        if t is None:
     370            return ""
     371        return "+%.6fs" % t
     372
     373    def renderHTTP(self, ctx):
     374        req = inevow.IRequest(ctx)
     375        t = get_arg(req, "t")
     376        if t == "json":
     377            return self.json(req)
     378        return rend.Page.renderHTTP(self, ctx)
     379
     380    def json(self, req):
     381        req.setHeader("content-type", "text/plain")
     382        data = {}
     383        dyhb_events = []
     384        for serverid,requests in self.download_status.dyhb_requests.iteritems():
     385            for req in requests:
     386                dyhb_events.append( (base32.b2a(serverid),) + req )
     387        dyhb_events.sort(key=lambda req: req[1])
     388        data["dyhb"] = dyhb_events
     389        request_events = []
     390        for serverid,requests in self.download_status.requests.iteritems():
     391            for req in requests:
     392                request_events.append( (base32.b2a(serverid),) + req )
     393        request_events.sort(key=lambda req: (req[4],req[1]))
     394        data["requests"] = request_events
     395        data["segment"] = self.download_status.segment_events
     396        data["read"] = self.download_status.read_events
     397        return simplejson.dumps(data, indent=1) + "\n"
     398
     399    def render_events(self, ctx, data):
     400        if not self.download_status.storage_index:
     401            return
     402        srt = self.short_relative_time
     403        l = T.ul()
     404
     405        t = T.table(class_="status-download-events")
     406        t[T.tr[T.td["serverid"], T.td["sent"], T.td["received"],
     407               T.td["shnums"], T.td["RTT"]]]
     408        dyhb_events = []
     409        for serverid,requests in self.download_status.dyhb_requests.iteritems():
     410            for req in requests:
     411                dyhb_events.append( (serverid,) + req )
     412        dyhb_events.sort(key=lambda req: req[1])
     413        for d_ev in dyhb_events:
     414            (serverid, sent, shnums, received) = d_ev
     415            serverid_s = idlib.shortnodeid_b2a(serverid)
     416            rtt = received - sent
     417            t[T.tr(style="background: %s" % self.color(serverid))[
     418                [T.td[serverid_s], T.td[srt(sent)], T.td[srt(received)],
     419                 T.td[",".join([str(shnum) for shnum in shnums])],
     420                 T.td[self.render_time(None, rtt)],
     421                 ]]]
     422        l["DYHB Requests:", t]
     423
     424        t = T.table(class_="status-download-events")
     425        t[T.tr[T.td["range"], T.td["start"], T.td["finish"], T.td["got"],
     426               T.td["time"], T.td["decrypttime"], T.td["pausedtime"],
     427               T.td["speed"]]]
     428        for r_ev in self.download_status.read_events:
     429            (start, length, requesttime, finishtime, bytes, decrypt, paused) = r_ev
      430            #print r_ev
     431            if finishtime is not None:
     432                rtt = finishtime - requesttime - paused
     433                speed = self.render_rate(None, 1.0 * bytes / rtt)
     434                rtt = self.render_time(None, rtt)
     435                decrypt = self.render_time(None, decrypt)
     436                paused = self.render_time(None, paused)
     437            else:
     438                speed, rtt, decrypt, paused = "","","",""
     439            t[T.tr[T.td["[%d:+%d]" % (start, length)],
     440                   T.td[srt(requesttime)], T.td[srt(finishtime)],
     441                   T.td[bytes], T.td[rtt], T.td[decrypt], T.td[paused],
     442                   T.td[speed],
     443                   ]]
     444        l["Read Events:", t]
     445
     446        t = T.table(class_="status-download-events")
     447        t[T.tr[T.td["type"], T.td["segnum"], T.td["when"], T.td["range"],
     448               T.td["decodetime"], T.td["segtime"], T.td["speed"]]]
     449        reqtime = (None, None)
     450        for s_ev in self.download_status.segment_events:
     451            (etype, segnum, when, segstart, seglen, decodetime) = s_ev
     452            if etype == "request":
     453                t[T.tr[T.td["request"], T.td["seg%d" % segnum],
     454                       T.td[srt(when)]]]
     455                reqtime = (segnum, when)
     456            elif etype == "delivery":
     457                if reqtime[0] == segnum:
     458                    segtime = when - reqtime[1]
     459                    speed = self.render_rate(None, 1.0 * seglen / segtime)
     460                    segtime = self.render_time(None, segtime)
     461                else:
     462                    segtime, speed = "", ""
     463                t[T.tr[T.td["delivery"], T.td["seg%d" % segnum],
     464                       T.td[srt(when)],
     465                       T.td["[%d:+%d]" % (segstart, seglen)],
     466                       T.td[self.render_time(None,decodetime)],
     467                       T.td[segtime], T.td[speed]]]
     468            elif etype == "error":
     469                t[T.tr[T.td["error"], T.td["seg%d" % segnum]]]
     470        l["Segment Events:", t]
     471
     472        t = T.table(border="1")
     473        t[T.tr[T.td["serverid"], T.td["shnum"], T.td["range"],
     474               T.td["txtime"], T.td["rxtime"], T.td["received"], T.td["RTT"]]]
     475        reqtime = (None, None)
     476        request_events = []
     477        for serverid,requests in self.download_status.requests.iteritems():
     478            for req in requests:
     479                request_events.append( (serverid,) + req )
     480        request_events.sort(key=lambda req: (req[4],req[1]))
     481        for r_ev in request_events:
     482            (peerid, shnum, start, length, sent, receivedlen, received) = r_ev
     483            rtt = None
     484            if received is not None:
     485                rtt = received - sent
     486            peerid_s = idlib.shortnodeid_b2a(peerid)
     487            t[T.tr(style="background: %s" % self.color(peerid))[
     488                T.td[peerid_s], T.td[shnum],
     489                T.td["[%d:+%d]" % (start, length)],
     490                T.td[srt(sent)], T.td[srt(received)], T.td[receivedlen],
     491                T.td[self.render_time(None, rtt)],
     492                ]]
     493        l["Requests:", t]
     494
     495        return l
     496
     497    def color(self, peerid):
     498        def m(c):
     499            return min(ord(c) / 2 + 0x80, 0xff)
     500        return "#%02x%02x%02x" % (m(peerid[0]), m(peerid[1]), m(peerid[2]))
     501
    361502    def render_results(self, ctx, data):
    362503        d = self.download_results()
    363504        def _got_results(results):
    class DownloadStatusPage(DownloadResultsRendererMixin, rend.Page): 
    371512        TIME_FORMAT = "%H:%M:%S %d-%b-%Y"
    372513        started_s = time.strftime(TIME_FORMAT,
    373514                                  time.localtime(data.get_started()))
    374         return started_s
     515        return started_s + " (%s)" % data.get_started()
    375516
    376517    def render_si(self, ctx, data):
    377518        si_s = base32.b2a_or_none(data.get_storage_index())
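
The new json() method above makes the download-status data available in machine-readable form when the page is fetched with ?t=json: a dict with "dyhb", "requests", "segment", and "read" keys, where each dyhb entry is (base32 serverid, sent, shnums, received) and each request entry is (base32 serverid, shnum, start, length, sent, receivedlen, received). A hypothetical client-side helper (illustrative only, not part of the patch) could recompute the DYHB round-trip times the same way render_events() does:

    import simplejson

    def dyhb_rtts(json_text):
        # json_text is the body returned for ?t=json on a download-status page
        data = simplejson.loads(json_text)
        return [(serverid, received - sent)
                for (serverid, sent, shnums, received) in data["dyhb"]
                if received is not None]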
  • src/allmydata/web/tahoe.css

    diff --git a/src/allmydata/web/tahoe.css b/src/allmydata/web/tahoe.css
    index a9aced6..0ed83fc 100644
    a b table.tahoe-directory { 
    135135  display: inline;
    136136  text-align: center;
    137137  padding: 0 1em;
    138 }
    139  No newline at end of file
     138}
     139
     140/* recent upload/download status pages */
     141
     142table.status-download-events {
     143  border: 1px solid #aaa;
     144}
     145table.status-download-events td {
     146  border: 1px solid #a00;
      147  padding: 2px;
     148}