Sun Jul 18 22:47:44 MDT 2010 zooko@zooko.com * trivial: rename and add in-line doc to clarify "used_peers" => "upload_servers" Sun Jul 18 22:49:48 MDT 2010 zooko@zooko.com * benchmarking: update bench_dirnode to be correct and use the shiniest new pyutil.benchutil features concerning what units you measure in Mon Jul 19 02:20:00 MDT 2010 zooko@zooko.com * immutable: use PrefixingLogMixin to organize logging in Tahoe2PeerSelector and add more detailed messages about peer New patches: [trivial: rename and add in-line doc to clarify "used_peers" => "upload_servers" zooko@zooko.com**20100719044744 Ignore-this: 93c42081676e0dea181e55187cfc506d ] { hunk ./src/allmydata/immutable/upload.py 178 """ @return: (used_peers, already_peers), where used_peers is a set of PeerTracker instances that have agreed to hold some shares - for us (the shnum is stashed inside the PeerTracker), + for us (the shareids are stashed inside the PeerTracker), and already_peers is a dict mapping shnum to a set of peers which claim to already have the share. """ hunk ./src/allmydata/immutable/upload.py 913 def set_shareholders(self, (used_peers, already_peers), encoder): """ - @param used_peers: a sequence of PeerTracker objects + @param used_peers: a sequence of PeerTracker objects that have agreed to hold some shares for us (the shareids are stashed inside the PeerTracker) @paran already_peers: a dict mapping sharenum to a set of peerids that claim to already have this share """ replace ./src/allmydata/immutable/upload.py [A-Za-z_0-9] used_peers upload_servers replace ./src/allmydata/test/test_upload.py [A-Za-z_0-9] used_peers upload_servers replace ./src/allmydata/util/happinessutil.py [A-Za-z_0-9] used_peers upload_servers } [benchmarking: update bench_dirnode to be correct and use the shiniest new pyutil.benchutil features concerning what units you measure in zooko@zooko.com**20100719044948 Ignore-this: b72059e4ff921741b490e6b47ec687c6 ] { hunk ./src/allmydata/test/bench_dirnode.py 38 def create_from_cap(self, writecap, readcap=None, deep_immutable=False, name=''): return FakeNode() -def random_unicode(l): - while True: +def random_unicode(n=140, b=3, codec='utf-8'): + l = [] + while len(l) < n: try: hunk ./src/allmydata/test/bench_dirnode.py 42 - return os.urandom(l).decode('utf-8') + u = os.urandom(b).decode(codec)[0] except UnicodeDecodeError: pass hunk ./src/allmydata/test/bench_dirnode.py 45 + else: + l.append(u) + return u''.join(l) encoding_parameters = {"k": 3, "n": 10} def random_metadata(): hunk ./src/allmydata/test/bench_dirnode.py 52 d = {} - d['ctime'] = random.random() - d['mtime'] = random.random() d['tahoe'] = {} d['tahoe']['linkcrtime'] = random.random() d['tahoe']['linkmotime'] = random.random() hunk ./src/allmydata/test/bench_dirnode.py 93 def init_for_pack(self, N): for i in xrange(len(self.children), N): - name = random_unicode(random.randrange(1, 9)) + name = random_unicode(random.randrange(0, 10)) self.children.append( (name, self.random_child()) ) def init_for_unpack(self, N): hunk ./src/allmydata/test/bench_dirnode.py 114 (self.init_for_pack, self.pack), (self.init_for_unpack, self.unpack_and_repack)]: print "benchmarking %s" % (func,) - benchutil.bench(self.unpack_and_repack, initfunc=self.init_for_unpack, - TOPXP=12)#, profile=profile, profresults=PROF_FILE_NAME) + for N in 16, 512, 2048, 16384: + print "%5d" % N, + benchutil.rep_bench(func, N, initfunc=initfunc, MAXREPS=20, UNITS_PER_SECOND=1000) + benchutil.print_bench_footer(UNITS_PER_SECOND=1000) + print "(milliseconds)" def prof_benchmarks(self): # This requires pyutil >= v1.3.34. } [immutable: use PrefixingLogMixin to organize logging in Tahoe2PeerSelector and add more detailed messages about peer zooko@zooko.com**20100719082000 Ignore-this: e034c4988b327f7e138a106d913a3082 ] { hunk ./src/allmydata/immutable/upload.py 77 # TODO: actual extensions are closer to 419 bytes, so we can probably lower # this. +def pretty_print_shnum_to_servers(s): + return ', '.join([ "sh%s: %s" % (k, '+'.join([idlib.shortnodeid_b2a(x) for x in v])) for k, v in s.iteritems() ]) + class PeerTracker: def __init__(self, peerid, storage_server, sharesize, blocksize, num_segments, num_share_hashes, hunk ./src/allmydata/immutable/upload.py 158 del self.buckets[sharenum] -class Tahoe2PeerSelector: +class Tahoe2PeerSelector(log.PrefixingLogMixin): def __init__(self, upload_id, logparent=None, upload_status=None): self.upload_id = upload_id hunk ./src/allmydata/immutable/upload.py 169 self.num_peers_contacted = 0 self.last_failure_msg = None self._status = IUploadStatus(upload_status) - self._log_parent = log.msg("%s starting" % self, parent=logparent) + log.PrefixingLogMixin.__init__(self, 'tahoe.immutable.upload', logparent, prefix=upload_id) + self.log("starting", level=log.OPERATIONAL) def __repr__(self): return "" % self.upload_id hunk ./src/allmydata/immutable/upload.py 275 ds.append(d) self.num_peers_contacted += 1 self.query_count += 1 - log.msg("asking peer %s for any existing shares for " - "upload id %s" - % (idlib.shortnodeid_b2a(peer.peerid), self.upload_id), - level=log.NOISY, parent=self._log_parent) + self.log("asking peer %s for any existing shares" % + (idlib.shortnodeid_b2a(peer.peerid),), + level=log.NOISY) dl = defer.DeferredList(ds) dl.addCallback(lambda ign: self._loop()) return dl hunk ./src/allmydata/immutable/upload.py 289 Tahoe2PeerSelector._existing_shares. """ if isinstance(res, failure.Failure): - log.msg("%s got error during existing shares check: %s" + self.log("%s got error during existing shares check: %s" % (idlib.shortnodeid_b2a(peer), res), hunk ./src/allmydata/immutable/upload.py 291 - level=log.UNUSUAL, parent=self._log_parent) + level=log.UNUSUAL) self.error_count += 1 self.bad_query_count += 1 else: hunk ./src/allmydata/immutable/upload.py 298 buckets = res if buckets: self.peers_with_shares.add(peer) - log.msg("response from peer %s: alreadygot=%s" + self.log("response to get_buckets() from peer %s: alreadygot=%s" % (idlib.shortnodeid_b2a(peer), tuple(sorted(buckets))), hunk ./src/allmydata/immutable/upload.py 300 - level=log.NOISY, parent=self._log_parent) + level=log.NOISY) for bucket in buckets: self.preexisting_shares.setdefault(bucket, set()).add(peer) if self.homeless_shares and bucket in self.homeless_shares: hunk ./src/allmydata/immutable/upload.py 334 merged = merge_peers(self.preexisting_shares, self.use_peers) effective_happiness = servers_of_happiness(merged) if self.servers_of_happiness <= effective_happiness: - msg = ("peer selection successful for %s: %s" % (self, - self._get_progress_message())) - log.msg(msg, parent=self._log_parent) + msg = ("server selection successful for %s: %s: pretty_print_merged: %s, self.use_peers: %s, self.preexisting_shares: %s" % (self, self._get_progress_message(), pretty_print_shnum_to_servers(merged), [', '.join(["%s: %s" % (k, idlib.shortnodeid_b2a(v._nodeid),) for k,v in p.buckets.iteritems()]) for p in self.use_peers], pretty_print_shnum_to_servers(self.preexisting_shares))) + self.log(msg, level=log.OPERATIONAL) return (self.use_peers, self.preexisting_shares) else: # We're not okay right now, but maybe we can fix it by hunk ./src/allmydata/immutable/upload.py 379 self.needed_shares, self.servers_of_happiness, effective_happiness) - log.msg("server selection unsuccessful for %r: %s (%s), merged=%r" - % (self, msg, self._get_progress_message(), merged), level=log.INFREQUENT) + self.log("server selection unsuccessful for %r: %s (%s), merged=%s" % (self, msg, self._get_progress_message(), pretty_print_shnum_to_servers(merged)), level=log.INFREQUENT) return self._failed("%s (%s)" % (msg, self._get_progress_message())) if self.uncontacted_peers: hunk ./src/allmydata/immutable/upload.py 402 elif self.contacted_peers: # ask a peer that we've already asked. if not self._started_second_pass: - log.msg("starting second pass", parent=self._log_parent, + self.log("starting second pass", level=log.NOISY) self._started_second_pass = True num_shares = mathutil.div_ceil(len(self.homeless_shares), hunk ./src/allmydata/immutable/upload.py 440 self._get_progress_message())) if self.last_failure_msg: msg += " (%s)" % (self.last_failure_msg,) - log.msg(msg, level=log.UNUSUAL, parent=self._log_parent) + self.log(msg, level=log.UNUSUAL) return self._failed(msg) else: # we placed enough to be happy, so we're done hunk ./src/allmydata/immutable/upload.py 446 if self._status: self._status.set_status("Placed all shares") + msg = ("server selection successful (no more servers) for %s: %s: %s" % (self, + self._get_progress_message(), pretty_print_shnum_to_servers(merged))) + self.log(msg, level=log.OPERATIONAL) return (self.use_peers, self.preexisting_shares) def _got_response(self, res, peer, shares_to_ask, put_peer_here): hunk ./src/allmydata/immutable/upload.py 455 if isinstance(res, failure.Failure): # This is unusual, and probably indicates a bug or a network # problem. - log.msg("%s got error during peer selection: %s" % (peer, res), - level=log.UNUSUAL, parent=self._log_parent) + self.log("%s got error during peer selection: %s" % (peer, res), + level=log.UNUSUAL) self.error_count += 1 self.bad_query_count += 1 self.homeless_shares = list(shares_to_ask) + self.homeless_shares hunk ./src/allmydata/immutable/upload.py 475 self.last_failure_msg = msg else: (alreadygot, allocated) = res - log.msg("response from peer %s: alreadygot=%s, allocated=%s" + self.log("response to allocate_buckets() from peer %s: alreadygot=%s, allocated=%s" % (idlib.shortnodeid_b2a(peer.peerid), tuple(sorted(alreadygot)), tuple(sorted(allocated))), hunk ./src/allmydata/immutable/upload.py 478 - level=log.NOISY, parent=self._log_parent) + level=log.NOISY) progress = False for s in alreadygot: self.preexisting_shares.setdefault(s, set()).add(peer.peerid) hunk ./src/allmydata/immutable/upload.py 921 @paran already_peers: a dict mapping sharenum to a set of peerids that claim to already have this share """ - self.log("_send_shares, upload_servers is %s" % (upload_servers,)) + self.log("set_shareholders; upload_servers is %s, already_peers is %s" % ([', '.join(["%s: %s" % (k, idlib.shortnodeid_b2a(v._nodeid),) for k,v in p.buckets.iteritems()]) for p in upload_servers], already_peers)) # record already-present shares in self._results self._results.preexisting_shares = len(already_peers) hunk ./src/allmydata/immutable/upload.py 935 for shnum in peer.buckets: self._peer_trackers[shnum] = peer servermap.setdefault(shnum, set()).add(peer.peerid) + self.log("set_shareholders; %s (%s) == %s (%s)" % (len(buckets), buckets, sum([len(peer.buckets) for peer in upload_servers]), [(p.buckets, p.peerid) for p in upload_servers])) assert len(buckets) == sum([len(peer.buckets) for peer in upload_servers]), "%s (%s) != %s (%s)" % (len(buckets), buckets, sum([len(peer.buckets) for peer in upload_servers]), [(p.buckets, p.peerid) for p in upload_servers]) encoder.set_shareholders(buckets, servermap) hunk ./src/allmydata/storage/server.py 8 from zope.interface import implements from allmydata.interfaces import RIStorageServer, IStatsProducer -from allmydata.util import fileutil, log, time_format +from allmydata.util import fileutil, idlib, log, time_format import allmydata # for __full_version__ from allmydata.storage.common import si_b2a, si_a2b, storage_index_to_dir hunk ./src/allmydata/storage/server.py 109 expiration_sharetypes) self.lease_checker.setServiceParent(self) + def __repr__(self): + return "" % (idlib.shortnodeid_b2a(self.my_nodeid),) + def add_bucket_counter(self): statefile = os.path.join(self.storedir, "bucket_counter.state") self.bucket_counter = BucketCountingCrawler(self, statefile) hunk ./src/allmydata/test/test_upload.py 14 from allmydata import uri, monitor, client from allmydata.immutable import upload, encode from allmydata.interfaces import FileTooLargeError, UploadUnhappinessError +from allmydata.util import log from allmydata.util.assertutil import precondition from allmydata.util.deferredutil import DeferredListShouldSucceed from allmydata.test.no_network import GridTestMixin hunk ./src/allmydata/test/test_upload.py 714 def is_happy_enough(servertoshnums, h, k): """ I calculate whether servertoshnums achieves happiness level h. I do this with a naïve "brute force search" approach. (See src/allmydata/util/happinessutil.py for a better algorithm.) """ + print "servertoshnums: ", servertoshnums, "h: ", h, "k: ", k if len(servertoshnums) < h: return False # print "servertoshnums: ", servertoshnums, h, k hunk ./src/allmydata/test/test_upload.py 803 def _add_server(self, server_number, readonly=False): assert self.g, "I tried to find a grid at self.g, but failed" ss = self.g.make_server(server_number, readonly) + log.msg("just created a server, number: %s => %s" % (server_number, ss,)) self.g.add_server(server_number, ss) hunk ./src/allmydata/test/test_upload.py 806 - def _add_server_with_share(self, server_number, share_number=None, readonly=False): self._add_server(server_number, readonly) hunk ./src/allmydata/test/test_upload.py 866 d.addCallback(_store_shares) return d - def test_configure_parameters(self): self.basedir = self.mktemp() hooks = {0: self._set_up_nodes_extra_config} } Context: [TAG allmydata-tahoe-1.7.1 zooko@zooko.com**20100719131352 Ignore-this: 6942056548433dc653a746703819ad8c ] Patch bundle hash: 23cbf4e3d70f30b162d9e51c64c64b100923c5c6